## Overview of the dataframe

In [11]:
import pandas
from datetime import datetime
from pytz import utc

In [12]:
data = pandas.read_csv("reviews.csv", parse_dates=["Timestamp"])

In [13]:
data.head()

Unnamed: 0,Course Name,Timestamp,Rating,Comment
0,The Python Mega Course: Build 10 Real World Ap...,2021-04-02 06:25:52+00:00,4.0,
1,The Python Mega Course: Build 10 Real World Ap...,2021-04-02 05:12:34+00:00,4.0,
2,The Python Mega Course: Build 10 Real World Ap...,2021-04-02 05:11:03+00:00,4.0,
3,The Python Mega Course: Build 10 Real World Ap...,2021-04-02 03:33:24+00:00,5.0,
4,The Python Mega Course: Build 10 Real World Ap...,2021-04-02 03:31:49+00:00,4.5,


## Selecting data from the dataframe

#### Select one column

In [14]:
data["Rating"]

0        4.0
1        4.0
2        4.0
3        5.0
4        4.5
        ... 
44995    4.0
44996    5.0
44997    5.0
44998    5.0
44999    5.0
Name: Rating, Length: 45000, dtype: float64

#### Select multiple columns

In [15]:
data[["Course Name", "Rating"]]

Unnamed: 0,Course Name,Rating
0,The Python Mega Course: Build 10 Real World Ap...,4.0
1,The Python Mega Course: Build 10 Real World Ap...,4.0
2,The Python Mega Course: Build 10 Real World Ap...,4.0
3,The Python Mega Course: Build 10 Real World Ap...,5.0
4,The Python Mega Course: Build 10 Real World Ap...,4.5
...,...,...
44995,Python for Beginners with Examples,4.0
44996,The Python Mega Course: Build 10 Real World Ap...,5.0
44997,The Python Mega Course: Build 10 Real World Ap...,5.0
44998,Python for Beginners with Examples,5.0


#### Select a row

In [16]:
data.iloc[3] # pick row 3

Course Name    The Python Mega Course: Build 10 Real World Ap...
Timestamp                              2021-04-02 03:33:24+00:00
Rating                                                       5.0
Comment                                                      NaN
Name: 3, dtype: object

#### Selecting multiple rows

In [17]:
data.iloc[1:3]

Unnamed: 0,Course Name,Timestamp,Rating,Comment
1,The Python Mega Course: Build 10 Real World Ap...,2021-04-02 05:12:34+00:00,4.0,
2,The Python Mega Course: Build 10 Real World Ap...,2021-04-02 05:11:03+00:00,4.0,


#### Selecting a section

In [18]:
data[["Course Name", "Rating"]].iloc[1:3]

Unnamed: 0,Course Name,Rating
1,The Python Mega Course: Build 10 Real World Ap...,4.0
2,The Python Mega Course: Build 10 Real World Ap...,4.0


#### Selecting a cell

In [19]:
data["Timestamp"].iloc[2]

Timestamp('2021-04-02 05:11:03+0000', tz='UTC')

In [20]:
data.at[2, "Timestamp"] # Recommended method

Timestamp('2021-04-02 05:11:03+0000', tz='UTC')

## Filtering data based on conditions

#### One condition

In [21]:
data[data["Rating"] > 4]

Unnamed: 0,Course Name,Timestamp,Rating,Comment
3,The Python Mega Course: Build 10 Real World Ap...,2021-04-02 03:33:24+00:00,5.0,
4,The Python Mega Course: Build 10 Real World Ap...,2021-04-02 03:31:49+00:00,4.5,
5,The Python Mega Course: Build 10 Real World Ap...,2021-04-02 01:10:06+00:00,4.5,
6,The Python Mega Course: Build 10 Real World Ap...,2021-04-02 00:44:54+00:00,4.5,
7,The Python Mega Course: Build 10 Real World Ap...,2021-04-01 23:42:02+00:00,5.0,
...,...,...,...,...
44994,The Python Mega Course: Build 10 Real World Ap...,2018-01-01 01:19:24+00:00,5.0,
44996,The Python Mega Course: Build 10 Real World Ap...,2018-01-01 01:09:56+00:00,5.0,
44997,The Python Mega Course: Build 10 Real World Ap...,2018-01-01 01:08:11+00:00,5.0,
44998,Python for Beginners with Examples,2018-01-01 01:05:26+00:00,5.0,


In [22]:
data[data["Rating"] > 4].count()

Course Name    29758
Timestamp      29758
Rating         29758
Comment         4927
dtype: int64

In [23]:
data[data["Rating"] > 4].mean()

  data[data["Rating"] > 4].mean()
  data[data["Rating"] > 4].mean()


Rating    4.87316
dtype: float64

#### Multiple conditions

In [39]:
data[(data["Rating"] > 4) & (data["Course Name"] == 
                             "The Complete Python Course: Build 10 Professional OOP Apps")]

Unnamed: 0,Course Name,Timestamp,Rating,Comment
31,The Complete Python Course: Build 10 Professio...,2021-04-01 01:32:52+00:00,5.0,
34,The Complete Python Course: Build 10 Professio...,2021-03-31 22:53:04+00:00,5.0,
43,The Complete Python Course: Build 10 Professio...,2021-03-31 19:15:25+00:00,5.0,
45,The Complete Python Course: Build 10 Professio...,2021-03-31 17:23:15+00:00,5.0,
101,The Complete Python Course: Build 10 Professio...,2021-03-29 21:54:00+00:00,4.5,
...,...,...,...,...
2501,The Complete Python Course: Build 10 Professio...,2021-01-14 21:19:44+00:00,4.5,
2502,The Complete Python Course: Build 10 Professio...,2021-01-14 20:57:14+00:00,5.0,
2509,The Complete Python Course: Build 10 Professio...,2021-01-14 15:46:18+00:00,5.0,
2523,The Complete Python Course: Build 10 Professio...,2021-01-13 22:21:14+00:00,5.0,


In [38]:
data[(data["Rating"] > 4) & (data["Course Name"] == 
    "The Complete Python Course: Build 10 Professional OOP Apps")]["Rating"].mean()

4.908831908831909

## Time-based filtering

In [33]:
data[(data["Timestamp"] >= datetime(2020, 7, 1, tzinfo = utc)) & 
     (data["Timestamp"] <= datetime(2020, 12, 31, tzinfo = utc))]

Unnamed: 0,Course Name,Timestamp,Rating,Comment
3065,Interactive Data Visualization with Python and...,2020-12-30 23:28:34+00:00,3.0,
3066,The Python Mega Course: Build 10 Real World Ap...,2020-12-30 22:59:02+00:00,4.0,
3067,The Python Mega Course: Build 10 Real World Ap...,2020-12-30 22:40:10+00:00,4.5,
3068,The Python Mega Course: Build 10 Real World Ap...,2020-12-30 21:56:41+00:00,4.5,
3069,The Python Mega Course: Build 10 Real World Ap...,2020-12-30 21:14:34+00:00,4.5,
...,...,...,...,...
9729,The Python Mega Course: Build 10 Real World Ap...,2020-07-01 03:09:44+00:00,3.5,
9730,The Python Mega Course: Build 10 Real World Ap...,2020-07-01 03:09:12+00:00,5.0,
9731,The Python Mega Course: Build 10 Real World Ap...,2020-07-01 02:40:58+00:00,4.0,
9732,The Python Mega Course: Build 10 Real World Ap...,2020-07-01 02:04:02+00:00,5.0,nice


In [27]:
print("The average course rating is " + str(data["Rating"].mean()) + ".")

The average course rating is 4.442155555555556.


In [31]:
print("The average rating for this course is " + 
      str(data[data["Course Name"] == 
               "The Python Mega Course: Build 10 Real World Applications"]["Rating"].mean()) 
      + ".")

The average rating for this course is 4.477270180942244.


In [40]:
print("The average rating for 2020 is " + 
      str(data[(data["Timestamp"] >= datetime(2020, 1, 1, tzinfo = utc)) & 
               (data["Timestamp"] <= datetime(2020, 12, 31, tzinfo = utc))]["Rating"].mean()))

The average rating for 2020 is 4.476541271662339


In [41]:
print("The average rating for this course in 2020 is " +
     str(data[(data["Timestamp"] >= datetime(2020, 1, 1, tzinfo = utc)) & 
               (data["Timestamp"] <= datetime(2020, 12, 31, tzinfo = utc)) &
             (data["Course Name"] == 
               "The Python Mega Course: Build 10 Real World Applications")]["Rating"].mean()))

The average rating for this course in 2020 is 4.506076089208037


In [54]:
print("The average rating from users who did not leave comments is " +
     str(data[data["Comment"].isnull()]["Rating"].mean()))

The average rating from users who did not leave comments is 4.433679746603492


In [55]:
print("The average rating from users who left comments is " +
     str(data[data["Comment"].notnull()]["Rating"].mean()))

The average rating from users who left comments is 4.489777908515959


In [47]:
print("The number of ratings from users who did not leave comments is " +
     str(len(data[data["Comment"].isnull()]["Rating"])))

The number of ratings from users who did not leave comments is 38201


In [48]:
print("The number of ratings from users who left comments is " +
     str(len(data[data["Comment"].notnull()]["Rating"])))

The number of ratings from users who left comments is 6799


In [53]:
print("The number of ratings from users who left comments about the professor's accent is " +
     str(len(data[data["Comment"].str.contains("accent", na = False)]["Rating"])))

The number of ratings from users who left comments about the professor's accent is 77


In [56]:
print("The average rating from users who left comments about the professor's accent is " +
     str(data[data["Comment"].str.contains("accent", na = False)]["Rating"].mean()))

The average rating from users who left comments about the professor's accent is 3.8636363636363638
