In [1]:
import pandas as pd

In [2]:
# Read the simulated course-evaluation records from the CSV file and preview the first five rows.
data = pd.read_csv(filepath_or_buffer='data/course_eval.csv')
data.head(n=5)

Unnamed: 0,Instructor,Course,Semester,Year,Evaluation
0,Safadi,MIST4610,Fall,2019,5
1,Aguar,MIST6380,Spring,2018,4
2,Safadi,MIST5730,Summer,2018,3
3,Boudreau,MIST4610,Fall,2018,4
4,Safadi,MIST4610,Summer,2017,3


In [4]:
# Iterating over a DataFrame yields its column labels; going through
# `.columns` makes that explicit. One label is printed per line.
for column_label in data.columns:
    print(column_label)

Instructor
Course
Semester
Year
Evaluation


The data frame contains MIS instructor course evaluations for several courses over different semesters and years.

**NOTE** This is simulated data. The course evaluation numbers are not real!

1. How many courses did each instructor teach?
2. What is the average course evaluation per instructor?
3. Report the minimum, median, and maximum evaluation of `Safadi`.
4. Report the average evaluation per semester and year.
5. Format the previous result in a data frame where rows are semesters and years are columns.
6. Transform evaluation by subtracting the average evaluation per course (de-mean evaluation per course).
7. Filter the data to keep entries in which `Evaluation` is larger than the overall average evaluation.
8. Filter the data to keep entries in which `Evaluation` is larger than the average evaluation per `Course`.

In [3]:
# Q1: number of course entries recorded for each instructor.
# `count()` tallies the non-null `Course` values per group, i.e. one per row
# taught; use `.nunique()` instead if distinct course codes are wanted.
test_1 = data.groupby(by='Instructor')['Course'].count()
test_1

Instructor
Aguar         22
Boudreau      17
Safadi        23
Srinivasan    16
Name: Course, dtype: int64

In [4]:
# Q2: mean evaluation score for each instructor.
test_2 = data.groupby(by='Instructor')['Evaluation'].mean()
test_2

Instructor
Aguar         2.909091
Boudreau      3.411765
Safadi        2.739130
Srinivasan    3.375000
Name: Evaluation, dtype: float64

In [7]:
# Q3: compute min / median / max evaluation for every instructor,
# then keep only the row labelled 'Safadi'.
# Another route is to filter first and aggregate afterwards:
#   data.loc[data.Instructor == 'Safadi', 'Evaluation'].aggregate(['min', 'median', 'max'])
test_3 = (
    data
    .groupby('Instructor')['Evaluation']
    .agg(['min', 'median', 'max'])
    .loc['Safadi']
)
test_3

min       1
median    3
max       5
Name: Safadi, dtype: int64

In [11]:
# Q4: mean evaluation for every (Semester, Year) pair; the result is a
# Series indexed by a two-level (Semester, Year) MultiIndex.
test_4 = data.groupby(by=['Semester', 'Year'])['Evaluation'].mean()
test_4

Semester  Year
Fall      2017    2.666667
          2018    2.200000
          2019    3.333333
          2020    3.500000
Spring    2017    2.833333
          2018    3.250000
          2019    3.000000
          2020    3.571429
Summer    2017    2.166667
          2018    3.625000
          2019    2.500000
          2020    3.000000
Name: Evaluation, dtype: float64

In [12]:
# Q5: pivot the innermost index level (Year) of the per-semester/year means
# into columns, leaving Semester as the row index.
test_5 = test_4.unstack(level='Year')
test_5

Year,2017,2018,2019,2020
Semester,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fall,2.666667,2.2,3.333333,3.5
Spring,2.833333,3.25,3.0,3.571429
Summer,2.166667,3.625,2.5,3.0


In [16]:
# Q6: de-mean each evaluation by the average evaluation of its course.
# `transform('mean')` broadcasts every course's mean back onto the original
# rows, so the subtraction is a single vectorized operation rather than a
# Python lambda invoked once per group.
# NOTE: this adds the `Demeaned_evaluation` column to `data` in place; the
# cell is safe to re-run because `Evaluation` itself is never modified.
data['Demeaned_evaluation'] = (
    data['Evaluation'] - data.groupby('Course')['Evaluation'].transform('mean')
)
data.head()

Unnamed: 0,Instructor,Course,Semester,Year,Evaluation,Demeaned_evaluation
0,Safadi,MIST4610,Fall,2019,5,1.5
1,Aguar,MIST6380,Spring,2018,4,1.25
2,Safadi,MIST5730,Summer,2018,3,0.0625
3,Boudreau,MIST4610,Fall,2018,4,0.5
4,Safadi,MIST4610,Summer,2017,3,-0.5


In [19]:
# Overall mean evaluation across all rows (the threshold used for Q7).
data['Evaluation'].mean()

3.0641025641025643

In [18]:
# Q7: keep only the rows whose evaluation is strictly greater than the
# overall mean evaluation.
test_7 = data.loc[data['Evaluation'].gt(data['Evaluation'].mean())]
test_7.head()

Unnamed: 0,Instructor,Course,Semester,Year,Evaluation,Demeaned_evaluation
0,Safadi,MIST4610,Fall,2019,5,1.5
1,Aguar,MIST6380,Spring,2018,4,1.25
3,Boudreau,MIST4610,Fall,2018,4,0.5
5,Boudreau,MIST4600,Summer,2018,4,0.916667
6,Boudreau,MIST4610,Summer,2018,5,1.5


In [22]:
# Mean evaluation for each course (the per-course thresholds used for Q8).
data.groupby(by='Course')['Evaluation'].mean()

Course
MIST4600    3.083333
MIST4610    3.500000
MIST5730    2.937500
MIST6380    2.750000
Name: Evaluation, dtype: float64

In [28]:
# Q8: keep only the rows whose evaluation is strictly greater than the mean
# evaluation of their own course.
# Comparing against `transform('mean')` (each course's mean broadcast back to
# the original rows) always yields a boolean mask aligned with `data`, and
# avoids running a Python lambda per group.
better_than_average = (
    data['Evaluation'] > data.groupby('Course')['Evaluation'].transform('mean')
)
test_8 = data[better_than_average]
test_8.head()

Unnamed: 0,Instructor,Course,Semester,Year,Evaluation,Demeaned_evaluation
0,Safadi,MIST4610,Fall,2019,5,1.5
1,Aguar,MIST6380,Spring,2018,4,1.25
2,Safadi,MIST5730,Summer,2018,3,0.0625
3,Boudreau,MIST4610,Fall,2018,4,0.5
5,Boudreau,MIST4600,Summer,2018,4,0.916667


In [29]:
# Alternative for Q8: the data were already de-meaned per course, so a
# positive `Demeaned_evaluation` marks an entry above its course average.
data.loc[data['Demeaned_evaluation'].gt(0)]

Unnamed: 0,Instructor,Course,Semester,Year,Evaluation,Demeaned_evaluation
0,Safadi,MIST4610,Fall,2019,5,1.5
1,Aguar,MIST6380,Spring,2018,4,1.25
2,Safadi,MIST5730,Summer,2018,3,0.0625
3,Boudreau,MIST4610,Fall,2018,4,0.5
5,Boudreau,MIST4600,Summer,2018,4,0.916667
6,Boudreau,MIST4610,Summer,2018,5,1.5
8,Safadi,MIST6380,Spring,2018,3,0.25
11,Safadi,MIST6380,Spring,2019,3,0.25
12,Safadi,MIST6380,Fall,2020,4,1.25
15,Aguar,MIST5730,Spring,2018,3,0.0625
