In [8]:
import pandas as pd

# Load the student-performance dataset from the project-relative data folder
# and preview the first five rows (the default for DataFrame.head).
df = pd.read_csv(filepath_or_buffer='data/StudentsPerformance.csv')
df.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [9]:
# a. Keep only the rows where the math score is strictly above 80 AND the
#    test-preparation course is marked as 'completed'.
a = df.loc[
    df['math score'].gt(80)
    & df['test preparation course'].eq('completed')
]
print("a.\n", a)

a.
      gender race/ethnicity parental level of education         lunch  \
6    female        group B                some college      standard   
35     male        group E          associate's degree      standard   
49     male        group C                 high school      standard   
56   female        group E          associate's degree      standard   
104    male        group C                some college      standard   
..      ...            ...                         ...           ...   
934    male        group C          associate's degree      standard   
938    male        group D                some college      standard   
987    male        group E            some high school      standard   
990    male        group E                 high school  free/reduced   
995  female        group E             master's degree      standard   

    test preparation course  math score  reading score  writing score  
6                 completed          88             95     

In [10]:
# b. Attach demographic info to every student through a small lookup table
#    keyed on 'race/ethnicity'.
# NOTE(review): the group -> socioeconomic-status mapping is defined inline in
# this cell; confirm it against a real source before drawing conclusions.
demo = pd.DataFrame({
    'race/ethnicity': ['group A', 'group B', 'group C', 'group D', 'group E'],
    'socioeconomic status': ['low', 'mid-low', 'mid', 'mid-high', 'high']
})
# Left join keeps every student row; a group missing from `demo` would get NaN
# in 'socioeconomic status'.
# validate='many_to_one' makes pandas raise MergeError if `demo` ever contains
# a duplicated 'race/ethnicity' key, which would otherwise silently duplicate
# student rows in the merged result.
b = pd.merge(df, demo, on='race/ethnicity', how='left', validate='many_to_one')
print("\nb.\n", b.head())


b.
    gender race/ethnicity parental level of education         lunch  \
0  female        group B           bachelor's degree      standard   
1  female        group C                some college      standard   
2  female        group B             master's degree      standard   
3    male        group A          associate's degree  free/reduced   
4    male        group C                some college      standard   

  test preparation course  math score  reading score  writing score  \
0                    none          72             72             74   
1               completed          69             90             88   
2                    none          90             95             93   
3                    none          47             57             44   
4                    none          76             78             75   

  socioeconomic status  
0              mid-low  
1                  mid  
2              mid-low  
3                  low  
4                  mid

In [11]:
# c. Order students from highest to lowest reading score, breaking ties on
#    writing score (also highest first), and show the top rows.
c = df.sort_values(
    ['reading score', 'writing score'],
    ascending=[False, False],
)
print("\nc.\n", c.head())


c.
      gender race/ethnicity parental level of education     lunch  \
106  female        group D             master's degree  standard   
114  female        group E           bachelor's degree  standard   
165  female        group C           bachelor's degree  standard   
179  female        group D            some high school  standard   
458  female        group E           bachelor's degree  standard   

    test preparation course  math score  reading score  writing score  
106                    none          87            100            100  
114               completed          99            100            100  
165               completed          96            100            100  
179               completed          97            100            100  
458                    none         100            100            100  


In [12]:
# d. Mean of each score column per gender, then flipped so that the score
#    names become the rows and the genders become the columns.
d = (
    df.groupby('gender')[['math score', 'reading score', 'writing score']]
    .mean()
    .T
)
print("\nd.\n", d)


d.
 gender            female       male
math score     63.633205  68.728216
reading score  72.608108  65.473029
writing score  72.467181  63.311203


In [13]:
# e. Cross-tabulate mean scores: one row per lunch type, and for each score
#    column one sub-column per test-preparation status.
e = pd.pivot_table(
    df,
    values=['math score', 'reading score', 'writing score'],
    index='lunch',
    columns='test preparation course',
    aggfunc='mean',
)
print("\ne.\n", e)


e.
                         math score            reading score             \
test preparation course  completed       none     completed       none   
lunch                                                                    
free/reduced             63.045802  56.508929     69.870229  61.602679   
standard                 73.533040  68.133971     76.215859  69.177033   

                        writing score             
test preparation course     completed       none  
lunch                                             
free/reduced                70.351145  58.736607  
standard                    76.766520  67.595694  
