In [9]:
import pandas as pd
import numpy as np

In [10]:
# Create a sample dataset
student_df = pd.DataFrame({'name': ['Alice', 'Bob', 'Carol',
                                    'Dan', 'Eli', 'Fran'],
                           'gender': ['female', 'male', 'female', 
                                      'male', 'male', 'female'],
                           'class': ['FY', 'SO', 'SR',
                                     'SO',' JR', 'SR'],
                           'gpa': [90, 93, 97, 
                                   89, 95, 92],
                           'num_classes': [4, 3, 4, 
                                           4, 3, 2]})
student_df

Unnamed: 0,name,gender,class,gpa,num_classes
0,Alice,female,FY,90,4
1,Bob,male,SO,93,3
2,Carol,female,SR,97,4
3,Dan,male,SO,89,4
4,Eli,male,JR,95,3
5,Fran,female,SR,92,2


In [11]:
# Add a flag to highlight whether a student is female.
student_df['female_flag'] = student_df['gender'].apply(lambda x: x == 'female')
student_df

Unnamed: 0,name,gender,class,gpa,num_classes,female_flag
0,Alice,female,FY,90,4,True
1,Bob,male,SO,93,3,False
2,Carol,female,SR,97,4,True
3,Dan,male,SO,89,4,False
4,Eli,male,JR,95,3,False
5,Fran,female,SR,92,2,True


In [12]:
# Using a lambda function might be a bit overkill when you can just use a
# conditional statement to check the value of gender
student_df['female_flag'] = student_df['gender'] == "female"
student_df

Unnamed: 0,name,gender,class,gpa,num_classes,female_flag
0,Alice,female,FY,90,4,True
1,Bob,male,SO,93,3,False
2,Carol,female,SR,97,4,True
3,Dan,male,SO,89,4,False
4,Eli,male,JR,95,3,False
5,Fran,female,SR,92,2,True


In [13]:
# Since female flag contains the same information as gender, we can remove 
# the gender column from the dataframe
student_df = student_df.drop('gender', axis = 1)
student_df

Unnamed: 0,name,class,gpa,num_classes,female_flag
0,Alice,FY,90,4,True
1,Bob,SO,93,3,False
2,Carol,SR,97,4,True
3,Dan,SO,89,4,False
4,Eli,JR,95,3,False
5,Fran,SR,92,2,True


In [15]:
# Concatenate a df with student_df (after dropping class) with the one
# hot encoded version of class
student_df = pd.concat([student_df.drop('class', axis = 1), 
                       pd.get_dummies(student_df['class'])], axis = 1)
student_df

Unnamed: 0,name,gpa,num_classes,female_flag,JR,FY,SO,SR
0,Alice,90,4,True,0,1,0,0
1,Bob,93,3,False,0,0,1,0
2,Carol,97,4,True,0,0,0,1
3,Dan,89,4,False,0,0,1,0
4,Eli,95,3,False,1,0,0,0
5,Fran,92,2,True,0,0,0,1


In [20]:
gender_group = student_df.groupby('female_flag')
gender_group

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000016F1C5ED430>

In [19]:
gender_group.mean()

Unnamed: 0_level_0,gpa,num_classes,JR,FY,SO,SR
female_flag,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
False,92.333333,3.333333,0.333333,0.0,0.666667,0.0
True,93.0,3.333333,0.0,0.333333,0.0,0.666667
