# Simpson's Paradox
Use `admission_data.csv` for this exercise.

In [2]:
# Read the admissions CSV into a DataFrame, record the total number of rows
# for the proportion calculations below, and preview the first few records.
import pandas as pd

df = pd.read_csv('admission_data.csv')
total_students = len(df)
df.head()

Unnamed: 0,student_id,gender,major,admitted
0,35377,female,Chemistry,False
1,56105,male,Physics,True
2,31441,female,Chemistry,False
3,51765,male,Physics,True
4,53714,female,Physics,True


### Proportion and admission rate for each gender

In [3]:
# Share of all students that are female.
# `females` and `female_count` are reused by the later per-major cells.
females = df.loc[df['gender'] == 'female']
female_count = len(females)
print('Female Count: ', female_count)
print(
    'Proportion of students that are female',
    female_count / total_students,
)

Female Count:  257
Proportion of students that are female 0.514


In [4]:
# Share of all students that are male.
# `males` and `male_count` are reused by the later per-major cells.
males = df.loc[df['gender'] == 'male']
male_count = len(males)
print('Male Count: ', male_count)
print(
    'Proportion of students that are male',
    male_count / total_students,
)

Male Count:  243
Proportion of students that are male 0.486


In [27]:
# Fraction of female students who were admitted.
# `admitted` is used directly as a boolean row mask.
females_admitted = females.loc[females['admitted']]
females_admission_count = len(females_admitted)
print('Females Admitted: ', females_admission_count)
print(
    'Admission rate for females: ',
    females_admission_count / female_count,
)

# Equivalent one-liner that works on the full frame instead of `females`:
# len(df[(df['gender'] == 'female') & df['admitted']]) / len(df[df['gender'] == 'female'])

Females Admitted:  74
Admission rate for females:  0.28793774319066145


0.28793774319066145

In [6]:
# Fraction of male students who were admitted.
# `admitted` is used directly as a boolean row mask.
males_admitted = males.loc[males['admitted']]
males_admission_count = len(males_admitted)
print('Males Admitted:', males_admission_count)
print(
    'Admission rate for males: ',
    males_admission_count / male_count,
)

Males Admitted: 118
Admission rate for males:  0.48559670781893005


### Proportion and admission rate for physics majors of each gender

In [7]:
# What proportion of female students are majoring in physics?
# Count the female rows whose major is Physics and divide by all female rows.
females_major_physics = females[females.major == 'Physics'].shape[0]
females_major_physics_proportion = females_major_physics / female_count
print('Proportion of female students are majoring in physics', females_major_physics_proportion)

# Alternative solution using the query function. It must filter on 'female'
# (not 'male') and use len() so the result is a single number rather than the
# per-column Series that DataFrame.count() returns:
#
# fem_phys_rate = len(df.query("gender == 'female' and major == 'Physics'")) / \
#     len(df.query("gender == 'female'"))
# print(fem_phys_rate)

Proportion of female students are majoring in physics 0.12062256809338522


In [8]:
# Share of male students whose major is Physics.
# The raw count is kept because the admission-rate cell divides by it.
males_major_physics = len(males.loc[males['major'] == 'Physics'])
males_major_physics_proportion = males_major_physics / male_count
print(
    'Proportion of male students are majoring in physics',
    males_major_physics_proportion,
)

Proportion of male students are majoring in physics 0.9259259259259259


In [9]:
# Admission rate for female physics majors:
# admitted female physics majors divided by all female physics majors.
females_major_physics_admitted = females[(females.major == 'Physics') & (females.admitted)].shape[0]
females_major_physics_admitted_proportion = females_major_physics_admitted / females_major_physics
print('Admission rate for female physics majors', females_major_physics_admitted_proportion)

# Alternative solution working directly on the full frame `df`:
# len(df[(df["gender"] == 'female') & (df["major"] == 'Physics') & df["admitted"]]) / \
#     len(df[(df["gender"] == 'female') & (df["major"] == 'Physics')])

Admission rate for female physics majors 0.7419354838709677


In [10]:
# Admission rate among male physics majors:
# admitted male physics majors divided by all male physics majors.
males_major_physics_admitted = len(
    males.loc[(males['major'] == 'Physics') & males['admitted']]
)
males_major_physics_admitted_proportion = (
    males_major_physics_admitted / males_major_physics
)
print('Admission rate for male physics majors', males_major_physics_admitted_proportion)

Admission rate for male physics majors 0.5155555555555555


### Proportion and admission rate for chemistry majors of each gender

In [12]:
# Share of female students whose major is Chemistry.
# The raw count is kept because the admission-rate cell divides by it.
females_major_chemistry = len(females.loc[females['major'] == 'Chemistry'])
females_major_chemistry_proportion = females_major_chemistry / female_count
print(
    'Proportion of female students are majoring in Chemistry',
    females_major_chemistry_proportion,
)

Proportion of female students are majoring in Chemistry 0.8793774319066148


In [13]:
# What proportion of male students are majoring in chemistry?
# Count the male rows whose major is Chemistry and divide by all male rows.
males_major_chemistry = males[males.major == 'Chemistry'].shape[0]
males_major_chemistry_proportion = males_major_chemistry / male_count
# The label previously said "physics" (copy-paste slip); this value is the
# chemistry proportion, so the label now matches the female chemistry cell.
print('Proportion of male students are majoring in Chemistry', males_major_chemistry_proportion)

Proportion of male students are majoring in physics 0.07407407407407407


In [14]:
# Admission rate among female chemistry majors:
# admitted female chemistry majors divided by all female chemistry majors.
females_major_chemistry_admitted = len(
    females.loc[(females['major'] == 'Chemistry') & females['admitted']]
)
females_major_chemistry_admitted_proportion = (
    females_major_chemistry_admitted / females_major_chemistry
)
print('Admission rate for female Chemistry majors', females_major_chemistry_admitted_proportion)

Admission rate for female Chemistry majors 0.22566371681415928


In [15]:
# Admission rate among male chemistry majors:
# admitted male chemistry majors divided by all male chemistry majors.
males_major_chemistry_admitted = len(
    males.loc[(males['major'] == 'Chemistry') & males['admitted']]
)
males_major_chemistry_admitted_proportion = (
    males_major_chemistry_admitted / males_major_chemistry
)
print('Admission rate for male Chemistry majors', males_major_chemistry_admitted_proportion)

Admission rate for male Chemistry majors 0.1111111111111111


### Admission rate for each major

In [24]:
# Overall admission rate among physics majors, both genders combined
# (the answer should be a rate of about 54.3%).
(
    len(df.loc[(df['major'] == 'Physics') & df['admitted']])
    / len(df.loc[df['major'] == 'Physics'])
)

0.54296875

In [26]:
# Overall admission rate among chemistry majors, both genders combined
# (the answer should be a rate of about 21.7%).
(
    len(df.loc[(df['major'] == 'Chemistry') & df['admitted']])
    / len(df.loc[df['major'] == 'Chemistry'])
)

0.21721311475409835