In [1]:
import numpy as np
import pandas as pd
# Only five columns of degrees.csv are loaded, selected by position:
# 0 -> year, 5 -> gender, 8 -> headcount, 12 -> dept, 13 -> major.
# `names` carries exactly one label per entry in `usecols`; the earlier form
# padded the skipped columns with repeated '' placeholders, which current
# pandas rejects ("Duplicate names are not allowed").
usecols = [0, 5, 8, 12, 13]
names = ['year', 'gender', 'headcount', 'dept', 'major']
# header=0 discards the file's own header row in favour of `names`.
df = pd.read_csv('degrees.csv', names=names, header=0, usecols=usecols)

In [2]:
# Preview the first five rows of the loaded frame
df.head(n=5)

Unnamed: 0,year,gender,headcount,dept,major
0,1983-84,Decline to State,1,African American Studies,Afr Amer Stds-Humanities
1,1983-84,Decline to State,1,African American Studies,Afr Amer Stds-Social Sci
2,1983-84,Decline to State,9,Ag & Resource Econ & Pol,Pol Econ of Nat Resources
3,1983-84,Male,1,Ag & Resource Econ & Pol,Pol Econ of Nat Resources
4,1983-84,Decline to State,8,Anthropology,Anthropology


In [3]:
# Filter to most recent year
df2 = df[df['year'] == '2015-16']
# Share of each department's graduates by gender: the per-(dept, gender)
# headcount divided by the per-dept total.
# `headcount` is selected explicitly before summing so the string columns
# (year, major) are never aggregated. Relying on .sum() to drop them silently
# fails on pandas >= 2.0, where object columns are summed as well and the
# division then raises.
dept_gender_counts = df2.groupby(['dept', 'gender'])[['headcount']].sum()
dept_totals = df2.groupby('dept')[['headcount']].sum()
# .div aligns the flat `dept` index of the totals with the `dept` level of the
# (dept, gender) MultiIndex, so each gender row is divided by its own dept total.
agg = dept_gender_counts.div(dept_totals)
agg.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,headcount
dept,gender,Unnamed: 2_level_1
African American Studies,Female,0.5
African American Studies,Male,0.5
Ag & Resource Econ & Pol,Female,0.554745
Ag & Resource Econ & Pol,Male,0.445255
Anthropology,Female,0.732143


In [5]:
# Rank every (dept, gender) share from largest to smallest
agg.sort_values(by='headcount', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,headcount
dept,gender,Unnamed: 2_level_1
Other Env Design Programs,Female,1.000000
Scandinavian,Female,1.000000
Near Eastern Studies,Female,0.928571
Gender & Womens Studies,Female,0.900000
Electrical Eng & Computer Sci,Male,0.887671
Nuclear Engineering,Male,0.875000
L&S Envir Econ & Policy,Male,0.857143
History of Art,Female,0.842105
L&S Social Welfare,Female,0.839695
Mechanical Engineering,Male,0.837838


In [6]:
# Gender split for the EECS department
eecs_dept = 'Electrical Eng & Computer Sci'
agg.loc[eecs_dept]

Unnamed: 0_level_0,headcount
gender,Unnamed: 1_level_1
Female,0.112329
Male,0.887671


In the 2015-16 academic year, 89% of degrees awarded by the EECS department went to male students. That's the highest proportion of male graduates of any department.

In [7]:
# Gender split for the L&S Computer Science department
ls_cs_dept = 'L&S Computer Science'
agg.loc[ls_cs_dept]

Unnamed: 0_level_0,headcount
gender,Unnamed: 1_level_1
Female,0.28436
Male,0.71564


In the 2015-16 academic year, 72% of degrees awarded by the L&S CS department went to male students.

In [8]:
# Restrict the 2015-16 rows to departments whose name contains "Computer".
# na=False treats a missing dept as a non-match instead of producing a NaN
# in the boolean mask (which cannot be used for indexing).
df3 = df2[df2['dept'].str.contains('Computer', na=False)]
# Headcount per gender across those departments. Selecting `headcount`
# explicitly keeps the string columns (year, dept, major) out of the sum.
agg3 = df3.groupby('gender')[['headcount']].sum()
agg3

Unnamed: 0_level_0,headcount
gender,Unnamed: 1_level_1
Female,161
Male,626


In [9]:
# Total degrees across the "Computer" departments. Sum only the headcount
# column rather than every column of the frame (the others hold strings).
df3['headcount'].sum()

787

In [10]:
# Male share of all degrees in the "Computer" departments.
# The denominator sums only the headcount column instead of the whole frame.
agg3.loc['Male', 'headcount'] / df3['headcount'].sum()

0.795425667090216

In total, 80% of the 787 degrees awarded in computer science (EECS and L&S CS) in the 2015-16 academic year went to male students.

In [15]:
# Back to considering all years
# Filter to EECS & CS (na=False: a missing dept counts as a non-match)
df4 = df[df['dept'].str.contains('Computer', na=False)]
# Get rid of 1983-84, since it contains an unusually high number of students declining to state their gender
df4 = df4[df4['year'] != '1983-84']
# Gender share per year: per-(year, gender) headcount divided by the per-year total.
# `headcount` is selected explicitly so the string columns (dept, major) are
# never summed; on pandas >= 2.0 an unselected .sum() aggregates them too and
# the division fails.
year_gender_counts = df4.groupby(['year', 'gender'])[['headcount']].sum()
year_totals = df4.groupby('year')[['headcount']].sum()
# .div aligns the flat `year` index with the `year` level of the MultiIndex.
agg4 = year_gender_counts.div(year_totals)
# Save to CSV
agg4.to_csv('site/csv/cs_by_year.csv')

The proportion of male EECS and L&S CS graduates rose throughout the 2000s, peaked in 2010-11 at 90%, and has declined to 80% since then.

In [16]:
# Generate the data for the graphic
# Get rid of 1983-84, since it contains an unusually high number of students declining to state their gender
df5 = df[df['year'] != '1983-84']
# Calculate the number of graduates by year and department.
# `headcount` is selected explicitly: an unselected .sum() also aggregates the
# string columns on pandas >= 2.0, and the single-name column assignment below
# would then fail with a length mismatch.
agg6 = df5.groupby(['year', 'dept'])[['headcount']].sum()
agg6.columns = ['total_count']
# Calculate the number of female graduates by year and department.
# Reindexing onto the totals' index gives year/dept pairs with no female
# graduates an explicit count of 0, so their share is 0 rather than NaN.
agg5 = (
    df5[df5['gender'] == 'Female']
    .groupby(['year', 'dept'])[['headcount']]
    .sum()
    .reindex(agg6.index, fill_value=0)
)
agg5.columns = ['female_count']
# Divide to get the proportion of female graduates
agg7 = pd.concat([agg5, agg6], axis=1)
agg7['female_pct'] = agg7['female_count'].div(agg7['total_count'])
# Keep only year/dept rows with more than 20 graduates.
# NOTE(review): this comment previously read "fewer than 20" are excluded, but
# the filter also drops rows with exactly 20 -- confirm which threshold is intended.
agg7 = agg7[agg7['total_count'] > 20]
# Save to CSV
agg7.to_csv('site/csv/data.csv')

In [18]:
# Female share of graduates by year for the EECS department only
# Get rid of 1983-84, since it contains an unusually high number of students declining to state their gender
df5 = df[df['year'] != '1983-84']
# Keep departments whose name contains "Electrical" (na=False: missing dept is a non-match)
df5 = df5[df5['dept'].str.contains('Electrical', na=False)]
# Calculate the number of graduates by year and department.
# `headcount` is selected explicitly so the string columns are never summed;
# otherwise the single-name column assignment below fails on pandas >= 2.0.
agg6 = df5.groupby(['year', 'dept'])[['headcount']].sum()
agg6.columns = ['total_count']
# Calculate the number of female graduates by year and department.
# Reindexing onto the totals' index records a year with no female graduates
# as 0 instead of leaving it missing.
agg5 = (
    df5[df5['gender'] == 'Female']
    .groupby(['year', 'dept'])[['headcount']]
    .sum()
    .reindex(agg6.index, fill_value=0)
)
agg5.columns = ['female_count']
# Divide to get the proportion of female graduates
agg7 = pd.concat([agg5, agg6], axis=1)
agg7['female_pct'] = agg7['female_count'].div(agg7['total_count'])
# Display the result (nothing is written to disk in this cell)
agg7

Unnamed: 0_level_0,Unnamed: 1_level_0,female_count,total_count,female_pct
year,dept,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1984-85,Electrical Eng & Computer Sci,37,189,0.195767
1985-86,Electrical Eng & Computer Sci,36,233,0.154506
1986-87,Electrical Eng & Computer Sci,49,237,0.206751
1987-88,Electrical Eng & Computer Sci,44,216,0.203704
1988-89,Electrical Eng & Computer Sci,26,165,0.157576
1989-90,Electrical Eng & Computer Sci,34,231,0.147186
1990-91,Electrical Eng & Computer Sci,33,203,0.162562
1991-92,Electrical Eng & Computer Sci,28,209,0.133971
1992-93,Electrical Eng & Computer Sci,36,250,0.144
1993-94,Electrical Eng & Computer Sci,25,233,0.107296
