In [1]:
import requests
import pandas as pd

In [2]:
# Placeholder frame for the scraped stipend records; the download cell below populates `df`.
df = pd.DataFrame()

In [3]:
# Download every page of the phdstipends.com data endpoint and combine them into `df`.
# Page frames are collected in a list and concatenated once: calling pd.concat
# inside the loop re-copies all previously accumulated rows on every iteration.
BASE_URL = 'https://www.phdstipends.com/data/'
N_PAGES = 140  # pages 0..139, the same range this notebook has always requested
page_frames = []
with requests.Session() as session:  # reuse one connection for all requests
    for page in range(N_PAGES):
        response = session.get(BASE_URL + str(page), timeout=30)
        # Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
        response.raise_for_status()
        page_frames.append(pd.DataFrame.from_dict(response.json()['data']))
# Rebuilding `df` from the downloaded pages keeps this cell idempotent: re-running
# it no longer appends a second copy of the data. Row labels are left as each
# page produced them (no ignore_index), matching the previous behaviour.
df = pd.concat(page_frames)
# WARNING: They have removed their 'living wage ratio' from their website, presumably due to a data integrity issue.
# Use Living Wage data from elsewhere for your area.

In [4]:
# Assign readable headers; the list is applied positionally, so its order must
# match the existing column order of `df`. Then snapshot the table to CSV
# before the cleaning cells that follow modify it.
df.columns = [
    'University',
    'Department',
    'Overall Pay',
    'Living Wage Ratio',
    'Academic Year',
    'Program Year',
    'Comments',
    '12M Gross Pay',
    '9M Gross Pay',
    '3M Gross Pay',
    'Fees',
]
df.to_csv('stipends.csv', index=False)

In [5]:
# Show the scraped table as the cell output (the rendered output elides the middle rows).
df

Unnamed: 0,University,Department,Overall Pay,Living Wage Ratio,Academic Year,Program Year,Comments,12M Gross Pay,9M Gross Pay,3M Gross Pay,Fees
0,University of California - Santa Barbara (UCSB),Chemical Engineering,"$31,125",1.27,2014-2015,1st,,"$31,250",,,$125
1,Pennsylvania State University (Penn State),Chemistry,"$20,500",1.11,2014-2015,2nd,,"$20,500",,,
2,University of California - San Diego (UCSD),Mathematics,"$20,500",0.87,2013-2014,4th,,,"$18,000","$2,500",
3,University of North Carolina - Chapel Hill (UNC),Chemistry,"$26,700",1.42,2014-2015,2nd,Through BBSP,"$28,500",,,"$1,800"
4,University of California - Santa Barbara (UCSB),Materials,"$34,000",1.38,2013-2014,5th,This is the gross pay for someone with a fello...,"$34,000",,,
...,...,...,...,...,...,...,...,...,...,...,...
101,Cornell University (CU),Biological and Environmental Engineering,"$32,950",1.36,2017-2018,4th,Includes health insurance,"$33,000",,,$50
102,San Diego State University (SDSU),Mechanical Engineering,"$21,376",0.81,2021-2022,2nd,,"$23,376",,,"$2,000"
103,University of Texas - Austin (UT),Chemical Engineering,"$35,000",1.53,2022-2023,1st,"Health insurance, dental insurance, and vision...","$35,000",,,
104,University of Wisconsin - Madison (UW),Human Ecology,"$21,115",0.92,2022-2023,2nd,,,"$21,115",,


In [6]:
def format_year(a):
    """Return the starting year of an academic-year label as a float.

    Parameters
    ----------
    a : str
        Label such as ``'2014-2015'``; only the text before the first ``'-'``
        is parsed.

    Returns
    -------
    float or int
        The starting year (e.g. ``2014.0``), or ``0`` when ``a`` is not a
        string or its first segment is not numeric.
    """
    try:
        return float(a.split('-')[0])
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: non-string input such as None or NaN;
        # ValueError: first segment is empty or not a number.
        # Catching only these lets KeyboardInterrupt and genuine bugs propagate
        # instead of being silently turned into 0.
        return 0
# Replace each academic-year label with its parsed starting year, then sort
# the rows by that year in ascending order.
df['Academic Year'] = list(map(format_year, df['Academic Year']))
df = df.sort_values('Academic Year')

In [7]:
def format_pay(p):
    """Convert a currency string such as ``'$31,125'`` to a float.

    Parameters
    ----------
    p : str or any
        Pay value as text, optionally containing ``'$'`` and thousands commas.

    Returns
    -------
    float
        The parsed amount when the cleaned text is numeric.
    str
        The text with ``'$'`` and ``','`` removed when it is not numeric.
    any
        ``p`` unchanged when it is not a string (e.g. ``None``, NaN, or a
        value that is already numeric).
    """
    if not isinstance(p, str):
        # Missing or already-numeric values have no .replace(); pass them
        # through rather than raising AttributeError for the whole column.
        return p
    p_stripped = p.replace('$', '').replace(',', '')
    try:
        return float(p_stripped)
    except ValueError:
        # Not a number after cleaning: keep the cleaned text, as before.
        return p_stripped
# Run every 'Overall Pay' entry through format_pay and store the results back in the column.
df['Overall Pay'] = list(map(format_pay, df['Overall Pay']))

In [8]:
def format_ratio(r):
    """Convert a living-wage-ratio value to a float.

    Parameters
    ----------
    r : any
        Value accepted by ``float()``, e.g. ``'1.27'`` or ``1.27``.

    Returns
    -------
    float or None
        The numeric ratio, or ``None`` when ``r`` cannot be converted
        (``None``, empty string, non-numeric text).
    """
    try:
        return float(r)
    except (TypeError, ValueError):
        # TypeError: None or another non-convertible type; ValueError: bad text.
        # Catching only these lets KeyboardInterrupt and genuine bugs propagate.
        return None
# Run every 'Living Wage Ratio' entry through format_ratio and store the results back in the column.
df['Living Wage Ratio'] = list(map(format_ratio, df['Living Wage Ratio']))

In [10]:
def format_dept(d):
    """Map a free-text department name onto a canonical label.

    The name is lower-cased and checked against an ordered table of
    substrings; the first entry with a matching substring supplies the label.
    Names that match nothing are returned lower-cased and then title-cased.

    Parameters
    ----------
    d : str
        Department name as entered by the respondent.

    Returns
    -------
    str
        The canonical label, or the title-cased name when no keyword matches.
    """
    lowered = d.lower()
    # Order matters: earlier rows win when a name contains several keywords
    # (e.g. a name containing both 'mech' and 'chem' resolves to Mechanical).
    keyword_table = (
        (('psych',), 'Psychology'),
        (('sociology',), 'Sociology'),
        (('english',), 'English'),
        (('computer', 'khoury'), 'Computer Science or Engineering'),
        (('mech',), 'Mechanical Engineering'),
        (('chem',), 'Chemistry / Chemical Engineering'),
        (('marine', 'env'), 'Marine and Environmental Sciences'),
        (('math',), 'Mathematics'),
        (('econ',), 'Economics'),
    )
    for keywords, label in keyword_table:
        if any(keyword in lowered for keyword in keywords):
            return label
    return lowered.title()
# Run every 'Department' entry through format_dept and store the results back in the column.
df['Department'] = list(map(format_dept, df['Department']))

In [11]:
# In every comment, replace the space that follows '.', ',' or ')' with an
# HTML <br> tag (applied in that order).
comments_with_breaks = []
for comment in df['Comments']:
    for punct in ('.', ',', ')'):
        comment = comment.replace(punct + ' ', punct + '<br>')
    comments_with_breaks.append(comment)
df['Comments'] = comments_with_breaks

In [12]:
# Keep only the rows whose university is exactly 'Northeastern University (NU)'.
is_northeastern = df['University'] == 'Northeastern University (NU)'
neu = df[is_northeastern]

In [30]:
# Data from https://livingwage.mit.edu/metros/14460
# and archival from https://web.archive.org/web/changes/https://livingwage.mit.edu/metros/14460
# Keys are years; each list follows the column order passed to from_dict below.
# 2014 holds only None placeholders, so its row comes out empty.
living_wages = {
    2014: [None, None],
    2015: [27_660, 23_698, 56_706, 48_583],
    2016: [26_411, 23_375, 55_409, 49_038],
    2017: [27_078, 23_807, 56_812, 49_949],
    2018: [29_345, 25_753, 62_724, 55_045],
    2019: [30_577, 26_834, 63_710, 55_910],
    2020: [34_824, 30_560, 70_298, 61_692],
    2021: [39_879, 34_315, 81_282, 69_943],
    2022: [46_918, 38_027, 93_854, 76_069],
}
# One row per year; the year index is then moved into a regular 'year' column.
living_wage_df = (
    pd.DataFrame.from_dict(
        data=living_wages,
        orient='index',
        columns=[
            'No child pre-tax',
            'No child post-tax',
            '1 child pre-tax',
            '1 child post-tax',
        ],
    )
    .reset_index()
    .rename(columns={'index': 'year'})
)
living_wage_df

Unnamed: 0,year,No child pre-tax,No child post-tax,1 child pre-tax,1 child post-tax
0,2014,,,,
1,2015,27660.0,23698.0,56706.0,48583.0
2,2016,26411.0,23375.0,55409.0,49038.0
3,2017,27078.0,23807.0,56812.0,49949.0
4,2018,29345.0,25753.0,62724.0,55045.0
5,2019,30577.0,26834.0,63710.0,55910.0
6,2020,34824.0,30560.0,70298.0,61692.0
7,2021,39879.0,34315.0,81282.0,69943.0
8,2022,46918.0,38027.0,93854.0,76069.0


In [32]:
# Data from https://livingwage.mit.edu/metros/14460
# and archival from https://web.archive.org/web/changes/https://livingwage.mit.edu/metros/14460
# Keys are years; each list follows the column order passed to from_dict below.
# 2014 holds only None placeholders, so its row comes out empty.
rent = {
    2014: [None, None],
    2015: [11_661, 16_368, 13_013],
    2016: [11_661, 16_368, 13_013],
    2017: [11_939, 17_617, 14_088],
    2018: [13_333, 18_839, 15_195],
    2019: [14_019, 19_491, 15_789],
    2020: [17_352, 23_846, 19_392],
    2021: [18_466, 25_112, 20_425],
    2022: [18_675, 25_408, 20_688],
}
# One row per year; the year index is then moved into a regular 'year' column.
rent_df = (
    pd.DataFrame.from_dict(
        data=rent,
        orient='index',
        columns=['1 Adult', '1 Adult 1 Child', '2 Adults'],
    )
    .reset_index()
    .rename(columns={'index': 'year'})
)
rent_df

Unnamed: 0,year,1 Adult,1 Adult 1 Child,2 Adults
0,2014,,,
1,2015,11661.0,16368.0,13013.0
2,2016,11661.0,16368.0,13013.0
3,2017,11939.0,17617.0,14088.0
4,2018,13333.0,18839.0,15195.0
5,2019,14019.0,19491.0,15789.0
6,2020,17352.0,23846.0,19392.0
7,2021,18466.0,25112.0,20425.0
8,2022,18675.0,25408.0,20688.0


In [25]:
import plotly.express as px
import plotly.graph_objects as go

In [54]:
# Build the figure: one marker trace per department for the reported stipends,
# followed by line traces for the living-wage and housing-cost series.
fig = go.Figure()

# groupby yields departments in sorted order, so the trace order (and with it
# plotly's automatic per-trace colour) is the same on every run. Iterating a
# set() of names, as before, produced an arbitrary order.
for department, dept_df in neu.groupby('Department'):
    fig.add_trace(go.Scatter(
        x=dept_df['Academic Year'],
        y=dept_df['Overall Pay'],
        mode='markers',
        name=department,
        showlegend=False,
        marker=dict(
            opacity=0.5,
            size=10,
            line=dict(color='#333', width=1),
        ),
    ))

# (source frame, column, legend label, line style) for each reference line,
# listed in the order the traces are added to the figure.
reference_lines = [
    (living_wage_df, 'No child pre-tax', 'Living Wage (No Dependents, Pre-Tax)',
     dict(color='firebrick', width=2, dash='dot')),
    (living_wage_df, 'No child post-tax', 'Living Wage (No Dependents, After Taxes )',
     dict(color='firebrick', width=2)),
    (living_wage_df, '1 child pre-tax', 'Living Wage (1 Child, Pre-Tax)',
     dict(color='salmon', width=2, dash='dot')),
    (living_wage_df, '1 child post-tax', 'Living Wage (1 Child, After Taxes )',
     dict(color='salmon', width=2)),
    (rent_df, '1 Adult', 'Housing (1 Adult)',
     dict(color='lightblue', width=2)),
    (rent_df, '1 Adult 1 Child', 'Housing (1 Adult 1 Child)',
     dict(color='lightblue', width=2, dash='dot')),
    (rent_df, '2 Adults', 'Housing (2 Adults)',
     dict(color='lightblue', width=2, dash='dash')),
]
for source_df, column, label, line_style in reference_lines:
    fig.add_trace(go.Scatter(
        x=source_df['year'],
        y=source_df[column],
        mode='lines',
        name=label,
        line=line_style,
    ))

# Source attribution, positioned in paper coordinates below and left of the plot area.
fig.add_annotation(
    x=-0.1, y=-0.2,
    xref='paper', yref='paper',
    text=(
        "Stipend source: <a href='https://www.phdstipends.com/'>https://www.phdstipends.com/</a><br>"
        "Living Wage source: <a href='https://livingwage.mit.edu/metros/14460'>https://livingwage.mit.edu/metros/14460</a><br>"
        "Historical data from the Internet Archive: <a href='https://web.archive.org/web/changes/https://livingwage.mit.edu/metros/14460'>https://web.archive.org/web/changes/https://livingwage.mit.edu/metros/14460</a>"
    ),
    showarrow=False,
    align="left",
    font=dict(size=10, color="gray"),
)
# Last expression of the cell: update_layout's return value is the cell output.
fig.update_layout(template='plotly_white', title='Graduate Stipends at Northeastern University')

In [55]:
# Export the figure to disk twice: as an HTML file and as a PNG image.
# NOTE(review): write_image presumably needs plotly's static-image backend
# (e.g. the kaleido package) installed in the kernel environment — confirm.
fig.write_html('neu_grad_stipends.html')
fig.write_image('neu_grad_stipends.png')