In [1]:
import pandas as pd
import random
import requests
import math
import chart_studio.plotly as py
import plotly.graph_objects as go
import plotly.express as px

In [2]:
# Load the merged PDF outcomes table.
# Both CIP-code columns are read as text (str) rather than as numbers, and
# the first CSV column is used as the row index.

bach_prog = pd.read_csv(
    'pdf_merged.csv',
    dtype={'cip_code': str, 'CIPCode': str},
    index_col=0,
)
bach_prog.head()

Unnamed: 0,cip_code,total_graduates,knowledge_rate,career_outcomes_perc,total_perc_employed_overall,total_perc_employed_full_time,total_perc_employed_part_time,perc_standard_employment_overall,perc_standard_full_time,perc_standard_part_time,...,perc_military,perc_continuing_ed,perc_seeking_employment,perc_seeking_continuing_ed,perc_not_seeking,mean_starting_salary,year,perc_still_seeking_overall,CIPCode,CIPTitle
0,1.01,1618,74.8,86.9,75.9,72.9,3.0,68.8,66.8,2.1,...,0.2,9.9,11.1,1.9,0.9,45862,2016,,1.01,Agricultural Business and Management.
1,1.03,92,64.1,84.7,72.9,67.8,5.1,66.1,61.0,5.1,...,0.0,11.9,15.3,0.0,0.0,36987,2016,,1.03,Agricultural Production Operations.
2,1.06,179,73.2,9.7,77.9,75.6,2.3,66.4,64.9,1.5,...,0.0,11.5,5.3,3.8,1.5,36417,2016,,1.06,Applied Horticulture and Horticultural Busines...
3,1.08,228,82.0,83.9,64.2,59.9,4.3,58.8,55.6,3.2,...,0.0,18.7,14.4,1.6,0.5,36268,2016,,1.08,Agricultural Public Services.
4,1.09,1758,67.6,85.2,54.9,5.8,4.0,47.8,44.8,3.0,...,0.1,29.2,1.0,4.6,1.2,3674,2016,,1.09,Animal Sciences.


In [3]:
# Load the merged Scorecard earnings table.
# Both CIP-code columns are read as text (str) rather than as numbers, and
# the first CSV column is used as the row index.

scorecard_data = pd.read_csv(
    'scorecard_merged.csv',
    dtype={'cip_code': str, 'CIPCode': str},
    index_col=0,
)
scorecard_data.head()

Unnamed: 0,cip_code,earnings.highest.1_yr.male_median_earnings,earnings.highest.1_yr.nonmale_median_earnings,earnings.highest.1_yr.overall_median_earnings,earnings.highest.3_yr.male_median_earnings,earnings.highest.3_yr.nonmale_median_earnings,earnings.highest.2_yr.overall_median_earnings,CIPCode,CIPTitle
0,1.0,40786.133333,33450.777778,37245.885057,44577.884615,36479.083333,36284.225862,1.0,"Agriculture, General."
1,1.01,45522.136765,41171.573333,42977.276389,52745.035714,48119.102273,42363.455492,1.01,Agricultural Business and Management.
2,1.02,47853.916667,,46811.097222,54657.759259,,46732.625,1.02,Agricultural Mechanization.
3,1.03,37155.181818,24697.818182,31324.65873,43029.1,30610.388889,32066.041667,1.03,Agricultural Production Operations.
4,1.04,68447.0,64981.0,54205.5,78507.0,,48990.333333,1.04,Agricultural and Food Products Processing.


In [4]:
# Get Random CIP Codes
def get_rand_cips(prog_data, cip_count=8):
    """Pick distinct CIP codes at random from ``prog_data['cip_code']``.

    Parameters
    ----------
    prog_data : pandas.DataFrame
        Frame with a ``cip_code`` column.
    cip_count : int, optional
        Number of codes requested (default 8). If fewer distinct codes exist,
        all of them are returned in random order.

    Returns
    -------
    list
        Up to ``cip_count`` CIP codes with no repeats.
    """
    # random.sample needs a real sequence, so materialise the ndarray of
    # distinct codes as a list.
    unique_cips = list(prog_data['cip_code'].unique())

    # random.choices draws WITH replacement and could return the same code
    # several times; random.sample draws without replacement. Clamp k so a
    # small frame (or a non-positive request) does not raise ValueError.
    sample_size = max(0, min(cip_count, len(unique_cips)))
    return random.sample(unique_cips, k=sample_size)

In [9]:
def stacked_bar(prog_data, cip_list, selected_cips=None):
    """Build a stacked bar chart of average first-year outcomes per CIP code.

    Rows of ``prog_data`` whose ``cip_code`` is in ``cip_list`` are averaged
    per CIP code and drawn as two stacked series: percent employed
    (``total_perc_employed_overall``) and percent continuing education
    (``perc_continuing_ed``).

    Parameters
    ----------
    prog_data : pandas.DataFrame
        Must contain ``cip_code``, ``CIPCode``, ``CIPTitle``,
        ``total_perc_employed_overall`` and ``perc_continuing_ed``.
    cip_list : list-like
        CIP codes to include in the chart.
    selected_cips : list-like, optional
        CIP codes to highlight. They are drawn as separate "(Selected)"
        traces at full opacity while the remaining codes are dimmed to 0.25.
        When none of them is present in the chart nothing is dimmed.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    selected_cips = [] if selected_cips is None else list(selected_cips)

    fig_data = prog_data[prog_data['cip_code'].isin(cip_list)]

    # numeric_only=True keeps the text columns (CIPCode, CIPTitle) out of the
    # aggregation; without it pandas >= 2.0 raises TypeError on them.
    grouped = fig_data.groupby('cip_code').mean(numeric_only=True).reset_index()

    # One title per code, de-duplicated BEFORE the merge so that a code can
    # never contribute more than one row (extra rows would be stacked on top
    # of each other at the same x position).
    titles = fig_data[['CIPCode', 'CIPTitle']].drop_duplicates(subset='CIPCode')
    grouped = grouped.merge(titles, how='left', left_on='cip_code', right_on='CIPCode')

    is_selected = grouped['cip_code'].isin(selected_cips)
    selected_grouped = grouped[is_selected]
    other_grouped = grouped[~is_selected]

    # Dim the non-selected bars only when something is actually highlighted.
    opacity = 1.0 if len(selected_grouped) == 0 else 0.25

    # (column, label, show the bold CIP title in the hover text).
    # Only the first series carries the title: with the unified hover both
    # series are listed together for a given x.
    series_specs = [
        ('total_perc_employed_overall', 'Percent Employed', True),
        ('perc_continuing_ed', 'Percent Continuing Education', False),
    ]

    # Trace order matters for the legend: selected series first, then others.
    groups = [
        (selected_grouped, '(Selected) ', {}),
        (other_grouped, '', {'opacity': opacity}),
    ]

    bars = []
    for frame, name_prefix, extra in groups:
        for column, label, with_title in series_specs:
            header = "<b>%{hovertext}</b><br><br>" if with_title else ""
            bars.append(go.Bar(name=name_prefix + label,
                               x=frame['cip_code'],
                               y=frame[column],
                               hovertext=frame['CIPTitle'],
                               hovertemplate=header + label + ": %{y:.0f}%<br><extra></extra>",
                               **extra))

    stacked = go.Figure(data=bars)
    stacked.update_layout(barmode='stack', title='Average First-Year Outcomes by CIP Code',
                          xaxis_title='CIP Code',
                          yaxis_title='Percentage',
                          hovermode='x unified',
                          template='ggplot2',
                          legend={'traceorder': 'normal', 'borderwidth': 1, 'yanchor': 'middle', 'y': 1.02,
                                  'xanchor': 'right', 'x': 0.99})

    return stacked

# Demo: chart a random set of CIP codes from the PDF data.
sample_cips = get_rand_cips(bach_prog)

# Random subset of those codes to highlight. k is drawn from
# 0..len(sample_cips)-1, so the subset may be empty (nothing highlighted).
highlight_cips = random.choices(sample_cips, k=random.choice(range(len(sample_cips))))

# Pass the highlighted codes through; previously highlight_cips was computed
# but never used, so the "(Selected)" traces were always empty.
stacked_bar(bach_prog, sample_cips, highlight_cips)

In [10]:
def year_grouped_bar(scorecard_data, cip_list, selected_cips=None):
    """Build a grouped bar chart of 1-year and 2-year median earnings per CIP code.

    Parameters
    ----------
    scorecard_data : pandas.DataFrame
        Must contain ``cip_code``, ``CIPTitle``,
        ``earnings.highest.1_yr.overall_median_earnings`` and
        ``earnings.highest.2_yr.overall_median_earnings``.
    cip_list : list-like
        CIP codes to include in the chart.
    selected_cips : list-like, optional
        CIP codes to highlight. They are drawn as separate "(Selected)"
        traces at full opacity while the remaining codes are dimmed to 0.25.
        When none of them is present in the chart nothing is dimmed.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    selected_cips = [] if selected_cips is None else list(selected_cips)

    fig_data = scorecard_data[scorecard_data['cip_code'].isin(cip_list)]

    is_selected = fig_data['cip_code'].isin(selected_cips)
    selected_grouped = fig_data[is_selected]
    other_grouped = fig_data[~is_selected]

    # Dim the non-selected bars only when something is actually highlighted.
    opacity = 1.0 if len(selected_grouped) == 0 else 0.25

    # (column, label). The label is used for both the legend name and the
    # hover text, so the two can no longer drift apart ("2-year" vs "2-Year").
    series_specs = [
        ('earnings.highest.1_yr.overall_median_earnings', '1-Year Median Earnings'),
        ('earnings.highest.2_yr.overall_median_earnings', '2-Year Median Earnings'),
    ]

    # Trace order matters for the legend: selected series first, then others.
    groups = [
        (selected_grouped, '(Selected) ', {}),
        (other_grouped, '', {'opacity': opacity}),
    ]

    bars = []
    for frame, name_prefix, extra in groups:
        for column, label in series_specs:
            bars.append(go.Bar(name=name_prefix + label,
                               x=frame['cip_code'],
                               # Earnings are shown rounded to the nearest 1,000.
                               y=frame[column].round(-3),
                               hovertext=frame['CIPTitle'],
                               hovertemplate="<b>%{hovertext}</b><br><br>" +
                                             label + ": $%{y}<br><extra></extra>",
                               **extra))

    grouped = go.Figure(data=bars)
    grouped.update_layout(barmode='group', title='First and Second Year Post-Grad Median Earnings by CIP Code',
                          xaxis_title='CIP Code',
                          yaxis_title='Reported Post-Grad Earnings',
                          template='ggplot2',
                          legend={'traceorder': 'normal', 'borderwidth': 1, 'yanchor': 'middle', 'y': 1.02,
                                  'xanchor': 'right', 'x': 0.99})

    return grouped

# Demo: chart a random set of CIP codes. The codes are drawn from bach_prog;
# any that do not occur in scorecard_data simply produce no bars.
sample_cips = get_rand_cips(bach_prog)

# Random subset of those codes to highlight. k is drawn from
# 0..len(sample_cips)-1, so the subset may be empty (nothing highlighted).
highlight_cips = random.choices(sample_cips, k=random.choice(range(len(sample_cips))))

# Pass the highlighted codes through; previously highlight_cips was computed
# but never used, so the "(Selected)" traces were always empty.
year_grouped_bar(scorecard_data, sample_cips, highlight_cips)

In [11]:
def male_grouped_bar(scorecard_data, cip_list, selected_cips = []):
    """Build a grouped bar chart of 1-year nonmale vs. male median earnings.

    Rows of ``scorecard_data`` whose ``cip_code`` is in ``cip_list`` are
    charted. Codes also listed in ``selected_cips`` go into separate
    "(Selected)" traces; when at least one such row exists, the remaining
    traces are drawn at 0.25 opacity, otherwise at 1.0.

    Returns the resulting ``go.Figure``.
    """
    in_chart = scorecard_data['cip_code'].isin(cip_list)
    fig_data = scorecard_data[in_chart]

    highlighted = fig_data['cip_code'].isin(selected_cips)
    selected_grouped = fig_data[highlighted]
    other_grouped = fig_data[~highlighted]

    opacity = 0.25 if len(selected_grouped) != 0 else 1.0

    # (column, legend name, hover label) -- nonmale first, then male.
    series_specs = (
        ('earnings.highest.1_yr.nonmale_median_earnings',
         '1-Year Nonmale Median Earnings', 'Median Nonmale Earnings'),
        ('earnings.highest.1_yr.male_median_earnings',
         '1-Year Male Median Earnings', 'Median Male Earnings'),
    )

    # Selected traces carry no explicit opacity; the others get the computed one.
    trace_groups = (
        (selected_grouped, '(Selected) ', {}),
        (other_grouped, '', {'opacity': opacity}),
    )

    traces = [
        go.Bar(name=prefix + legend_name,
               x=frame['cip_code'],
               # Values are rounded to the nearest 1,000 before plotting.
               y=frame[column].round(-3),
               hovertext=frame['CIPTitle'],
               hovertemplate="<b>%{hovertext}</b><br><br>" +
                             hover_label + ": $%{y}<br>" + "<extra></extra>",
               **extra)
        for frame, prefix, extra in trace_groups
        for column, legend_name, hover_label in series_specs
    ]

    grouped = go.Figure(data=traces)

    legend_style = {'traceorder': 'normal', 'borderwidth': 1, 'yanchor': 'middle', 'y': 1.02,
                    'xanchor': 'right', 'x': 0.99}
    grouped.update_layout(barmode='group',
                          title='First Year Post-Grad Median Earnings (Male and Nonmale Graduates)',
                          xaxis_title='CIP Code',
                          yaxis_title='Reported Post-Grad Earnings',
                          template='ggplot2',
                          legend=legend_style)

    return grouped

# Demo: chart a random set of CIP codes. The codes are drawn from bach_prog;
# any that do not occur in scorecard_data simply produce no bars.
sample_cips = get_rand_cips(bach_prog)

# Random subset of those codes to highlight. k is drawn from
# 0..len(sample_cips)-1, so the subset may be empty (nothing highlighted).
highlight_cips = random.choices(sample_cips, k=random.choice(range(len(sample_cips))))

# Pass the highlighted codes through; previously highlight_cips was computed
# but never used, so the "(Selected)" traces were always empty.
male_grouped_bar(scorecard_data, sample_cips, highlight_cips)

In [8]:
# TODO: highlight with CIP name; maybe change line width.
import numpy as np

def reg_salary(prog_data, cip_list, band=500):
    """Plot mean starting salary by year for each CIP code, with a shaded band.

    For every distinct code in ``cip_list`` two traces are added: a filled
    polygon spanning ``mean_starting_salary`` +/- ``band`` across the years,
    and a line+marker trace of the salary rounded to the nearest 1,000.

    Parameters
    ----------
    prog_data : pandas.DataFrame
        Must contain ``cip_code``, ``year``, ``mean_starting_salary`` and
        ``CIPTitle``.
    cip_list : list-like
        CIP codes to plot. Repeated codes are drawn once; codes with no rows
        in ``prog_data`` are skipped.
    band : number, optional
        Half-height of the shaded band around each salary line (default 500).

    Returns
    -------
    plotly.graph_objects.Figure
    """
    fig_data = prog_data[prog_data['cip_code'].isin(cip_list)]

    salary = go.Figure()

    colors = ['maroon', 'fuchsia', 'green', 'black', 'red', 'lime', 'navy',
              'yellow', 'olive', 'blue', 'teal', 'aqua', 'silver', 'gray']

    # Drop repeated codes (keeping first-seen order) so a code is not drawn,
    # and listed in the legend, more than once.
    unique_cips = list(dict.fromkeys(cip_list))

    for i, cip in enumerate(unique_cips):
        cip_data = fig_data[fig_data['cip_code'] == cip].sort_values(by=['year'])

        # A code absent from prog_data has nothing to draw; iloc[0] below
        # would otherwise raise IndexError.
        if cip_data.empty:
            continue

        # Cycle through the palette instead of indexing past its end when
        # more codes are requested than there are colors.
        color = colors[i % len(colors)]

        cip_title = cip_data.iloc[0]['CIPTitle']

        # Keep the band edges as local Series rather than writing new columns
        # onto the filtered frame.
        upper = cip_data['mean_starting_salary'] + band
        lower = cip_data['mean_starting_salary'] - band

        # Closed polygon: walk the upper edge forward in time, then the lower
        # edge backward, so fill='toself' shades the region between them.
        years = list(cip_data['year'])
        year_bounds = years + years[::-1]
        salary_bounds = list(upper) + list(lower)[::-1]

        salary.add_trace(go.Scatter(name=cip_title,
                                    x=year_bounds,
                                    y=salary_bounds,
                                    mode='lines',
                                    hoverinfo='name',
                                    fill='toself',
                                    fillcolor=color,
                                    line_color=color,
                                    showlegend=False,
                                    opacity=0.25)
                         )

        salary.add_trace(go.Scatter(name=cip,
                                    x=cip_data['year'],
                                    y=cip_data['mean_starting_salary'].round(-3),
                                    mode='lines+markers',
                                    line_color=color,
                                    hovertext=cip_data['CIPTitle'],
                                    hovertemplate="<b>%{hovertext}</b><br><br>" + "Year: %{x}<br>" +
                                                  "Mean Starting Salary: $%{y}<br>" + "<extra></extra>")
                         )

    # Title and axis labels added so the figure stands alone, matching the
    # bar charts in this notebook.
    salary.update_layout(template='ggplot2', legend_title='CIP Code', hoverlabel=dict(namelength=-1),
                         title='Mean Starting Salary by Year',
                         xaxis_title='Year',
                         yaxis_title='Mean Starting Salary')

    return salary

# Demo: draw 8 random CIP codes from the PDF data and show their
# starting-salary trend chart.
sample_cips = get_rand_cips(prog_data=bach_prog, cip_count=8)

reg_salary(prog_data=bach_prog, cip_list=sample_cips).show()

In [49]:
#bach_prog.to_csv('pdf_merged.csv')

In [89]:
#scorecard_data.to_csv('scorecard_merged.csv')

In [86]:
# scorecard_data = scorecard_data.merge(cip_summary[['CIPCode', 'CIPTitle']], how='left', left_on='cip_code', right_on='CIPCode')
# scorecard_data

Unnamed: 0,cip_code,earnings.highest.1_yr.male_median_earnings,earnings.highest.1_yr.nonmale_median_earnings,earnings.highest.1_yr.overall_median_earnings,earnings.highest.3_yr.male_median_earnings,earnings.highest.3_yr.nonmale_median_earnings,earnings.highest.2_yr.overall_median_earnings,CIPCode,CIPTitle
0,01.00,40786.133333,33450.777778,37245.885057,44577.884615,36479.083333,36284.225862,01.00,"Agriculture, General."
1,01.01,45522.136765,41171.573333,42977.276389,52745.035714,48119.102273,42363.455492,01.01,Agricultural Business and Management.
2,01.02,47853.916667,,46811.097222,54657.759259,,46732.625000,01.02,Agricultural Mechanization.
3,01.03,37155.181818,24697.818182,31324.658730,43029.100000,30610.388889,32066.041667,01.03,Agricultural Production Operations.
4,01.04,68447.000000,64981.000000,54205.500000,78507.000000,,48990.333333,01.04,Agricultural and Food Products Processing.
...,...,...,...,...,...,...,...,...,...
394,60.01,158924.000000,,190365.500000,184245.000000,,182336.333333,60.01,Dental Residency Programs.
395,60.03,,,,,,,60.03,Veterinary Residency Programs.
396,60.04,,,,,,,60.04,Medical Residency Programs - General Certifica...
397,60.05,,,,,,,60.05,Medical Residency Programs - Subspecialty Cert...
