In [20]:
import plotly.graph_objects as go

fig = go.Figure()

In [21]:
# HODP colors
monochrome_colors = ['#251616', '#760000', '#C63F3F', '#E28073', '#F1D3CF']
primary_colors = ['#C63F3F', '#F4B436', '#83BFCC', '#455574', '#E2DDDB']

# HODP template
theme_hodp = go.layout.Template(
    layout=go.Layout(
        title = {'font':{'size':24, 'family':"Helvetica", 'color':monochrome_colors[0]}, 'pad':{'t':100, 'r':0, 'b':0, 'l':0}},
        font = {'size':18, 'family':'Helvetica', 'color':'#717171'},
        xaxis = {'ticks': "outside",
                'tickfont': {'size': 14, 'family':"Helvetica"},
                'showticksuffix': 'all',
                'showtickprefix': 'last',
                'showline': True,
                'title':{'font':{'size':18, 'family':'Helvetica'}, 'standoff':20},
                'automargin': True
                },
        yaxis = {'ticks': "outside",
                'tickfont': {'size': 14, 'family':"Helvetica"},
                'showticksuffix': 'all',
                'showtickprefix': 'last',
                'title':{'font':{'size':18, 'family':'Helvetica'}, 'standoff':20},
                'showline': True,
                'automargin': True
                },
        legend = {'bgcolor':'rgba(0,0,0,0)', 
                'title':{'font':{'size':18, 'family':"Helvetica", 'color':monochrome_colors[0]}}, 
                'font':{'size':14, 'family':"Helvetica"}, 
                'yanchor':'bottom'
                },
        colorscale = {'diverging':monochrome_colors},
        coloraxis = {'autocolorscale':True, 
                'cauto':True, 
                'colorbar':{'tickfont':{'size':14,'family':'Helvetica'}, 'title':{'font':{'size':18, 'family':'Helvetica'}}},
                }
    )
)

In [22]:
import numpy as np
import pandas as pd

import plotly.graph_objects as go

df = pd.read_csv("summer_jobs.csv")

df.head()

Unnamed: 0,Q6,Q7,Q1,Q1_9_TEXT,Q2,Q3_1,Q4,Q4_8_TEXT,Q5,Q10_1
0,What is your social year?,What is your concentration? Select all that ap...,What are your current plans for Summer 2021? -...,What are your current plans for Summer 2021? -...,"If you have summer plans, how will they be con...",How satisfied are you with your current summer...,What did you do during Summer 2020? - Selected...,What did you do during Summer 2020? - Other - ...,Did your Summer 2020 plans change due to the p...,How satisfied were you with your plans during ...
1,2023,Integrative Biology,Research,,In-Person,4,Part-Time Internship,,Yes,2
2,2024,Undecided,Research,,Remote,4,No Plans,,Yes,2
3,2023,Social Studies,Full-Time Internship,,Remote,3,Full-Time Internship,,Yes,4
4,2024,Government,Other,Summer School,In-Person,4,No Plans,,Yes,4


In [23]:
col_list = ["Q6", "Q5"]
df = pd.read_csv("summer_jobs.csv", usecols=col_list)
df.head()

Unnamed: 0,Q6,Q5
0,What is your social year?,Did your Summer 2020 plans change due to the p...
1,2023,Yes
2,2024,Yes
3,2023,Yes
4,2024,Yes


In [24]:
col_list = ["Q6", "Q5"]
df = pd.read_csv("summer_jobs.csv", usecols=col_list)
lst = df[1:]
length = len(lst)

def update (year):
    yes = 0
    no = 0
    for i in range (1, length+1):
        if lst["Q6"][i] == year:
            if lst["Q5"][i] == 'Yes':
                yes = yes +1
            else:
                no = no +1
    return [int(year), yes, no]

for i in range (0,4):
    yr = str(2024 - i) 
    print("Year: " + str(update(yr)[0]) + " | Yes: " + str(update(yr)[1]) + " | No: " + str(update(yr)[2]))

Year: 2024 | Yes: 37 | No: 19
Year: 2023 | Yes: 32 | No: 13
Year: 2022 | Yes: 30 | No: 6
Year: 2021 | Yes: 7 | No: 3


NOTE: 2 individuals answered "Other," so they do not fall into the class year categories

In [25]:
X = ['2021', '2022', '2023', '2024']
Y1 = [70, 83.3, 71.1, 66.1]
Y2 = [30, 16.7 , 28.9, 33.9]

fig = go.Figure()
fig.add_trace(go.Bar(
    x=X, 
    y=Y1, 
    name="Yes",
    marker_color=primary_colors[0],
))

fig.add_trace(go.Bar(x=X, y=Y2, name="No", marker_color=primary_colors[3],))

fig.update_layout(
    barmode='stack',
    title= {'text': "Changes to 2020 Student Summer Plans",
            'y':0.9,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
    xaxis_title="Class Year",
    yaxis_title="Percentage of Student Respondents")

fig.show()

In [26]:
con_col = ['Q7','Q5']
df_concen = pd.read_csv("summer_jobs.csv", usecols=con_col)
df_concen['Q7'][1:].value_counts(ascending=True)

History and Science                              1
Computer Science,Philosophy                      1
History of Art and Architecture                  1
Economics,Government                             1
Chemical and Physical Biology,Statistics         1
Computer Science,Statistics                      1
History                                          1
History and Science,Neuroscience                 1
Astrophysics,Earth and Planetary Science         1
Astrophysics                                     1
English,Music                                    1
Government,Statistics                            1
Human Evolutionary Biology                       1
Music                                            1
Comparative Literature                           1
Economics,Government,Psychology,Undecided        1
Economics,Neuroscience                           1
Human Developmental and Regenerative Biology     1
Classics,History                                 1
Computer Science,Economics     

In [27]:
engin_n_applied = [
    "Applied Math",
    "Biomedical Engineering",
    "Computer Science",
    "Electrical Engineering",
    "Engineering Sciences",
    "Environmental Science and Engineering",
    "Mechanical Engineering"]

sciences = [
    "Astrophysics",
    "Chemical and Physical Biology",
    "Chemistry",
    "Chemistry and Physics",
    "Earth and Planetary Sciences",
    "Environmental Science and Public Policy",
    "Human Developmental and Regenerative Biology",
    "Human Evolutionary Biology",
    "Integrative Biology",
    "Mathematics",
    "Molecular and Cellular Biology",
    "Neuroscience",
    "Physics",
    "Statistics"]

social_sciences = [
    "African and African American Studies",
    "Anthropology",
    "Economics",
    "Government",
    "History",
    "History and Science",
    "Psychology",
    "Social Studies",
    "Sociology",
    "Studies of Women, Gender, and Sexuality"]

arts_n_hum = [
    "Art, Film, and Visual Studies",
    "Classics",
    "Comparative Literature",
    "East Asian Studies",
    "English",
    "Folklore and Mythology",
    "Germanic Languages and Literatures",
    "History and Literature",
    "History of Art and Architecture",
    "Linguistics",
    "Music",
    "Near Eastern Languages and Civilizations",
    "Philosophy",
    "Comparative Study of Religion",
    "Romance Languages and Literatures",
    "Slavic Languages and Literatures",
    "South Asian Studies",
    "Theater, Dance, and Media"]

In [28]:
df_concen.head()

Unnamed: 0,Q7,Q5
0,What is your concentration? Select all that ap...,Did your Summer 2020 plans change due to the p...
1,Integrative Biology,Yes
2,Undecided,Yes
3,Social Studies,Yes
4,Government,Yes


In [29]:
# Will optimize for efficiency later
lst = df_concen[1:]

def group_con (concentration):
    yes = 0
    no = 0
    for i in range (1, length+1):
        check = lst['Q7'][i]
        if 'Undecided' in lst['Q7'][i] or 'Special Concentration' in lst['Q7'][i]:
            check = 'Undecided'
        # students who listed 2 concentrations will contribute up 
        # to one point based on how many of their concentrations fit into a concentration category
        elif lst['Q7'][i].count(',') == 1:
            joint = lst['Q7'][i].split(',')
            if (joint[0] in concentration) and (joint[1] in concentration):
                if lst['Q5'][i] == 'Yes':
                    yes = yes + 1
                else:
                    no = no + 1
            elif (joint[0] in concentration) or (joint[1] in concentration):
                if lst['Q5'][i] == 'Yes':
                    yes = yes + 0.5
                else:
                    no = no + 0.5
        if check in concentration:
            if lst['Q5'][i] == 'Yes':
                yes = yes +1
            else:
                no = no +1
    return [yes, no]

print("Engineering & Applied Sciences| Yes: " + str(group_con(engin_n_applied)[0]) + " | No: " + str(group_con(engin_n_applied)[1]))
print("Sciences| Yes: " + str(group_con(sciences)[0]) + " | No: " + str(group_con(sciences)[1]))
print("Social Sciences| Yes: " + str(group_con(social_sciences)[0]) + " | No: " + str(group_con(social_sciences)[1]))
print("Arts & Humanities| Yes: " + str(group_con(arts_n_hum)[0]) + " | No: " + str(group_con(arts_n_hum)[1]))
print("Undecided/ Other| Yes: " + str(group_con("Undecided")[0]) + " | No: " + str(group_con("Undecided")[1]))

Engineering & Applied Sciences| Yes: 19.5 | No: 12.0
Sciences| Yes: 32.0 | No: 13.0
Social Sciences| Yes: 36.0 | No: 9.0
Arts & Humanities| Yes: 14.0 | No: 4.0
Undecided/ Other| Yes: 5 | No: 4


In [30]:
X = ['Undecided/ Other', 'Arts & Humanities', 'Social Studies', 'Sciences', 'Engineering &\nApplied Sciences']
Y1 = [55.6, 77.8, 80, 71.1, 61.9]
Y2 = [44.4, 22.8, 20, 28.9, 38.1]

fig = go.Figure()
fig.add_trace(go.Bar(
    x=Y1, 
    y=X, 
    name="Yes",
    marker_color=primary_colors[0], orientation='h'
))

fig.add_trace(go.Bar(x=Y2, y=X, name="No", marker_color=primary_colors[3],  orientation='h'))

fig.update_layout(
    barmode='stack',
    title= {'text': "Changes to 2020 Student Summer Plans",
            'xanchor': 'center',
            'yanchor': 'top'},
    xaxis_title="Percentage of Student Respondents",
    yaxis_title="Concentration Types",
    legend= {'title':{'text':'Response'}},
    template = theme_hodp
)

fig.show()

In [31]:
# Comparing positions pre-covid vs post-covid
plan_col_list = ["Q1", "Q1_9_TEXT","Q4", "Q4_8_TEXT"]
plan_col_list

df_plan = pd.read_csv("summer_jobs.csv", usecols=plan_col_list)
df_plan.head()

Unnamed: 0,Q1,Q1_9_TEXT,Q4,Q4_8_TEXT
0,What are your current plans for Summer 2021? -...,What are your current plans for Summer 2021? -...,What did you do during Summer 2020? - Selected...,What did you do during Summer 2020? - Other - ...
1,Research,,Part-Time Internship,
2,Research,,No Plans,
3,Full-Time Internship,,Full-Time Internship,
4,Other,Summer School,No Plans,


In [32]:
# unique values in "other" cols
df_plan["Q1_9_TEXT"].unique()

array(['What are your current plans for Summer 2021? - Other - Text', nan,
       'Summer School', 'Part-time Internship and part-time job',
       'Travel', 'Summer class/teaching program',
       'Harvard Summer School Classes', 'classes',
       'Part-time job and part-time internship', 'Freelance Job',
       'Harvard summer school', 'summer school',
       'summer classes, training', 'Summer classes', 'summer classes'],
      dtype=object)

From the above results, we can observe that the primary plans that students have under the "Other" section are summer school, part-time internship/job (including freelance) and travel.

In [33]:
def label_other_2021 (df_plan):
   if (df_plan['Q1_9_TEXT'] == "Summer School") | (df_plan['Q1_9_TEXT'] == "Summer class/teaching program") | (df_plan['Q1_9_TEXT'] == "Harvard Summer School Classes") | (df_plan['Q1_9_TEXT'] == "classes") | (df_plan['Q1_9_TEXT'] == "Harvard summer school") | (df_plan['Q1_9_TEXT'] == "summer school")  | (df_plan['Q1_9_TEXT'] == "summer classes") | (df_plan['Q1_9_TEXT'] == "Summer classes") :
      return 'Summer school'
   if (df_plan['Q1_9_TEXT'] == "Part-time Internship and part-time job") | (df_plan['Q1_9_TEXT'] == "Part-time job and part-time internship") | (df_plan['Q1_9_TEXT'] == "Freelance Job"):
      return 'Part-time Internship and part-time job'
   if df_plan['Q1_9_TEXT'] == "Travel":
      return 'Travel'

df_plan["label_other_2021"] = df_plan.apply(lambda df_plan: label_other_2021(df_plan), axis=1)

In [34]:
df_plan["label_other_2021"].unique()

array([None, 'Summer school', 'Part-time Internship and part-time job',
       'Travel'], dtype=object)

In [35]:
df_plan["Q4_8_TEXT"].unique()

array(['What did you do during Summer 2020? - Other - Text', nan,
       'Part-time job and study for LSAT',
       'short program, otherwise no plans', 'Summer School',
       'Hackathon and online classes', 'Class', 'Summer class',
       'Had a part-time internship on the side as well since I was working at a day camp. ',
       'Research + part-time internship', 'Part Time Job + Classes',
       'Classes', 'classes'], dtype=object)

For summer 2020 "other" plans, students had part-time job, summer school, no plan (or not specified short program), part-time internship and part-time job, part-time job + summer school.

(we are treating summer school and online classes as the same category)

In [36]:
def label_other_2020 (df_plan):
   if (df_plan['Q4_8_TEXT'] == "Summer School") | (df_plan['Q4_8_TEXT'] == "Hackathon and online classes") | (df_plan['Q4_8_TEXT'] == "Summer class") | (df_plan['Q4_8_TEXT'] == "classes") | (df_plan['Q4_8_TEXT'] == "Classes") | (df_plan['Q4_8_TEXT'] == "Class"):
      return 'Summer school'
   if (df_plan['Q4_8_TEXT'] == "Part-time job and study for LSAT") | (df_plan['Q4_8_TEXT'] == "Had a part-time internship on the side as well since I was working at a day camp. ") | (df_plan['Q4_8_TEXT'] == "Research + part-time internship") | (df_plan['Q4_8_TEXT'] == "short program, otherwise no plans"):
      return 'Part-time Internship and part-time job'
   if df_plan['Q4_8_TEXT'] == "Part Time Job + Classes":
      return 'Part-time job and summer school'

df_plan["label_other_2020"] = df_plan.apply(lambda df_plan: label_other_2020(df_plan), axis=1)

In [37]:
df_plan['label_other_2020'].unique()

array([None, 'Part-time Internship and part-time job', 'Summer school',
       'Part-time job and summer school'], dtype=object)

In [38]:
df_plan = df_plan[["Q1", "label_other_2021", "Q4", "label_other_2020"]]
df_plan.head()

Unnamed: 0,Q1,label_other_2021,Q4,label_other_2020
0,What are your current plans for Summer 2021? -...,,What did you do during Summer 2020? - Selected...,
1,Research,,Part-Time Internship,
2,Research,,No Plans,
3,Full-Time Internship,,Full-Time Internship,
4,Other,Summer school,No Plans,


In [39]:
df_plan["Q1"].value_counts()

Full-Time Internship                                              63
Research                                                          38
Other                                                             15
Searching/Undecided                                               12
Full-Time Job                                                     10
No Plans                                                           3
Part-Time Internship                                               3
Volunteering                                                       3
Part-Time Job                                                      2
What are your current plans for Summer 2021? - Selected Choice     1
Name: Q1, dtype: int64

We are combining the categories of part-time internship and part-time job to avoid lots of categories in the pie chart. 

In [40]:
df_plan["Q1"].replace(to_replace=["Full-Time Internship", "Full-Time Job"], value='Full-time Internship or Job', inplace = True)
df_plan["Q1"].replace(to_replace=["Part-Time Internship", "Part-Time Job"], value='Part-time Internship or Job', inplace = True)
df_plan["Q1"].value_counts()

Full-time Internship or Job                                       73
Research                                                          38
Other                                                             15
Searching/Undecided                                               12
Part-time Internship or Job                                        5
No Plans                                                           3
Volunteering                                                       3
What are your current plans for Summer 2021? - Selected Choice     1
Name: Q1, dtype: int64

In [41]:
labels = ["Full-Time Internship or Job", "Research", "Other", "Searching/Undecided", "Part-Time Internship or Job", "Volunteering", "No Plans"]
values = [73, 38, 15, 12, 5, 3, 3]
colors = ['#501818', '#C63F3F', '#D87C7C', '#F4B436', '#83BFCC', '#455574', '#E2DDDB']

# initialize the figure
fig = go.Figure()

# add a trace
fig.add_trace(go.Pie(
   values=values, 
   labels=labels,
   marker_colors=colors,
))

# update the layout
fig.update_layout(
   title={'text': "Current plans for Summer 2021",
            'xanchor': 'center',
            'yanchor': 'top' },
   legend= {'title':{'text':'Legend Title'},
            'xanchor': 'center',
            'yanchor': 'top'},
   template=theme_hodp
)

# display the figure
fig.show()

In [42]:
    df_plan["label_other_2021"].value_counts()

Summer school                             10
Part-time Internship and part-time job     3
Travel                                     1
Name: label_other_2021, dtype: int64

In [43]:
labels = ["Summer school", "Part-time Internship and part-time job", "Travel"]
values = [10, 3, 1]
colors = ['#C63F3F', '#F4B436', '#83BFCC']

fig = go.Figure()

fig.add_trace(go.Pie(
    values=values, 
    labels=labels,
    marker_colors=colors
))

# update the layout
fig.update_layout(
   title={'text': "Breakdown of 'Other' response in Summer 2021 plans",
            'xanchor': 'center',
            'yanchor': 'top' },
   legend= {'title':{'text':'Legend Title'},
            'xanchor': 'center',
            'yanchor': 'top'},
   template=theme_hodp
)
fig.show()

In [44]:
df_plan["Q4"].value_counts()

Full-Time Internship                                     35
No Plans                                                 27
Research                                                 24
Part-Time Internship                                     19
Full-Time Job                                            15
Other                                                    13
Part-Time Job                                            12
Volunteering                                              4
What did you do during Summer 2020? - Selected Choice     1
Name: Q4, dtype: int64

In [45]:
df_plan["Q4"].replace(to_replace=["Full-Time Internship", "Full-Time Job"], value='Full-time Internship or Job', inplace = True)
df_plan["Q4"].replace(to_replace=["Part-Time Internship", "Part-Time Job"], value='Part-time Internship or Job', inplace = True)
df_plan["Q4"].value_counts()

Full-time Internship or Job                              50
Part-time Internship or Job                              31
No Plans                                                 27
Research                                                 24
Other                                                    13
Volunteering                                              4
What did you do during Summer 2020? - Selected Choice     1
Name: Q4, dtype: int64

In [46]:
labels = ["Full-Time Internship", "Part-Time Internship or Job", "No Plans","Research", "Other", "Volunteering"]
values = [50, 31, 27, 24, 13, 4]
colors = ['#501818', '#C63F3F', '#D87C7C', '#F4B436', '#83BFCC', '#455574']

# initialize the figure
fig = go.Figure()

# add a trace
fig.add_trace(go.Pie(
   values=values, 
   labels=labels,
   marker_colors=colors,
))

# update the layout
fig.update_layout(
   title={'text': "Student jobs during Summer 2020",
            'xanchor': 'center',
            'yanchor': 'top' },
   legend= {'title':{'text':'Legend Title'},
            'xanchor': 'center',
            'yanchor': 'top'},
   template=theme_hodp
)

# display the figure
fig.show()

In [47]:
df_plan["label_other_2020"].value_counts()

Summer school                             7
Part-time Internship and part-time job    4
Part-time job and summer school           1
Name: label_other_2020, dtype: int64

In [48]:
labels = ["Summer school", "Part-time Internship and part-time job", "Part-time job and summer school"]
values = [7, 4, 1]
colors = ['#C63F3F', '#F4B436', '#83BFCC']

fig = go.Figure()

fig.add_trace(go.Pie(
    values=values, 
    labels=labels,
    marker_colors=colors
))

# update the layout
fig.update_layout(
    title={'text': "Breakdown of 'Other' response in Summer 2020 jobs",
            'xanchor': 'center',
            'yanchor': 'top' },
    legend= {'title':{'text':'Legend Title'},
            'xanchor': 'center',
            'yanchor': 'top'},
   template=theme_hodp
)
fig.show()

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=402f4a20-c0d8-4058-bf08-addea464f714' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>