In [97]:
# Run the shared setup notebook so the names it defines are available in this notebook.
# NOTE(review): helpers called in later cells (process_multiple_choice_survey,
# create_figure_with_dropdown, break_text, change_to_dark_mode_plotly) are not defined
# in the cells visible here, so they presumably come from this setup notebook - confirm.
%run ./setup_notebook.ipynb

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [99]:
import numpy as np # linear algebra
import pandas as pd 
import plotly.express as px
import plotly.graph_objects as go
# from plotly.subplots import make_subplot
# import ipywidgets as widgets
import re
from ipywidgets import interact, interact_manual
import folium
import time

In [100]:
# Hover-text templates keyed by plotly trace type. Each entry holds the
# `hovertemplate` string to pass to `fig.update_traces(...)`: bold category,
# then an italic "Count" label followed by the value.
less_cringe_hovertext_template = {
    "bar": {"hovertemplate": "<b>%{x}</b><br><i>Count</i>: %{y}"},
    "scatter": {"hovertemplate": "<b>%{x}</b><br><i>Count</i>: %{y}"},
    "pie": {"hovertemplate": "<b>%{label}</b><br><i>Count</i>: %{value}"},
    "histogram": {"hovertemplate": "<b>%{x}</b><br><i>Count</i>: %{y}"},
}

<div style="font-family:'Trebuchet MS', 'Impact', sans-serif; 
            margin-bottom: 20px; letter-spacing: 6px; text-align:center;
            font-size: 200%;"><b>University of Toronto ECE2T2 Class Profile</b></div>

<div style="font-size: 16px;text-align:center;"><i>Number of Respondents: 52</i></div>

<p style="text-align:start;">Welcome to the University of Toronto ECE2T2 Class Profile, inspired by the Waterloo Software Engineering 2021 Class Profile.
We are here to give you all the deets, all the information on our graduating class: their passions, compassions, and the futures they have built during their time at UofT.
   
Let's find out about who these 52 people of University of Toronto ECE2T2 are, and where they be at!</p>

In [101]:
# Load the raw survey export; the file is resolved relative to the notebook's
# working directory.
survey_csv_path = "Electrical_and_Computer_Engineering_2023_-_Class_Profile_Survey_Submissions_2023-03-29.csv"
df = pd.read_csv(survey_csv_path)

In [102]:
# Print the number of survey rows, then show the first five rows as the cell output.
print(df.shape[0])
df.head()

52


Unnamed: 0,Submission ID,Respondent ID,Submitted at,What program were you in first year?,What program are you in now?,What 2 areas did you specialize in?,What 2 areas did you specialize in? (Area 1: Photonics & Semiconductor Physics),What 2 areas did you specialize in? (Area 2: Electromagnetics & Energy Systems),What 2 areas did you specialize in? (Area 3: Digital & Analog Electronics),"What 2 areas did you specialize in? (Area 4: Systems Control, Communications & Signal Processing)",...,Which of the following have you done during university? (Kissed someone romantically),Which of the following have you done during university? (Been in a committed relationship),Which of the following have you done during university? (Been in a long distance relationship),Which of the following have you done during university? (Had 'friends with benefits'),Which of the following have you done during university? (Used a dating app),List the recreational controlled substances you have used during university,Untitled long answer field (1),Untitled long answer field (2),Untitled long answer field (3),How many sexual partners have you had
0,qaRxpG,VpyqOy,2023-02-21 01:01:21,Computer,Computer,"Area 5: Computer Hardware & Computer Networks,...",False,False,False,False,...,False,False,False,False,True,,Interesting to see myself - as a person who kn...,Maybe not doing enough design teams and not sp...,Hope for the best for everyone ðŸ™‚,0.0
1,07e206,J1YGy4,2023-02-21 01:10:02,Electrical,Electrical,"Area 6: Computer Software, Area 4: Systems Con...",False,False,False,True,...,,,,,,Cannabis,The pandemic allowed me to realize I did not h...,I wish I had learned to focus on my own person...,Do not lose the person that you are in spite o...,0.0
2,E5xj2L,kbxgVM,2023-02-21 01:18:09,TrackOne,Computer,"Area 5: Computer Hardware & Computer Networks,...",False,False,False,False,...,,,,,,Cannabis,,,Live long and prosper,0.0
3,PDOjb0,9N5avV,2023-02-21 01:53:57,TrackOne,Electrical,"Area 1: Photonics & Semiconductor Physics, Are...",True,False,False,True,...,,,,,,,Celebrating my birthday with my U of T friends:),Not playing intramurals and going to more soci...,Find joy in simple things:),
4,9XQGoK,6D59QJ,2023-02-21 01:55:39,TrackOne,Computer,"Area 6: Computer Software, Area 5: Computer Ha...",False,False,False,False,...,,,,,,,,,,


## Data Cleaning



In [103]:

# Dump every column header, one per line, to all_columns.txt for offline inspection.
column_listing = "\n".join(df.columns)
with open("all_columns.txt", "w", encoding="utf-8") as columns_file:
    columns_file.write(column_listing)

In [104]:
# Give the three anonymous long-answer columns their real survey prompts.
# The guard makes the cell safe to re-run: once renamed, the "Untitled ..."
# headers no longer exist and the whole block is skipped.
long_answer_prompts = {
    "Untitled long answer field (1)": "1. Share a story, happy or sad, from your time in ECE",
    "Untitled long answer field (2)": "2. What is something you regret over your time at UofT?",
    "Untitled long answer field (3)": "3. Give a piece of advice to your fellow ECE 2T2 - 2023 grads",
}
if set(long_answer_prompts).issubset(df.columns):
    df.rename(columns=long_answer_prompts, inplace=True)

    # Check the rename with exact name matching. `str.contains` would interpret
    # the prompts as regular expressions ("." matches any character, "T?" makes
    # the T optional) and would also accept substring hits, so it could pass or
    # fail for the wrong reason.
    index = df.columns.isin(list(long_answer_prompts.values()))

    indices = df.columns[index]
    assert len(indices) == 3

In [105]:
# The export contains a column whose header is a US News article URL.
# It carries no data: the column is dropped only after asserting that every
# value in it is null. The membership guard keeps the cell re-runnable.
usnews_article_column = (
    "https://money.usnews.com/money/personal-finance/family-finance/articles/"
    "where-do-i-fall-in-the-american-economic-class-system"
)
if usnews_article_column in df.columns:
    assert df[usnews_article_column].isna().all()
    df.drop(columns=usnews_article_column, inplace=True)

## Data Exploration

# Class Profile

## What Program Were You in First Year?

In [107]:
# TO-DO! Make trace subplots with this
# Pie chart of first-year program, one coloured slice per program; slice text
# shows label and percentage, the hover text uses the shared pie template, and
# the legend is hidden.
first_year_program = "What program were you in first year?"
fig = (
    px.pie(df, names=first_year_program, color=first_year_program)
    .update_traces(
        textinfo="label+percent",
        hovertemplate=less_cringe_hovertext_template["pie"]["hovertemplate"],
    )
    .update_layout(showlegend=False)
)
fig

## Exchange Information

In [108]:
# data_checker(df["Have you participated in an exchange?"], check_unique=True)

In [109]:
# Respondents who skipped the exchange question get an explicit "Did not say" label.
# The filled Series is assigned back to the column: calling fillna(inplace=True) on
# df[...] acts on an intermediate object, raises pandas' chained-assignment warning,
# and leaves df unchanged once Copy-on-Write is enabled.
df["Have you participated in an exchange?"] = df["Have you participated in an exchange?"].fillna("Did not say")

In [110]:
# data_checker(df["If yes, where did you do your exchange?"], check_unique=True)

In [111]:
# Sanity check: nobody who answered "no" to the exchange question also gave an
# exchange location.
answered_no = df["Have you participated in an exchange?"] == "no"
gave_location = df["If yes, where did you do your exchange?"].notna()
assert not (answered_no & gave_location).any()

In [112]:
# Sanity check: everyone who answered "yes" to the exchange question also gave
# an exchange location.
answered_yes = df["Have you participated in an exchange?"] == "yes"
missing_location = df["If yes, where did you do your exchange?"].isna()
assert not (answered_yes & missing_location).any()

In [113]:
# data_checker(df["If you did not do an exchange, what were your reasons for not?"], check_unique=True)

In [114]:
# Missing reasons are labelled "No answer" so later cells can filter on that value.
# The filled Series is assigned back to the column: calling fillna(inplace=True) on
# df[...] acts on an intermediate object, raises pandas' chained-assignment warning,
# and leaves df unchanged once Copy-on-Write is enabled.
df["If you did not do an exchange, what were your reasons for not?"] = (
    df["If you did not do an exchange, what were your reasons for not?"].fillna("No answer")
)

In [115]:
# Sanity check: nobody who went on exchange also gave a reason for not going
# (their reason must be the "No answer" placeholder).
went_on_exchange = df["Have you participated in an exchange?"] == "yes"
gave_reason = df["If you did not do an exchange, what were your reasons for not?"] != "No answer"
assert not (went_on_exchange & gave_reason).any()

In [116]:
# Run the "reasons for not going on exchange" answers through the shared survey
# helper; the result is read back via its "exchangeReasons" column.
no_exchange_reason_answers = df["If you did not do an exchange, what were your reasons for not?"]
df_no_exchange_reasons = process_multiple_choice_survey(
    no_exchange_reason_answers,
    column="exchangeReasons",
)

In [117]:
# Store the processed reasons on the survey frame under "No Exchange Reasons",
# the column name the exchange dropdown figure plots.
# NOTE(review): if the right-hand side is a pandas Series, this assignment aligns
# on index - it assumes process_multiple_choice_survey keeps one row per
# respondent with df's index; TODO confirm.
df["No Exchange Reasons"] = df_no_exchange_reasons["exchangeReasons"]

In [118]:
# Build the dropdown figure for the three exchange-related columns.
# NOTE(review): the sample sizes in the labels (2 and 47) are hard-coded and will
# go stale if the survey data changes - consider deriving them from df.
exchange_columns = [
    "Have you participated in an exchange?",
    "If yes, where did you do your exchange?",
    "No Exchange Reasons",
]
exchange_labels = break_text(
    [
        "Have you participated in an exchange?",
        "If yes, where did you do your exchange? (Sample Size: 2)",
        "If you did not do an exchange, what were your reasons for not? (Sample Size: 47)",
    ],
    char_limit=27,
)
fig = create_figure_with_dropdown(
    df,
    options=exchange_columns,
    labels=exchange_labels,
    showticklabels=[False] * len(exchange_columns),
    sort_traces=True,
    textposition="auto",
    horizontal=True,
)
change_to_dark_mode_plotly(fig)
fig.update_xaxes(showticklabels=False)
# Changing the renderer in the show statement does not change the rendering method in the HTML!!!
fig.show(renderer="notebook_connected")

In [119]:
import pandas as pd

# Scratch experiment: how DataFrame.value_counts() looks once transposed.
# NOTE(review): this rebinds `df`, replacing the survey DataFrame loaded earlier
# with a three-row toy frame. Any later cell that expects the survey data in `df`
# will break on Restart & Run All. Consider a separate name here and in the
# reindex cell that follows.
df = pd.DataFrame({"a": [1, 2, 2]})
value_count_frame = pd.DataFrame(df.value_counts())

# Only the final expression is rendered as the cell output; the reset_index
# variant is evaluated and discarded.
value_count_frame.reset_index().T
value_count_frame.T

a,2,1
0,2,1


In [120]:
# Scratch experiment: put the value-count rows in an explicit order by
# reindexing with the tuple keys that DataFrame.value_counts() produces.
# NOTE(review): relies on `df` being the toy frame from the previous scratch
# cell, not the survey data.
counts_by_value = pd.DataFrame(df.value_counts())
counts_by_value.reindex([(1,), (2,)])

Unnamed: 0_level_0,0
a,Unnamed: 1_level_1
1,1
2,2


In [121]:
import plotly.graph_objects as go

# `import urllib` alone does not guarantee the `request` submodule is loaded;
# import it explicitly so `urllib.request.urlopen` works on a fresh kernel.
import urllib.request
import json

# Load heatmap data (requires network access). The `with` block closes the HTTP
# response as soon as the JSON has been parsed.
with urllib.request.urlopen(
    "https://raw.githubusercontent.com/plotly/datasets/master/custom_heatmap_colorscale.json"
) as response:
    dataset = json.load(response)

# Create and show figure: a heatmap of dataset["z"] with a horizontal colorbar.
# Note that this rebinds `fig`.
fig = go.Figure()

fig.add_trace(go.Heatmap(
    z=dataset["z"],
    colorbar=dict(orientation='h')))

fig.show()

# Test Multiple Render Displays