# UW Emails only
## Oldest 15 responses before the large uptick

In [None]:
# Imports
import pandas as pd
import plotly.express as px
import plotly.io as pio
import random
import datetime
import plotly.graph_objects as go
import os

In [None]:
# Methods

def counter(df, column):
    """Count the rows of ``df`` that fall into each category of ``column``.

    Returns a new dataframe holding one row per distinct value of
    ``column`` together with the number of matching rows in a ``count``
    column; ``df`` itself is left untouched.
    """
    group_sizes = df.groupby([column]).size()
    return group_sizes.reset_index(name='count')

# file output
def output(figure, title):
    """Save ``figure`` as ``images/uw/<title[0]>-<title[1]>.jpeg``.

    figure: object exposing ``write_image(path)`` (the plotly figures built
        in this notebook).
    title: two-item sequence; the first item is the visual's name and the
        second the label appended to it in the file name.
    """
    # makedirs also creates a missing "images" parent directory and, with
    # exist_ok=True, does not fail when the folder is already there.
    os.makedirs("images/uw", exist_ok=True)
    figure.write_image(f'images/uw/{title[0]}-{title[1]}.jpeg')

In [None]:
# Settings
# Display figures through plotly's 'vscode' renderer when .show() is called.
pio.renderers.default = 'vscode'
# Use the 'plotly' template as the default theme for every figure.
pio.templates.default = 'plotly'

In [None]:
# Data Import
# df_csv keeps the raw import; temp_csv is the working frame used by the
# rest of the notebook.
df_csv = pd.read_csv('data/data.csv')

# Rows 0 and 1 are discarded (presumably extra header/metadata rows of the
# survey export - TODO confirm) and the remainder is ordered oldest-first.
# EndDate is still plain text at this point, so this is a string sort.
# NOTE(review): the notebook heading and the former "Top 15" comment suggest
# only the 15 oldest responses should be kept, but no row limit is applied
# here - confirm whether that is intended.
temp_csv = df_csv.drop([0, 1]).sort_values(by='EndDate', ascending=True)

In [None]:
# Reformatting strings as datetimes
# Both timestamp columns share the same textual layout, so convert them in
# one pass.
timestamp_format = '%Y-%m-%d %H:%M:%S'
for date_column in ('StartDate', 'EndDate'):
    temp_csv[date_column] = pd.to_datetime(temp_csv[date_column], format=timestamp_format)

In [None]:
# Cleaning the data
# Drop every row whose Q2 answer is one of the excluded age brackets.
ages = ['under 18', 'above 25']
temp_csv = temp_csv[~temp_csv.Q2.isin(ages)]
# Optional filter, currently disabled:
# temp_csv = temp_csv[temp_csv.Q3 != 'Never']
# Exclude this one address (presumably a test/owner submission - confirm).
temp_csv = temp_csv[temp_csv.Q1 != "ckh22@uw.edu"]

# Keep only rows whose Q1 contains "@uw.edu".
# regex=False matches the text literally (as a regex the "." would match any
# character); na=False drops rows with a blank Q1 instead of producing a
# NaN mask entry, which would make the boolean indexing raise.
temp_csv = temp_csv[temp_csv['Q1'].str.contains("@uw.edu", regex=False, na=False)]
temp_csv = temp_csv.reset_index(drop=True)
emailList = temp_csv.Q1.tolist()
print("dataset size is:", len(emailList), '\n', emailList)

In [None]:
# Raffle (disabled): picks one random e-mail from Q1 as the winner
# email_column = temp_csv.loc[:,'Q1']
# emails = email_column.values
# print("Selected winner is", random.choice(emails))

In [None]:
# Vis 1 - bar chart: number of responses per Q2 answer
title = ['vis1', 'Q2']
question = title[1]
vis1_data = counter(temp_csv, question)
fig = px.bar(vis1_data, x=question, y='count')
fig.show()
# Saved as images/uw/vis1-Q2.jpeg
output(fig, title)

![Vis1](images/uw/vis1-Q2.jpeg)

In [None]:
# Vis 2 - bar chart: number of responses per Q3 answer
title = ['vis2', 'Q3']
question = title[1]
vis2_data = counter(temp_csv, question)
fig2 = px.bar(vis2_data, x=question, y='count')
fig2.show()
# Saved as images/uw/vis2-Q3.jpeg
output(fig2, title)

![Vis2](images/uw/vis2-Q3.jpeg)

In [None]:
# Vis 3 - bar chart: number of responses per Q7 answer
title = ['vis3', 'Q7']
question = title[1]
vis3_data = counter(temp_csv, question)
fig3 = px.bar(vis3_data, x=question, y='count')
fig3.show()
# Saved as images/uw/vis3-Q7.jpeg
output(fig3, title)

![Vis3](images/uw/vis3-Q7.jpeg)

In [None]:
# Vis 4 - bar chart: number of responses per Q8 answer
title = ['vis4', 'Q8']
question = title[1]
vis4_data = counter(temp_csv, question)
fig4 = px.bar(vis4_data, x=question, y='count')
fig4.show()
# Saved as images/uw/vis4-Q8.jpeg
output(fig4, title)

![Vis4](images/uw/vis4-Q8.jpeg)

In [None]:
# Vis 5 - pie chart: share of responses per Q5 answer
title = ['vis5', 'Q5']
question = title[1]
vis5_data = counter(temp_csv, question)
fig5 = px.pie(vis5_data, names=question, values='count')
fig5.show()
# Saved as images/uw/vis5-Q5.jpeg
output(fig5, title)

![Vis5](images/uw/vis5-Q5.jpeg)

In [None]:
# Vis 6 - Heatmap of payment method (Q8) against frequency of use (Q4)
title = ['vis6', 'Q4']

# Explicit axis orderings so the categories are not left in data order.
# NOTE(review): recent pandas versions treat the literal text "None" as a
# missing value in read_csv by default - confirm that 'None' answers in Q4
# survive the import, otherwise that category will be empty.
frequency_order = ['None', 'Once or twice', 'Three to four times', 'More than four times']
payment_order = ['Credit/Debit card', 'Husky ID', 'Cash']

fig6 = px.density_heatmap(
    data_frame=temp_csv,
    x='Q4',
    y='Q8',
    nbinsx=50,
    nbinsy=50,
    category_orders={'Q4': frequency_order, 'Q8': payment_order},
    labels={'Q8': "Typical Payment Method", 'Q4': 'Frequency of VM use'},
    title='Payment vs Frequency of Use',
)
fig6.show()
# Saved as images/uw/vis6-Q4.jpeg
output(fig6, title)


![Vis6](images/uw/vis6-Q4.jpeg)

In [None]:
# Vis 7 - Responses per day, coloured by distribution channel
# (drawn with px.bar, i.e. a bar chart rather than an area graph)
title = ['vis7', 'Survey']

# Bucket the responses into one-day bins on EndDate, split by channel, and
# count the rows in every (day, channel) pair.
daily_bins = pd.Grouper(key="EndDate", freq='1D')
responses_by_day = temp_csv.groupby([daily_bins, "DistributionChannel"])
vis7_data = responses_by_day['EndDate'].count().reset_index(name="count")

fig7 = px.bar(vis7_data, x='EndDate', y='count', color='DistributionChannel')
fig7.show()
# Saved as images/uw/vis7-Survey.jpeg
output(fig7, title)

![Vis7](images/uw/vis7-Survey.jpeg)