# UW Emails only
## Oldest 15 responses before the large uptick

In [8]:
# Imports
import pandas as pd
import plotly.express as px
import plotly.io as pio
import random
import datetime
import plotly.graph_objects as go
import os


In [9]:
# Methods

def counter(df, column):
    """Count the rows of ``df`` that fall into each category of ``column``.

    Returns a new dataframe with one row per distinct value of ``column``
    and the number of matching rows in a ``count`` column.
    """
    group_sizes = df.groupby([column]).size()
    return group_sizes.reset_index(name='count')


def output(figure, title):
    """Save ``figure`` as a JPEG image under ``images/uw``.

    ``title`` is a two-item sequence; the image is written to
    ``images/uw/<title[0]>-<title[1]>.jpeg``.
    """
    # makedirs also creates the missing parent directory ("images") and,
    # with exist_ok=True, does nothing when the directory is already there.
    os.makedirs("images/uw", exist_ok=True)
    figure.write_image(f'images/uw/{title[0]}-{title[1]}.jpeg')


def splitText(string):
    """Split ``string`` on whitespace and re-join the words in pairs.

    Returns a list of two-word strings; the last entry holds a single word
    when the number of words is odd.
    """
    tokens = string.split()
    pairs = []
    for start in range(0, len(tokens), 2):
        pairs.append(' '.join(tokens[start:start + 2]))
    return pairs

def getQuestions():
    """Print every entry of ``questions`` from label 'Q1' through 'Q14'.

    Reads the module-level ``questions`` Series, so the data import cell
    must have run first. Each label is printed followed by its value.
    Returns nothing.
    """
    for index, value in questions.loc['Q1':'Q14'].items():
        print(index, '\n', value)

In [10]:
# Settings
# Default plotly renderer used when a figure is displayed
pio.renderers.default = 'vscode'
# Default plotly template applied to new figures
pio.templates.default = 'plotly'

In [11]:
# Data Import
df_csv = pd.read_csv('data/data.csv')

# The first row of the file is kept separately as `questions`
questions = df_csv.iloc[0]

# Work on a copy without rows 0 and 1, ordered by ascending EndDate
temp_csv = df_csv.copy().drop([0, 1]).sort_values(by='EndDate', ascending=True)

In [12]:
# Reformatting strings as datetimes
# Both timestamp columns share the same 'YYYY-MM-DD HH:MM:SS' layout
for date_column in ('StartDate', 'EndDate'):
    temp_csv[date_column] = pd.to_datetime(
        temp_csv[date_column], format='%Y-%m-%d %H:%M:%S')


In [13]:
# Cleaning the data
ages = ['under 18', 'above 25']
# Drop respondents whose Q2 answer is one of the excluded age groups
temp_csv = temp_csv[~temp_csv.Q2.isin(ages)]
# temp_csv = temp_csv[temp_csv.Q3 != 'Never']
# Drop this one specific address
temp_csv = temp_csv[temp_csv.Q1 != "ckh22@uw.edu"]

# Keep only rows whose Q1 contains "@uw.edu".
# regex=False matches the text literally (as a regex the "." would match
# any character); na=False treats a missing Q1 as a non-match instead of
# producing NaN, which cannot be used as a boolean mask.
temp_csv = temp_csv[temp_csv['Q1'].str.contains("@uw.edu", regex=False, na=False)]
temp_csv = temp_csv.reset_index(drop=True)
emailList = temp_csv.Q1.tolist()
print("dataset size is:", len(emailList), '\n', emailList)

dataset size is: 15 
 ['mj73@uw.edu', 'roabuda@uw.edu', 'kmp18@uw.edu', 'alidatm@uw.edu', 'ianb1@uw.edu', 'opplijen@uw.edu', 'novbiebo@uw.edu', 'malsh380@uw.edu', 'mb13@uw.edu', 'nikkima@uw.edu', 'dugabrie@uw.edu', 'rqr@uw.edu', 'Vbatalla@uw.edu', 'smea14@uw.edu', 'awong650@uw.edu']


In [14]:
# Raffle 
# email_column = temp_csv.loc[:,'Q1']
# emails = email_column.values
# print("Selected winner is", random.choice(emails))

In [15]:
# Vis 1 - bar chart of the number of responses per Q2 answer
vis1_data = counter(temp_csv, 'Q2')
fig = px.bar(vis1_data, x='Q2', y='count')
title = ['vis1', 'Q2']
output(fig, title)


![Vis1](images/uw/vis1-Q2.jpeg)

In [16]:
# Vis 2 - bar chart of the number of responses per Q3 answer
vis2_data = counter(temp_csv, 'Q3')
fig2 = px.bar(vis2_data, x='Q3', y='count')
title = ['vis2', 'Q3']
output(fig2, title)


![Vis2](images/uw/vis2-Q3.jpeg)

In [17]:
# Vis 3 - bar chart of the number of responses per Q7 answer
vis3_data = counter(temp_csv, 'Q7')
fig3 = px.bar(vis3_data, x='Q7', y='count')
title = ['vis3', 'Q7']
output(fig3, title)


![Vis3](images/uw/vis3-Q7.jpeg)

In [18]:
# Vis 4 - bar chart of the number of responses per Q8 answer
vis4_data = counter(temp_csv, 'Q8')
fig4 = px.bar(vis4_data, x='Q8', y='count')
title = ['vis4', 'Q8']
output(fig4, title)


![Vis4](images/uw/vis4-Q8.jpeg)

In [19]:
# Vis 5 - pie chart of the share of responses per Q5 answer
vis5_data = counter(temp_csv, 'Q5')
fig5 = px.pie(vis5_data, names='Q5', values='count')
title = ['vis5', 'Q5']
output(fig5, title)


![Vis5](images/uw/vis5-Q5.jpeg)

In [20]:
# Vis 6 - Heatmap of Q4 (x axis) against Q8 (y axis)
title = ['vis6', 'Q4']
fig6 = px.density_heatmap(
    temp_csv,
    x='Q4',
    y='Q8',
    nbinsx=50,
    nbinsy=50,
    title='Payment vs Frequency of Use',
    labels={
        'Q8': "Typical Payment Method",
        'Q4': 'Frequency of VM use',
    },
    # Fixed display order for the categories on each axis
    category_orders={
        'Q4': [
            'None',
            'Once or twice',
            'Three to four times',
            'More than four times',
        ],
        'Q8': ['Credit/Debit card', 'Husky ID', 'Cash'],
    },
)
output(fig6, title)


![Vis6](images/uw/vis6-Q4.jpeg)

In [21]:
# Vis 7 - Bar chart of responses per day, split by distribution channel
title = ['vis7', 'Survey']
# Bucket the rows into one-day bins of EndDate, per DistributionChannel
daily_groups = temp_csv.groupby(
    [pd.Grouper(key="EndDate", freq='1D'), "DistributionChannel"])
vis7_data = daily_groups['EndDate'].count().reset_index(name="count")
fig7 = px.bar(
    vis7_data,
    x='EndDate',
    y='count',
    color='DistributionChannel',
    pattern_shape='DistributionChannel',
    pattern_shape_sequence=['.', 'x', '+'],
)
output(fig7, title)


![Vis7](images/uw/vis7-Survey.jpeg)

In [22]:
# Vis 8 - Q6_1 to 6

# Data Prep
# Maps the raw column names to the names shown on the chart
labels = {
    'index': 'Rank',
    'Q6_1': 'Snack bars',
    'Q6_2': 'Chips',
    'Q6_3': 'Candy',
    'Q6_4': 'Jerky',
    'Q6_5': 'Mixes',
    'Q6_6': 'Chocolate bars',
}
# For each Q6 column, count how many times each answer value occurs
vis8_data = temp_csv.loc[:, 'Q6_1':'Q6_6'].apply(pd.Series.value_counts)
# Name the index before resetting it so the resulting column is always
# called 'Rank', whatever name the index carried after value_counts.
vis8_data = vis8_data.rename_axis('Rank').reset_index()
vis8_data = vis8_data.rename(columns=labels)
# Every column except 'Rank' is plotted as its own series
x_data = [name for name in vis8_data.columns if name != 'Rank']

# Plotting
title = ['vis8', 'Q6']
fig8 = px.bar(
    data_frame=vis8_data,
    x=x_data,
    y='Rank',
    orientation='h',
    labels={'variable': 'Snack Type', 'value': 'Total Vote Count'},
    category_orders={'Rank': ['1', '2', '3', '4', '5', '6']},
)
output(fig8, title)

In [23]:
# Vis 9 - Q9_2 & Q11_1

# Data Prep
# Number of responses for every (Q9_2, Q11_1) answer combination
vis9_data = (
    temp_csv[['Q9_2', 'Q11_1']]
    .groupby(['Q9_2', 'Q11_1'])
    .size()
    .reset_index(name='counts')
)

# Plotting
title = ['vis9', 'Q9_2ANDQ11_1']
# Axis/legend labels and the fixed display order of the categories
settings = {
    'labels': {
        'Q9_2': 'Importance of amount<br>Among calories, amount, and price',
        'counts': 'Votes',
        'Q11_1': 'Likeliness of purchasing an healthier<br>alternative with a smaller amount',
    },
    'category_orders': {
        'Q9_2': ['1', '2', '3'],
        'Q11_1': [
            'Extremely likely',
            'Somewhat likely',
            'Neither likely nor unlikely',
            'Somewhat unlikely',
            'Extremely unlikely',
        ],
    },
}
fig9 = px.bar(
    vis9_data,
    x='counts',
    y='Q9_2',
    color='Q11_1',
    orientation='h',
    labels=settings['labels'],
    category_orders=settings['category_orders'],
)
output(fig9, title)


In [24]:
# Vis 10 - Donut chart of how often each option was picked in Q13

# Each Q13 answer is a comma-separated list of options; tally every option.
hashMap = {}
# dropna() skips unanswered rows, which would otherwise hold NaN (a float)
# and fail on .split()
for value in temp_csv.Q13.dropna():
    for item in str(value).split(','):
        hashMap[item] = hashMap.get(item, 0) + 1

# Build the frame in one go (DataFrame.append was removed in pandas 2.0)
vis10_data = pd.DataFrame(list(hashMap.items()), columns=['Snacks', 'Count'])
vis10_data = vis10_data.sort_values(by='Count', ascending=False)

fig10 = go.Figure(data=[go.Pie(labels=vis10_data.Snacks.tolist(), values=vis10_data.Count.tolist(), textinfo='label+percent',
                             insidetextorientation='radial', hole=.66
                            )])
fig10.show()