In [None]:
import pandas as pd
# Show every column when a DataFrame is displayed (None removes the limit).
pd.set_option('display.max_columns', None)

In [None]:
# Load the raw export, then narrow it down in two passes:
#   1. keep only the columns whose header starts with '41';
#   2. drop the columns whose prefix (text before the first ':') is one of
#      three excluded ids.
survey = pd.read_csv('f20_lab_survey.csv')
survey = survey.loc[:, [name for name in survey.columns if name.startswith('41')]]
survey = survey.loc[:, [name for name in survey.columns
                        if name.split(':')[0] not in ('410964', '410961', '410968')]]

In [None]:
# Number the remaining columns 1..N in their current order and keep lookup
# tables in both directions between a question's number and its text.
# Column headers are expected to look like '<id>: <question text>'.
column_rename = {}
index_to_question = {}
question_to_index = {}

cols = list(survey.columns)
for i, col in enumerate(cols, start=1):
    # maxsplit=1 keeps the whole question text even when the text itself
    # contains ': ' (a plain split would cut it off at the second separator).
    q = col.split(': ', 1)[1]
    column_rename[col] = i
    index_to_question[i] = q
    question_to_index[q] = i

# From here on the survey columns are the integer question numbers.
survey = survey.rename(columns=column_rename)

In [None]:
# Response scales: each dict maps a numeric score to the label shown for it.
# Scales that only name some scores (e.g. the two end points) simply omit
# the other keys.
AGREE_SCALE = dict(enumerate(
    ['Strongly Disagree', 'Disagree', 'Neutral', 'Agree', 'Strongly Agree'],
    start=1,
))
HARDNESS_SCALE = {
    1: 'Too Hard',
    3: 'Just Right',
    5: 'Too Easy',
}
DIFFICULTY_SCALE = {1: 'Easy', 5: 'Hard'}
CLARITY_SCALE = {1: 'Poor', 5: 'Clear'}

In [None]:
# Set scale for each question.
# `scales` maps every question number (a survey column) to the scale dict
# that its answers use, or to None for questions without a numeric scale.
scales = {}
for col in survey.columns:
    if int(col) in [1,2,3,5,11,12,13,14,17,18,20,22,23,24,25,26,27]:
        scales[col] = AGREE_SCALE
    elif int(col) == 4:
        scales[col] = HARDNESS_SCALE
    elif int(col) == 15:
        scales[col] = DIFFICULTY_SCALE
    elif int(col) == 16:
        scales[col] = CLARITY_SCALE
    else:
        scales[col] = None

# Attach the scales as one extra row labelled 'scale'.
# DataFrame.append was removed in pandas 2.0, so build the row explicitly and
# concatenate it. Labelling the row directly also avoids assuming that the
# survey has exactly 77 response rows, as renaming index 77 did.
survey = pd.concat([survey, pd.Series(scales, name='scale').to_frame().T])

In [None]:
# Flip the table: question numbers become the row labels, and the responses
# plus the 'scale' entry become the columns.
survey = survey.T

In [None]:
# Rows whose scale is the agree/disagree scale, without the 'scale' column.
agree_questions = survey.loc[survey['scale'] == AGREE_SCALE].drop(columns='scale')

In [None]:
def scale_questions(scale_constant, data=None, question_lookup=None):
    """Tabulate the responses to every question that uses ``scale_constant``.

    Parameters
    ----------
    scale_constant : dict
        A response scale (score -> label). Rows of ``data`` whose ``'scale'``
        entry equals this dict are summarised.
    data : pandas.DataFrame, optional
        One row per question, one column per response, plus a ``'scale'``
        column. Responses are strings of the form ``'<score> - <label>'``.
        Defaults to the notebook-level ``survey``.
    question_lookup : mapping, optional
        Maps a row label of ``data`` to the question text. Defaults to the
        notebook-level ``index_to_question``.

    Returns
    -------
    pandas.DataFrame
        Columns ``question, NA, 1, 2, 3, 4, 5, avg``. Row 0 is a header row
        carrying the scale's labels under the score columns. Every following
        row describes one question: the number of missing responses, how
        often each score was given, and the mean score rounded to two
        decimals. If no question uses the scale, only the header row is
        returned.
    """
    if data is None:
        data = survey
    if question_lookup is None:
        question_lookup = index_to_question

    result_columns = ['question', 'NA', 1, 2, 3, 4, 5, 'avg']

    # Compare element-wise so the selection does not depend on how pandas
    # interprets a dict on the right-hand side of `==`.
    uses_scale = data['scale'].map(lambda entry: entry == scale_constant).astype(bool)
    selected = data[uses_scale].drop(columns='scale')

    records = []
    for number, responses in selected.iterrows():
        # The score is the integer in front of ' - ' in each response.
        scores = pd.Series(
            [int(answer.split(' - ')[0]) for answer in responses.dropna()],
            dtype='int64',  # explicit dtype keeps an all-missing row well defined
        )
        record = {'question': question_lookup[number],
                  'NA': int(responses.isna().sum())}
        record.update({score: 0 for score in range(1, 6)})
        record.update(scores.value_counts().to_dict())
        record['avg'] = round(scores.mean(), 2)
        records.append(record)

    # Building from records keeps the counts as integers for any number of
    # questions and also works when there are no matching questions at all.
    statistics = pd.DataFrame(records, columns=result_columns)

    header = {column: None for column in result_columns}
    header.update(scale_constant)
    header_frame = pd.DataFrame(header, index=[0])

    if statistics.empty:
        return header_frame
    return pd.concat([header_frame, statistics], ignore_index=True)

In [None]:
def lab_ranks(ls):
    """Count how often each item appears across comma-separated answers.

    ``ls`` is a Series of strings such as ``'a,b,c'``; missing entries are
    skipped. Returns a one-column DataFrame indexed by item, holding the
    number of appearances as produced by ``value_counts``.
    """
    mentions = []
    for answer in ls.dropna():
        mentions.extend(answer.split(','))
    counts = pd.Series(mentions).value_counts()
    return pd.DataFrame(counts)

In [None]:
# Ranks for top 3 labs and bottom 3 labs:
# question 6 feeds the "top" table, question 9 the "bottom" table.
top_labs, bottom_labs = (lab_ranks(survey.loc[question]) for question in (6, 9))

In [None]:
# Jupyter vs. Web-based question (question 19): one row with the question
# text followed by the number of respondents choosing each option.
pref = pd.DataFrame(survey.loc[19].value_counts()).transpose()
pref['question'] = index_to_question[19]
# reindex instead of plain column selection: an option nobody picked is
# absent from value_counts and would otherwise raise a KeyError; it is
# reported as 0 instead.
pref = pref.reindex(columns=['question', 'Web-based', 'Jupyter Notebook'], fill_value=0)

In [None]:
def process_text(i):
    """Return all non-missing entries of survey row ``i`` joined by blank lines."""
    answers = survey.loc[i].dropna()
    return '\n\n'.join(answers.tolist())

In [None]:
# Write the report: a Markdown file with the summary tables embedded as HTML.
# The `with` block closes the file even if building one of the tables raises,
# and the explicit UTF-8 encoding keeps free-text answers with non-ASCII
# characters from failing on platforms whose default encoding is narrower.
with open("f20_report.md", "w", encoding="utf-8") as report:
    report.write('# **Fall 2020 Lab Survey** \n\n')
    report.write('\n\n ## **Numerical Questions** \n\n')
    report.write(scale_questions(AGREE_SCALE).to_html())
    report.write(scale_questions(HARDNESS_SCALE).to_html())
    report.write(scale_questions(DIFFICULTY_SCALE).to_html())
    report.write(scale_questions(CLARITY_SCALE).to_html())
    report.write(pref.to_html())
    report.write('\n\n ## **Appearances under "Top 3 Labs"** \n\n')
    report.write(top_labs.to_html())
    report.write('\n\n ## **Appearances under "Bottom 3 Labs"** \n\n')
    report.write(bottom_labs.to_html())

    # Free-text questions: a heading with the question text, then every
    # response separated by blank lines.
    report.write('\n\n ## **Text Questions** \n\n')
    for i in [7,8,10,21,28,29,30,31]:
        report.write('\n\n ### **%s** \n\n' % (index_to_question[i]))
        report.write(process_text(i))

In [None]:
#!grip f20_report.md