In [None]:
!jupyter nbextension enable --py widgetsnbextension

In [5]:
import glob
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
import numpy as np

def load_data(directory):
    files = glob.glob(f"{directory}/**/*.csv", recursive=True)
    df_list = []
    file_count = 0
    for f in files:
        file_count += 1
        try:
            print(f'Reading file {file_count} of {len(files)}: {f}')
            temp_df = pd.read_csv(f)
            if not temp_df.empty: 
                df_list.append(temp_df)
        except pd.errors.EmptyDataError:
            print(f'File {f} is empty, skipping.')
    print('All files processed.')
    df = pd.concat(df_list, ignore_index=True)
    df.replace(to_replace=[r"\\t|\\n|\\r", "\t|\n|\r"], value=["",""], regex=True, inplace=True)
    return df

df = load_data('evals')  

df.replace('N/A', np.nan, inplace=True)
df.dropna(inplace=True)

df['Course'] = df['Course'].str.replace(r'\([A-Z]\)$', '', regex=True)

df['Avg Grade Expected'] = df['Avg Grade Expected'].str.extract(r'\((.*?)\)', expand=False)
df['Avg Grade Received'] = df['Avg Grade Received'].str.extract(r'\((.*?)\)', expand=False)

df['Rcmnd Class'] = df['Rcmnd Class'].str.rstrip('%').astype('float') / 100.0
df['Rcmnd Instr'] = df['Rcmnd Instr'].str.rstrip('%').astype('float') / 100.0
df['Avg Grade Expected'] = df['Avg Grade Expected'].astype(float)
df['Avg Grade Received'] = df['Avg Grade Received'].astype(float)
df['Study Hrs/wk'] = df['Study Hrs/wk'].astype(float)

def weighted_mean(x, weights):
    return np.average(x, weights=weights)

agg_dict = {
    'Term': 'last',
    'Enroll': 'sum',
    'Evals Made': 'sum',
    'Rcmnd Class': lambda x: weighted_mean(x, weights=df.loc[x.index, 'Evals Made']),
    'Rcmnd Instr': lambda x: weighted_mean(x, weights=df.loc[x.index, 'Evals Made']),
    'Study Hrs/wk': lambda x: weighted_mean(x, weights=df.loc[x.index, 'Evals Made']),
    'Avg Grade Expected': lambda x: weighted_mean(x, weights=df.loc[x.index, 'Evals Made']),
    'Avg Grade Received': lambda x: weighted_mean(x, weights=df.loc[x.index, 'Evals Made'])
}

df = df.groupby(['Instructor', 'Course']).agg(agg_dict).reset_index()

df['Rcmnd Class'] = df['Rcmnd Class'].apply(lambda x: '{:.1%}'.format(x))
df['Rcmnd Instr'] = df['Rcmnd Instr'].apply(lambda x: '{:.1%}'.format(x))

display(df.head(50))

Reading file 1 of 4048: evals/nano/nano-175.csv
Reading file 2 of 4048: evals/nano/nano-134.csv
Reading file 3 of 4048: evals/nano/nano-103.csv
Reading file 4 of 4048: evals/nano/nano-114.csv
Reading file 5 of 4048: evals/nano/nano-4.csv
Reading file 6 of 4048: evals/nano/nano-112.csv
Reading file 7 of 4048: evals/nano/nano-115L.csv
Reading file 8 of 4048: evals/nano/nano-102.csv
Reading file 9 of 4048: evals/nano/nano-159.csv
Reading file 10 of 4048: evals/nano/nano-108.csv
Reading file 11 of 4048: evals/nano/nano-150.csv
Reading file 12 of 4048: evals/nano/nano-119.csv
Reading file 13 of 4048: evals/nano/nano-111.csv
Reading file 14 of 4048: evals/nano/nano-174L.csv
Reading file 15 of 4048: evals/nano/nano-120A.csv
Reading file 16 of 4048: evals/nano/nano-158.csv
Reading file 17 of 4048: evals/nano/nano-106.csv
Reading file 18 of 4048: evals/nano/nano-100L.csv
Reading file 19 of 4048: evals/nano/nano-146.csv
Reading file 20 of 4048: evals/nano/nano-161.csv
Reading file 21 of 4048: ev

Unnamed: 0,Instructor,Course,Term,Enroll,Evals Made,Rcmnd Class,Rcmnd Instr,Study Hrs/wk,Avg Grade Expected,Avg Grade Received
0,"AMARO, ROMMIE",CHEM 167 - Medicinal Chemistry,SP13,114,77,93.2%,91.9%,5.241169,3.461948,3.413506
1,"Aamari, Eddie",MATH 11 - Calculus-Based Prob & Stats,WI18,178,102,76.5%,74.2%,7.64,3.23,2.76
2,"Aamari, Eddie",MATH 181B - Intro/Math Statistics II,SP18,61,20,66.7%,55.6%,7.83,3.33,3.27
3,"Aamari, Eddie",MATH 183 - Statistical Methods,FA17,203,87,87.5%,76.3%,6.4,3.49,2.91
4,"Aamari, Eddie",MATH 20E - Vector Calculus,WI18,174,76,86.1%,75.0%,6.96,3.26,3.09
5,"Aarons, Sarah Miranda",ESYS 102 - The Solid and Fluid Earth,WI21,227,83,94.0%,95.1%,5.147711,3.563373,3.492651
6,"Abarbanel, Henry Don Isaac",PHYS 130A - Quantum Physics I,SP17,104,34,75.9%,37.9%,6.57,3.07,3.03
7,"Abbasi Hafshejani, Anahita",MUS 5 - Sound in Time,S218,197,88,64.8%,69.3%,2.863182,3.840795,3.789886
8,"Abbasi, Shaghayegh",BENG 100 - Stat Reasoning Bioeng Applns,S119,30,9,66.7%,100.0%,7.39,3.11,2.7
9,"Abd El-Messih, Andrew Saad",ECE 100 - Linear Electronic Systems,WI19,43,40,76.9%,74.4%,9.0,3.16,2.79
