<a href="https://colab.research.google.com/github/TylerFlar/CAPEs-ranking/blob/main/Better_CAPEs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/TylerFlar/CAPEs-ranking

In [None]:
!pip install ipyaggrid

In [5]:
# Colab-specific setup: switch on the custom widget manager before any widgets
# are created (presumably required for the ipyaggrid grid to render in Colab --
# confirm against the Colab documentation).
from google.colab import output
output.enable_custom_widget_manager()

In [9]:
import glob
import pandas as pd
import numpy as np
import datetime
from ipyaggrid import Grid
import ipywidgets as widgets

def load_data(directory):
    """Load every CSV found under ``directory`` into a single DataFrame.

    The directory is searched recursively. Files that pandas cannot parse
    because they are completely empty are reported on stdout and skipped;
    files that parse to a frame without rows are skipped silently. Literal
    escape sequences (a backslash followed by ``t``, ``n`` or ``r``) as well
    as real tab, newline and carriage-return characters are removed from all
    string cells.

    Args:
        directory: Root folder to search for ``*.csv`` files.

    Returns:
        A DataFrame holding the rows of all non-empty files, indexed from 0.

    Raises:
        ValueError: If no file under ``directory`` contributed any rows
            (for example because the path is wrong).
    """
    # Sorted so the row order does not depend on filesystem enumeration order.
    files = sorted(glob.glob(f"{directory}/**/*.csv", recursive=True))
    df_list = []
    for f in files:
        try:
            temp_df = pd.read_csv(f)
        except pd.errors.EmptyDataError:
            print(f'File {f} is empty, skipping.')
            continue
        if not temp_df.empty:
            df_list.append(temp_df)
    print('All files processed.')

    # pd.concat([]) fails with an opaque "No objects to concatenate"; raise
    # the same exception type with a message that points at the real cause.
    if not df_list:
        raise ValueError(f"No non-empty CSV files found under {directory}")

    df = pd.concat(df_list, ignore_index=True)
    df = df.replace(to_replace=[r"\\t|\\n|\\r", "\t|\n|\r"], value=["", ""], regex=True)
    return df

def clean_data(df):
    """Normalise the raw evaluation frame in place and return it.

    Steps, in order:
      1. Cells matching 'N/A' become NaN; escaped and real tab/newline/
         carriage-return sequences are removed.
      2. Rows containing any NaN are dropped.
      3. A trailing single-capital-letter suffix in parentheses is removed
         from 'Course'.
      4. The two grade columns keep only the text inside their first pair of
         parentheses.
      5. The recommendation columns lose their trailing '%' and are scaled
         to fractions; the grade and study-hours columns are cast to float.

    The frame passed in is modified; the same object is returned.
    """
    patterns = ['N/A', r"\\t|\\n|\\r", "\t|\n|\r"]
    substitutes = [np.nan, "", ""]
    df.replace(patterns, substitutes, regex=True, inplace=True)
    df.dropna(inplace=True)

    df['Course'] = df['Course'].str.replace(r'\([A-Z]\)$', '', regex=True)

    # Keep just the parenthesised part of each grade cell.
    for grade_col in ('Avg Grade Expected', 'Avg Grade Received'):
        df[grade_col] = df[grade_col].str.extract(r'\((.*?)\)', expand=False)

    # Percent strings -> fractions.
    for pct_col in ('Rcmnd Class', 'Rcmnd Instr'):
        as_number = df[pct_col].str.rstrip('%').astype('float')
        df[pct_col] = as_number / 100.0

    for numeric_col in ('Avg Grade Expected', 'Avg Grade Received', 'Study Hrs/wk'):
        df[numeric_col] = df[numeric_col].astype(float)

    return df

# Read every evaluation CSV from the cloned repository and normalise it.
df = clean_data(load_data('/content/CAPEs-ranking/evals'))

def weighted_mean(x, weights):
    """Return the average of ``x`` weighted by ``weights``.

    Args:
        x: Values to average (any array-like accepted by ``np.average``).
        weights: One weight per value in ``x``.

    Returns:
        The weighted average, or NaN when the weights sum to zero (which
        includes empty input), since the average is undefined in that case.
    """
    # np.average raises ZeroDivisionError when the weights sum to zero; a
    # single such group would otherwise abort a whole groupby aggregation.
    if np.sum(weights) == 0:
        return np.nan
    return np.average(x, weights=weights)

def term_to_date(term):
    """Convert a term code such as 'FA22' into a timestamp.

    The first two characters select a month through a fixed lookup table and
    the remainder is a two-digit year. Years that are not greater than the
    current two-digit year are placed in the 2000s, all others in the 1900s.
    The result is the first day of the mapped month.

    Raises:
        KeyError: If the two-character prefix is not in the lookup table.
    """
    month_by_season = {'FA': '09', 'WI': '01', 'SP': '03', 'S1': '06', 'S2': '08', 'S3': '07'}
    season_code = term[:2]
    short_year = term[2:]

    # Both sides are two-character strings, so string comparison orders them
    # the same way as the numbers they spell.
    current_short_year = str(datetime.datetime.now().year)[-2:]
    century = '20' if short_year <= current_short_year else '19'

    return pd.to_datetime(f"{century}{short_year}{month_by_season[season_code]}01")

# Swap the term codes for timestamps so that 'max' below selects the most
# recent term of each group.
df['Term'] = df['Term'].apply(term_to_date)


def _weighted_by(weight_column):
    """Build an aggregator that averages a group weighted by ``weight_column``."""
    # ``df`` is looked up when the aggregator runs, i.e. inside ``.agg`` below,
    # while the name still refers to the un-grouped frame.
    return lambda values: weighted_mean(values, weights=df.loc[values.index, weight_column])


agg_dict = {
    'Term': 'max',
    'Enroll': 'sum',
    'Evals Made': 'sum',
    'Rcmnd Class': _weighted_by('Evals Made'),
    'Rcmnd Instr': _weighted_by('Evals Made'),
    'Study Hrs/wk': _weighted_by('Evals Made'),
    'Avg Grade Expected': _weighted_by('Evals Made'),
    'Avg Grade Received': _weighted_by('Enroll'),
}

# One row per (instructor, course) pair.
df = df.groupby(['Instructor', 'Course']).agg(agg_dict).reset_index()

# Turn the newest term of each group back into season code + two-digit year.
month_to_season = {1: 'WI', 3: 'SP', 6: 'S1', 7: 'S3', 8: 'S2', 9: 'FA'}
df['Term'] = df['Term'].apply(lambda stamp: month_to_season[stamp.month] + str(stamp.year)[-2:])

for rounded_col in ['Study Hrs/wk', 'Avg Grade Expected', 'Avg Grade Received']:
    df[rounded_col] = df[rounded_col].round(2)

# Output widget that hosts the grid: on_button_clicked clears it and renders
# the freshly sorted grid inside it.
grid_container = widgets.Output()

display(grid_container)

def on_button_clicked(b):
    """Re-render the grid sorted by the two columns chosen in the dropdowns.

    Clears ``grid_container``, sorts the module-level ``df`` by the primary
    and then the secondary column with their selected directions, and
    displays a new grid inside the container.

    Args:
        b: Argument passed by the click callback; not used.
    """
    grid_container.clear_output()

    sort_columns = [primary_column_select.value, secondary_column_select.value]
    ascending_flags = [
        primary_sort_order.value == 'Ascending',
        secondary_sort_order.value == 'Ascending',
    ]
    sorted_df = df.sort_values(by=sort_columns, ascending=ascending_flags)

    with grid_container:
        display(get_grid(sorted_df))

def _column_dropdown(label):
    """Create a dropdown listing every column of ``df``, preselecting the first."""
    return widgets.Dropdown(
        options=df.columns.tolist(),
        value=df.columns[0],
        description=label,
    )


def _order_dropdown(label):
    """Create an ascending/descending dropdown, preselecting ascending."""
    return widgets.Dropdown(
        options=['Ascending', 'Descending'],
        value='Ascending',
        description=label,
    )


primary_column_select = _column_dropdown('Primary Sort Column:')
primary_sort_order = _order_dropdown('Primary Sort Order:')
secondary_column_select = _column_dropdown('Secondary Sort Column:')
secondary_sort_order = _order_dropdown('Secondary Sort Order:')

# The button re-sorts and redraws the grid through on_button_clicked.
sort_button = widgets.Button(description="Sort Data")
sort_button.on_click(on_button_clicked)

# Show the controls in the order the user is expected to fill them in.
for control in (
    primary_column_select,
    primary_sort_order,
    secondary_column_select,
    secondary_sort_order,
    sort_button,
):
    display(control)

def get_grid(data):
    """Build an ipyaggrid ``Grid`` showing ``data``.

    Works on a copy, so the caller's frame is left untouched. The two
    recommendation columns are rendered as percentage strings with one
    decimal place before being handed to the grid.

    Args:
        data: DataFrame containing at least 'Rcmnd Class' and 'Rcmnd Instr'.

    Returns:
        The configured ``Grid`` widget.
    """
    table = data.copy()

    # NOTE(review): these columns reach the grid as strings, so sorting them
    # inside the grid is presumably lexicographic rather than numeric.
    for pct_col in ('Rcmnd Class', 'Rcmnd Instr'):
        table[pct_col] = table[pct_col].apply('{:.1%}'.format)

    column_defs = [{'field': column_name} for column_name in table.columns]
    grid_options = {
        'columnDefs' : column_defs,
        'enableSorting': True,
        'enableFilter': True,
        'enableColResize': True,
        'enableRangeSelection': True,
    }

    return Grid(
        grid_data=table,
        grid_options=grid_options,
        quick_filter=True,
        show_toggle_edit=True,
        export_mode="buttons",
        export_csv=True,
        export_excel=True,
        theme='ag-theme-balham',
        show_toggle_delete=True,
        index=True,
        keep_multiindex=False,
    )




File /content/CAPEs-ranking/evals/cse/cse-180r.csv is empty, skipping.
File /content/CAPEs-ranking/evals/cse/cse-199h.csv is empty, skipping.
File /content/CAPEs-ranking/evals/cse/cse-197c.csv is empty, skipping.
File /content/CAPEs-ranking/evals/cse/cse-199.csv is empty, skipping.
File /content/CAPEs-ranking/evals/cse/cse-197.csv is empty, skipping.
File /content/CAPEs-ranking/evals/cse/cse-195.csv is empty, skipping.
File /content/CAPEs-ranking/evals/cse/cse-99.csv is empty, skipping.
All files processed.


Output()

Dropdown(description='Primary Sort Column:', options=('Instructor', 'Course', 'Term', 'Enroll', 'Evals Made', …

Dropdown(description='Primary Sort Order:', options=('Ascending', 'Descending'), value='Ascending')

Dropdown(description='Secondary Sort Column:', options=('Instructor', 'Course', 'Term', 'Enroll', 'Evals Made'…

Dropdown(description='Secondary Sort Order:', options=('Ascending', 'Descending'), value='Ascending')

Button(description='Sort Data', style=ButtonStyle())