In [1]:
import pandas as pd
import numpy as np

import os

# Ask git for the repository's top-level directory and keep the first line
# of its output. The pipe is opened in a `with` block so that it is closed
# as soon as the output has been read instead of being left open.
with os.popen("git rev-parse --show-toplevel") as git_pipe:
    ROOT = git_pipe.read().split("\n")[0]

import sys

# Add the repository root to the import path so that the `src` package
# imported below can be found.
sys.path.append(ROOT)

import src as sc

# Prepare Campus Data

The cell below can also be carried out from the command line by running
```
> python src/preprocessing.py
> python src/locations.py 
```
from the top level git directory.

In [None]:
# Enrollment data: a dataframe with one row per enrolled student.
student_df = sc.get_student_enrollment_data()

# Hall-by-school table: one-hot, with halls as rows and schools as columns.
hall_df = sc.get_hall_by_school_table(student_df=student_df)

# Assign students to buildings proportionally. The student dataframe of
# each building is written to csv as:
#      "../data/filled_buildings/<building_name>_students.csv"
sc.fill_buildings(student_df=student_df, hall_df=hall_df)


# Compute Cost & Assignment Matrix

The cell below can also be carried out in one step by running
```
> python src/learning.py <beta> <lambda> <tau> <init> <iterations>
```
from the top level git directory. Replace each value in angle brackets with the appropriate argument:

```
        beta: (float) beta factor determines weight of the diversity 
            objectives in the optimization (i.e. "term 1").
        lam: (float) lambda factor determines weight of artwork capacity 
            constraints in optimization (i.e. "term 2").
        tau: (float) tau factor determines weight of preference for current 
            assignment in optimization (i.e. "term 3").
        init: (int) one of the following: 
                1 - identity matrix initialization
                2 - uniform initialization
                3 - current assignment initialization
                4 - random permutation initialization
        iterations: (int) number of iterations of gradient descent
```
Output will be written to the `output` folder.

In [None]:
# Load data: sc.load_data() is unpacked into the hall, student and art
# dataframes. This rebinds hall_df and student_df if the "Prepare Campus
# Data" cell was run earlier in the same session.
hall_df, student_df, art_df = sc.load_data()


In [None]:
# Get building_capacity_df from sc.get_building_capacity_df() (called with
# no arguments) and show it as the cell output.
building_capacity_df = sc.get_building_capacity_df()
building_capacity_df

In [None]:
# Build art_capacity_df from the student and art dataframes, using the
# "gender" and "race" categories, then show it as the cell output.
art_capacity_df = sc.get_art_capacity_with_downsampling(
    student_df,
    art_df,
    categories=["gender", "race"],
)
art_capacity_df

In [None]:
# Full cost matrix of shape n_buildings x n_artworks, computed over the
# "gender" and "race" categories.
cost_df = sc.compute_cost_matrix(
    hall_df=hall_df,
    student_df=student_df,
    art_df=art_df,
    categories=["gender", "race"],
    alpha=-1,
    beta=100,
)

cost_df

In [None]:
# Normalizing constants for the lambda and tau weights.
norm_lam_factor, norm_tau_factor = sc.get_normalizing_constants(
    hall_df=hall_df,
    student_df=student_df,
    art_df=art_df,
)

# Show both constants as the cell output.
norm_lam_factor, norm_tau_factor

In [None]:
# Learn the assignment matrix. The lambda and tau weights are the
# normalizing constants scaled by 10000 and 800 respectively.
# NOTE(review): init = 4 is described in the notes above as "random
# permutation initialization", and no seed is set in this notebook, so
# results may differ between runs unless `sc` seeds its RNG -- confirm.
assignment_df = sc.learn_optimal_assignment(
    hall_df,
    student_df,
    art_df,
    cost_df,
    lam=norm_lam_factor * 10000,
    tau=norm_tau_factor * 800,
    init=4,
    iterations=1000,
)

assignment_df

# Create Heatmap Visualization

In [None]:
# Cap every entry of the assignment matrix at 1 and plot the capped copy.
# The original cell computed the clipped frame but then passed the
# unclipped `assignment_df` to the heatmap, leaving the clip unused.
# `assignment_df` itself is not modified, so the validation cells below
# still see the unclipped values.
clipped_assignment_df = assignment_df.clip(upper=1)
sc.assignment_heatmat(clipped_assignment_df)

## Validation

In [None]:
# Baseline average value for category "gender" with in-group "Man".
sc.baseline_average_value(
    category="gender",
    in_group="Man",
)

In [None]:
# Average value under the learned assignment for category "gender" with
# in-group "Man".
sc.optimized_average_value(
    assignment_df,
    category="gender",
    in_group="Man",
)

In [None]:
# Baseline average value for category "race" with in-group "White".
sc.baseline_average_value(
    category="race",
    in_group="White",
)

In [None]:
# Average value under the learned assignment for category "race" with
# in-group "White".
sc.optimized_average_value(
    assignment_df,
    category="race",
    in_group="White",
)

## Make Visualizations

In [None]:
# Call sc.campus_building_map() with no arguments; whatever it returns is
# shown as the cell output.
sc.campus_building_map()

In [None]:
# Beeswarm plot for three buildings, with "race" as the demographic
# category.
sc.beeswarm_building_gender(
    demo_cat="race",
    building_list=["Aidekman", "CLIC", "dental_school"],
)

In [None]:
# Reload the data so this cell does not depend on earlier cells; only
# art_df is used by the plot below.
hall_df, student_df, art_df = sc.load_data()

# Beeswarm plot over the art dataframe with "race" as the demographic
# category.
sc.beeswarm_gender(
    demo_df=art_df,
    demo_cat="race",
    title="Total Count by Race and Gender",
)