In [None]:
from __future__ import print_function

# Jupyter display
from IPython.display import display

# json
import json

# widgets
import ipywidgets as widgets
import bqplot as bq
import ipyleaflet as ll

# numerics
import pandas as pd
import numpy as np
from sklearn import linear_model

# colormap
import matplotlib as mpl
import matplotlib.cm
import matplotlib.colors

def n_colors(n, colormap=mpl.cm.Blues):
    """Return ``n`` hex colour strings sampled evenly across ``colormap``.

    The colormap is evaluated at ``n`` equally spaced positions from 0.0 to
    1.0 (inclusive). Only the first three components of each returned entry
    are converted to hex (presumably RGB, with alpha dropped).
    """
    positions = np.linspace(0.0, 1.0, n)
    sampled = colormap(positions)
    return [mpl.colors.rgb2hex(entry[:3]) for entry in sampled]

def data_to_colors(data, colormap=mpl.cm.plasma):
    """Return one hex colour string per value in ``data``.

    Values are first passed through a default-constructed
    ``mpl.colors.Normalize`` and the result is fed to ``colormap``. Only the
    first three components of each colormap entry are converted to hex
    (presumably RGB, with alpha dropped).
    """
    normalizer = mpl.colors.Normalize()
    scaled = normalizer(data)
    return [mpl.colors.rgb2hex(entry[:3]) for entry in colormap(scaled)]

In [None]:
data = pd.read_csv('./../analysis_data/merged_data.csv')

In [None]:
race_data = data[['hispanic', 'white',
       'black', 'american_indian', 'asian', 'pac_islander', 'other_races', 'two_races']]

In [None]:
race_data = race_data / 100.

In [None]:
result = data['scores']

In [None]:
# Least-squares linear model with no intercept term: scores are modelled purely
# as a linear combination of the race columns.
clf = linear_model.LinearRegression(fit_intercept=False)

In [None]:
# Fit the scores against the eight rescaled race columns.
clf.fit(race_data.values, result.values)

In [None]:
# Fitted coefficients, one per column of race_data, in column order.
clf.coef_

In [None]:
# No intercept was fitted (fit_intercept=False above), so this is expected to
# display 0.0.
clf.intercept_

In [None]:
# Residuals: each score minus the value the race-only model predicts for it.
race_corrected_scores = result.values - clf.predict(race_data.values)

In [None]:
# Display the residual array.
race_corrected_scores

In [None]:
race_corrected_data = data.copy(deep=True)

In [None]:
race_corrected_data.rename(columns={'Unnamed: 0': 'zipcode'}, inplace=True)

In [None]:
race_corrected_data['scores'] = race_corrected_scores

In [None]:
race_corrected_data.to_csv('./../analysis_data/race_corrected_merged_data.csv')

In [None]:
race_corrected_data.corr().ix[:, -1]

## TEST

In [None]:
other_race_data = data[['hispanic',
       'black', 'american_indian', 'asian', 'pac_islander', 'other_races', 'two_races']]
white_data = data[['white']]

In [None]:
other_race_data = other_race_data / 100.
white_data = white_data / 100.

In [None]:
other_races = other_race_data.sum(axis=1)

In [None]:
clf2 = linear_model.LinearRegression(fit_intercept=False)

In [None]:
clf.fit(np.array([other_races.values, white_data.values.flatten()]).T, result.values)

In [None]:
other_race_corrected_scores = result.values - \
            clf.predict(np.array([other_races.values, white_data.values.flatten()]).T)

In [None]:
other_race_corrected_scores

In [None]:
other_race_corrected_data = data.copy(deep=True)

In [None]:
other_race_corrected_data.rename(columns={'Unnamed: 0': 'zipcode'}, inplace=True)

In [None]:
other_race_corrected_data['scores'] = other_race_corrected_scores

In [None]:
race_corrected_data.corr().ix[:, -1]

In [None]:
other_race_corrected_data.corr().ix[:, -1]

In [None]:
other_race_corrected_data.to_csv('./../analysis_data/other_race_corrected_merged_data.csv')