# Predict household income from satellite imagery data

First pass.

General ML pipeline steps:
1. Import data
2. Split data into test/train sets
3. Preprocess test/train sets separately
4. Generate features from data
5. For each regressor-hyperparameter combination:
    - Train regressor with given hyperparameters and training data and labels
    - Generate predicted labels for test data with trained regressor
    - Evaluate regressor-hyperparameter performance against actual test labels and get $R^2$
6. Explore best-performing models

In [1]:
import os
import math
import pickle
import numpy as np
import pandas as pd 

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.svm import LinearSVR
from sklearn.svm import LinearSVC

from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import BaggingRegressor, GradientBoostingRegressor, RandomForestRegressor
from sklearn.ensemble import BaggingClassifier, GradientBoostingClassifier, RandomForestClassifier

from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt

# Import configuration file
import config as cf

# Display options 
pd.options.display.max_columns = 999
pd.options.display.max_colwidth = -1

# Turn off big pink warnings
import warnings
warnings.filterwarnings('ignore')

# Data file path 
final_data_file_path = "/Users/robmarty/Dropbox/World Bank/IEs/Pakistan Poverty Estimation from Satellites/Data/FinalData"
#


In [2]:
# Test grid to make sure everything works - limited models and parameters
GRID_TEST_CLASS = {
    'regressors': ['LinearSVC','DecisionTreeClassifier', 'BaggingClassifier',
                    'GradientBoostingClassifier', 'RandomForestClassifier'],
    'LinearSVC': [
        {'penalty': penalty, 'C': C, 'loss': loss, 'max_iter': max_iter,
        'random_state': 0} \
        for penalty in ('l2', ) \
        for C in (1e-2, 1e2) \
        for loss in ('epsilon_insensitive', ) \
        for max_iter in (1e3, 1e5)
    ],
    'DecisionTreeClassifier': [
        {'criterion': criterion, 'splitter': splitter, 'max_depth': max_depth,
        'max_features': max_features, 'random_state': 0} \
        for criterion in ('gini', ) \
        for splitter in ('best', ) \
        for max_depth in (1, 5, 10, 20, 30) \
        for max_features in ('sqrt', ) \
    ],
    'BaggingClassifier': [
        {'n_estimators': n_estimators, 'max_features': max_features,
        'random_state': 0, 'n_jobs': -1} \
        for n_estimators in (100, 1000, 10000) \
        for max_features in (0.3, 0.5, 1.0)
    ],
    'GradientBoostingClassifier': [
        {'loss': loss, 'learning_rate': rate, 'n_estimators': n_estimators,
        'criterion': criterion, 'max_features': max_features,
        'random_state': 0} \
        for loss in ('deviance', ) \
        for rate in (1e-4, )
        for n_estimators in (100, ) \
        for criterion in ('friedman_mse', ) \
        for max_features in ('sqrt', ) \
    ],
    'RandomForestClassifier': [
        {'n_estimators': n_estimators, 'criterion': criterion,
        'max_depth': max_depth, 'max_features': max_features, 'n_jobs': -1,
        'random_state': 0} \
        for n_estimators in (10, 100, 1000) \
        for criterion in ('gini', ) \
        for max_depth in (1, ) \
        for max_features in ('sqrt', )
    ]
}

## 1. Import data and drop "future" rows

In [3]:
DATA_PATH = os.path.join(final_data_file_path, 'BISP','Merged Datasets', 'bisp_socioeconomic_satellite_firstdiff_r13.csv')
df = pd.read_csv(DATA_PATH)
df.shape

(3177, 478)

In [4]:
df.head()

Unnamed: 0,uid,survey_round,province,psu,locality,period,treatment,panel,present11,present13,present16,hh_size,income_last_month_N_NAs,income_last_month,income_last_year_N_NAs,income_last_year,pscores,b1_buff_0.1km_mean,b2_buff_0.1km_mean,b3_buff_0.1km_mean,b4_buff_0.1km_mean,b5_buff_0.1km_mean,b6_buff_0.1km_mean,b7_buff_0.1km_mean,b12_buff_0.1km_mean,b13_buff_0.1km_mean,b14_buff_0.1km_mean,b15_buff_0.1km_mean,b16_buff_0.1km_mean,b17_buff_0.1km_mean,b23_buff_0.1km_mean,b24_buff_0.1km_mean,b25_buff_0.1km_mean,b26_buff_0.1km_mean,b27_buff_0.1km_mean,b34_buff_0.1km_mean,b35_buff_0.1km_mean,b36_buff_0.1km_mean,b37_buff_0.1km_mean,b45_buff_0.1km_mean,b46_buff_0.1km_mean,b47_buff_0.1km_mean,b56_buff_0.1km_mean,b57_buff_0.1km_mean,b67_buff_0.1km_mean,b1_buff_0.5km_mean,b2_buff_0.5km_mean,b3_buff_0.5km_mean,b4_buff_0.5km_mean,b5_buff_0.5km_mean,b6_buff_0.5km_mean,b7_buff_0.5km_mean,b12_buff_0.5km_mean,b13_buff_0.5km_mean,b14_buff_0.5km_mean,b15_buff_0.5km_mean,b16_buff_0.5km_mean,b17_buff_0.5km_mean,b23_buff_0.5km_mean,b24_buff_0.5km_mean,b25_buff_0.5km_mean,b26_buff_0.5km_mean,b27_buff_0.5km_mean,b34_buff_0.5km_mean,b35_buff_0.5km_mean,b36_buff_0.5km_mean,b37_buff_0.5km_mean,b45_buff_0.5km_mean,b46_buff_0.5km_mean,b47_buff_0.5km_mean,b56_buff_0.5km_mean,b57_buff_0.5km_mean,b67_buff_0.5km_mean,b1_buff_1km_mean,b2_buff_1km_mean,b3_buff_1km_mean,b4_buff_1km_mean,b5_buff_1km_mean,b6_buff_1km_mean,b7_buff_1km_mean,b12_buff_1km_mean,b13_buff_1km_mean,b14_buff_1km_mean,b15_buff_1km_mean,b16_buff_1km_mean,b17_buff_1km_mean,b23_buff_1km_mean,b24_buff_1km_mean,b25_buff_1km_mean,b26_buff_1km_mean,b27_buff_1km_mean,b34_buff_1km_mean,b35_buff_1km_mean,b36_buff_1km_mean,b37_buff_1km_mean,b45_buff_1km_mean,b46_buff_1km_mean,b47_buff_1km_mean,b56_buff_1km_mean,b57_buff_1km_mean,b67_buff_1km_mean,b1_buff_1.5km_mean,b2_buff_1.5km_mean,b3_buff_1.5km_mean,b4_buff_1.5km_mean,b5_buff_1.5km_mean,b6_buff_1.5km_mean,b7_buff_1.5km_mean,b12_buff_1.5km_mean,b13_buff_1.5km_mean,b14_buff_1.5km_mean,b15_buff_1.5km_mean,b16_buff_1.5km_mean,b17_buff_1.5km_mean,b23_buff_1.5km_mean,b24_buff_1.5km_mean,b25_buff_1.5km_mean,b26_buff_1.5km_mean,b27_buff_1.5km_mean,b34_buff_1.5km_mean,b35_buff_1.5km_mean,b36_buff_1.5km_mean,b37_buff_1.5km_mean,b45_buff_1.5km_mean,b46_buff_1.5km_mean,b47_buff_1.5km_mean,b56_buff_1.5km_mean,b57_buff_1.5km_mean,b67_buff_1.5km_mean,b1_buff_2km_mean,b2_buff_2km_mean,b3_buff_2km_mean,b4_buff_2km_mean,b5_buff_2km_mean,b6_buff_2km_mean,b7_buff_2km_mean,b12_buff_2km_mean,b13_buff_2km_mean,b14_buff_2km_mean,b15_buff_2km_mean,b16_buff_2km_mean,b17_buff_2km_mean,b23_buff_2km_mean,b24_buff_2km_mean,b25_buff_2km_mean,b26_buff_2km_mean,b27_buff_2km_mean,b34_buff_2km_mean,b35_buff_2km_mean,b36_buff_2km_mean,b37_buff_2km_mean,b45_buff_2km_mean,b46_buff_2km_mean,b47_buff_2km_mean,b56_buff_2km_mean,b57_buff_2km_mean,b67_buff_2km_mean,b1_buff_0.1km_min,b2_buff_0.1km_min,b3_buff_0.1km_min,b4_buff_0.1km_min,b5_buff_0.1km_min,b6_buff_0.1km_min,b7_buff_0.1km_min,b12_buff_0.1km_min,b13_buff_0.1km_min,b14_buff_0.1km_min,b15_buff_0.1km_min,b16_buff_0.1km_min,b17_buff_0.1km_min,b23_buff_0.1km_min,b24_buff_0.1km_min,b25_buff_0.1km_min,b26_buff_0.1km_min,b27_buff_0.1km_min,b34_buff_0.1km_min,b35_buff_0.1km_min,b36_buff_0.1km_min,b37_buff_0.1km_min,b45_buff_0.1km_min,b46_buff_0.1km_min,b47_buff_0.1km_min,b56_buff_0.1km_min,b57_buff_0.1km_min,b67_buff_0.1km_min,b1_buff_0.5km_min,b2_buff_0.5km_min,b3_buff_0.5km_min,b4_buff_0.5km_min,b5_buff_0.5km_min,b6_buff_0.5km_min,b7_buff_0.5km_min,b12_buff_0.5km_min,b13_buff_0.5km_min,b14_buff_0.5km_min,b15_buff_0.5km_min,b16_buff_0.5km_min,b17_buff_0.5km_min,b23_buff_0.5km_min,b24_buff_0.5km_min,b25_buff_0.5km_min,b26_buff_0.5km_min,b27_buff_0.5km_min,b34_buff_0.5km_min,b35_buff_0.5km_min,b36_buff_0.5km_min,b37_buff_0.5km_min,b45_buff_0.5km_min,b46_buff_0.5km_min,b47_buff_0.5km_min,b56_buff_0.5km_min,b57_buff_0.5km_min,b67_buff_0.5km_min,b1_buff_1km_min,b2_buff_1km_min,b3_buff_1km_min,b4_buff_1km_min,b5_buff_1km_min,b6_buff_1km_min,b7_buff_1km_min,b12_buff_1km_min,b13_buff_1km_min,b14_buff_1km_min,b15_buff_1km_min,b16_buff_1km_min,b17_buff_1km_min,b23_buff_1km_min,b24_buff_1km_min,b25_buff_1km_min,b26_buff_1km_min,b27_buff_1km_min,b34_buff_1km_min,b35_buff_1km_min,b36_buff_1km_min,b37_buff_1km_min,b45_buff_1km_min,b46_buff_1km_min,b47_buff_1km_min,b56_buff_1km_min,b57_buff_1km_min,b67_buff_1km_min,b1_buff_1.5km_min,b2_buff_1.5km_min,b3_buff_1.5km_min,b4_buff_1.5km_min,b5_buff_1.5km_min,b6_buff_1.5km_min,b7_buff_1.5km_min,b12_buff_1.5km_min,b13_buff_1.5km_min,b14_buff_1.5km_min,b15_buff_1.5km_min,b16_buff_1.5km_min,b17_buff_1.5km_min,b23_buff_1.5km_min,b24_buff_1.5km_min,b25_buff_1.5km_min,b26_buff_1.5km_min,b27_buff_1.5km_min,b34_buff_1.5km_min,b35_buff_1.5km_min,b36_buff_1.5km_min,b37_buff_1.5km_min,b45_buff_1.5km_min,b46_buff_1.5km_min,b47_buff_1.5km_min,b56_buff_1.5km_min,b57_buff_1.5km_min,b67_buff_1.5km_min,b1_buff_2km_min,b2_buff_2km_min,b3_buff_2km_min,b4_buff_2km_min,b5_buff_2km_min,b6_buff_2km_min,b7_buff_2km_min,b12_buff_2km_min,b13_buff_2km_min,b14_buff_2km_min,b15_buff_2km_min,b16_buff_2km_min,b17_buff_2km_min,b23_buff_2km_min,b24_buff_2km_min,b25_buff_2km_min,b26_buff_2km_min,b27_buff_2km_min,b34_buff_2km_min,b35_buff_2km_min,b36_buff_2km_min,b37_buff_2km_min,b45_buff_2km_min,b46_buff_2km_min,b47_buff_2km_min,b56_buff_2km_min,b57_buff_2km_min,b67_buff_2km_min,b1_buff_0.1km_max,b2_buff_0.1km_max,b3_buff_0.1km_max,b4_buff_0.1km_max,b5_buff_0.1km_max,b6_buff_0.1km_max,b7_buff_0.1km_max,b12_buff_0.1km_max,b13_buff_0.1km_max,b14_buff_0.1km_max,b15_buff_0.1km_max,b16_buff_0.1km_max,b17_buff_0.1km_max,b23_buff_0.1km_max,b24_buff_0.1km_max,b25_buff_0.1km_max,b26_buff_0.1km_max,b27_buff_0.1km_max,b34_buff_0.1km_max,b35_buff_0.1km_max,b36_buff_0.1km_max,b37_buff_0.1km_max,b45_buff_0.1km_max,b46_buff_0.1km_max,b47_buff_0.1km_max,b56_buff_0.1km_max,b57_buff_0.1km_max,b67_buff_0.1km_max,b1_buff_0.5km_max,b2_buff_0.5km_max,b3_buff_0.5km_max,b4_buff_0.5km_max,b5_buff_0.5km_max,b6_buff_0.5km_max,b7_buff_0.5km_max,b12_buff_0.5km_max,b13_buff_0.5km_max,b14_buff_0.5km_max,b15_buff_0.5km_max,b16_buff_0.5km_max,b17_buff_0.5km_max,b23_buff_0.5km_max,b24_buff_0.5km_max,b25_buff_0.5km_max,b26_buff_0.5km_max,b27_buff_0.5km_max,b34_buff_0.5km_max,b35_buff_0.5km_max,b36_buff_0.5km_max,b37_buff_0.5km_max,b45_buff_0.5km_max,b46_buff_0.5km_max,b47_buff_0.5km_max,b56_buff_0.5km_max,b57_buff_0.5km_max,b67_buff_0.5km_max,b1_buff_1km_max,b2_buff_1km_max,b3_buff_1km_max,b4_buff_1km_max,b5_buff_1km_max,b6_buff_1km_max,b7_buff_1km_max,b12_buff_1km_max,b13_buff_1km_max,b14_buff_1km_max,b15_buff_1km_max,b16_buff_1km_max,b17_buff_1km_max,b23_buff_1km_max,b24_buff_1km_max,b25_buff_1km_max,b26_buff_1km_max,b27_buff_1km_max,b34_buff_1km_max,b35_buff_1km_max,b36_buff_1km_max,b37_buff_1km_max,b45_buff_1km_max,b46_buff_1km_max,b47_buff_1km_max,b56_buff_1km_max,b57_buff_1km_max,b67_buff_1km_max,b1_buff_1.5km_max,b2_buff_1.5km_max,b3_buff_1.5km_max,b4_buff_1.5km_max,b5_buff_1.5km_max,b6_buff_1.5km_max,b7_buff_1.5km_max,b12_buff_1.5km_max,b13_buff_1.5km_max,b14_buff_1.5km_max,b15_buff_1.5km_max,b16_buff_1.5km_max,b17_buff_1.5km_max,b23_buff_1.5km_max,b24_buff_1.5km_max,b25_buff_1.5km_max,b26_buff_1.5km_max,b27_buff_1.5km_max,b34_buff_1.5km_max,b35_buff_1.5km_max,b36_buff_1.5km_max,b37_buff_1.5km_max,b45_buff_1.5km_max,b46_buff_1.5km_max,b47_buff_1.5km_max,b56_buff_1.5km_max,b57_buff_1.5km_max,b67_buff_1.5km_max,b1_buff_2km_max,b2_buff_2km_max,b3_buff_2km_max,b4_buff_2km_max,b5_buff_2km_max,b6_buff_2km_max,b7_buff_2km_max,b12_buff_2km_max,b13_buff_2km_max,b14_buff_2km_max,b15_buff_2km_max,b16_buff_2km_max,b17_buff_2km_max,b23_buff_2km_max,b24_buff_2km_max,b25_buff_2km_max,b26_buff_2km_max,b27_buff_2km_max,b34_buff_2km_max,b35_buff_2km_max,b36_buff_2km_max,b37_buff_2km_max,b45_buff_2km_max,b46_buff_2km_max,b47_buff_2km_max,b56_buff_2km_max,b57_buff_2km_max,b67_buff_2km_max,viirs_spatialmean_monthlymean_buff_1km,viirs_spatialmean_monthlysd_buff_1km,viirs_spatialmean_monthlymean_buff_2km,viirs_spatialmean_monthlysd_buff_2km,viirs_spatialmean_monthlymean_buff_3km,viirs_spatialmean_monthlysd_buff_3km,viirs_spatialmean_monthlymean_buff_5km,viirs_spatialmean_monthlysd_buff_5km,viirs_spatialmean_monthlymean_buff_10km,viirs_spatialmean_monthlysd_buff_10km,viirs_spatialmax_monthlymean_buff_1km,viirs_spatialmax_monthlysd_buff_1km,viirs_spatialmax_monthlymean_buff_2km,viirs_spatialmax_monthlysd_buff_2km,viirs_spatialmax_monthlymean_buff_3km,viirs_spatialmax_monthlysd_buff_3km,viirs_spatialmax_monthlymean_buff_5km,viirs_spatialmax_monthlysd_buff_5km,viirs_spatialmax_monthlymean_buff_10km,viirs_spatialmax_monthlysd_buff_10km,viirs_spatialmin_monthlymean_buff_1km,viirs_spatialmin_monthlysd_buff_1km,viirs_spatialmin_monthlymean_buff_2km,viirs_spatialmin_monthlysd_buff_2km,viirs_spatialmin_monthlymean_buff_3km,viirs_spatialmin_monthlysd_buff_3km,viirs_spatialmin_monthlymean_buff_5km,viirs_spatialmin_monthlysd_buff_5km,viirs_spatialmin_monthlymean_buff_10km,viirs_spatialmin_monthlysd_buff_10km,viirs_spatialsd_monthlymean_buff_1km,viirs_spatialsd_monthlysd_buff_1km,viirs_spatialsd_monthlymean_buff_2km,viirs_spatialsd_monthlysd_buff_2km,viirs_spatialsd_monthlymean_buff_3km,viirs_spatialsd_monthlysd_buff_3km,viirs_spatialsd_monthlymean_buff_5km,viirs_spatialsd_monthlysd_buff_5km,viirs_spatialsd_monthlymean_buff_10km,viirs_spatialsd_monthlysd_buff_10km,N_uid
0,100389,3,1,1,1,2,0,0,1,1,0,5,5,-1500,1,-9000.0,3.549999,-57.8,-109.814286,-159.485714,-46.671429,-206.685714,-44.928571,-215.814286,-0.011621,-0.021753,0.014686,-0.010519,0.014936,-0.019523,-0.01075,0.02699,-0.000793,0.027072,-0.00979,0.038089,0.009363,0.038035,0.000511,-0.030477,0.001349,-0.039572,0.031695,-0.009331,-0.0403,-45.527778,-86.612847,-165.947917,24.09375,-184.23206,-34.865162,-241.662037,-0.009912,-0.033849,0.02238,-0.009733,0.013671,-0.03397,-0.02508,0.034246,-0.001962,0.024162,-0.027293,0.058139,0.02109,0.046926,-0.00333,-0.041698,-0.010308,-0.066182,0.031131,-0.025907,-0.05487,-38.13466,-54.520337,-124.118536,126.720947,-98.793725,-30.003341,-184.361418,-0.001151,-0.024045,0.034539,0.000952,0.011668,-0.025312,-0.023822,0.040444,0.002119,0.014496,-0.026433,0.063285,0.024047,0.035483,-0.003654,-0.044497,-0.029081,-0.072213,0.01519,-0.029438,-0.04274,-31.700374,-37.736319,-97.614094,120.888939,-62.792914,-29.899716,-136.319308,0.002136,-0.016772,0.031034,0.003784,0.008863,-0.015902,-0.019665,0.03343,0.002346,0.008447,-0.019454,0.052656,0.020578,0.02597,-0.000631,-0.035614,-0.027969,-0.057247,0.007479,-0.022618,-0.029095,-43.114759,-50.848451,-116.343589,94.604455,-94.018576,-28.637798,-164.43248,0.002992,-0.015819,0.031013,0.003678,0.013014,-0.015355,-0.019581,0.032561,0.001455,0.012399,-0.019735,0.051935,0.019639,0.030433,-0.001049,-0.035344,-0.022282,-0.056641,0.012954,-0.022018,-0.034132,-43.0,-32.0,135.0,-156.5,-139.5,-35.0,-95.0,0.011309,0.075448,-0.005283,-0.003085,0.013567,0.000257,0.06698,-0.016099,-0.013666,0.006549,-0.010597,-0.077468,-0.075327,-0.049549,-0.075549,0.002908,0.026346,0.007082,0.023416,0.004229,-0.018455,-3.0,51.0,-20.0,-85.0,43.0,-23.5,14.0,0.036952,-0.011545,-0.019559,0.015827,-0.000663,0.01138,-0.049359,-0.054963,-0.017108,-0.024754,-0.025943,-0.011527,0.027783,0.006437,0.023349,0.043924,0.021448,0.03296,-0.017328,-0.006837,0.008572,-3.0,59.0,23.0,189.0,77.5,-10.5,47.0,0.042585,0.019687,0.062731,0.035053,0.000501,0.037823,-0.023494,0.029189,-0.004201,-0.02696,-0.005241,0.049611,0.018174,-0.011457,0.018279,-0.035769,-0.063184,-0.033099,-0.02978,-0.000843,0.02236,12.0,145.0,73.0,427.5,202.5,-26.0,107.5,0.106496,0.053283,0.159695,0.099502,-0.008175,0.09084,-0.053765,0.069467,-0.001271,-0.070206,-0.016239,0.116044,0.050239,-0.037439,0.037482,-0.075782,-0.157093,-0.080512,-0.085487,-0.014136,0.054999,12.0,145.0,73.0,427.5,202.5,-26.0,106.5,0.106496,0.053283,0.159695,0.099502,-0.008175,0.089979,-0.053765,0.069467,-0.001271,-0.070206,-0.017095,0.116044,0.050239,-0.037439,0.03662,-0.075782,-0.157093,-0.081241,-0.085487,-0.014944,0.054522,-110.5,-146.5,-251.0,-94.0,-154.5,12.0,-193.0,-0.000658,-0.024038,0.023386,0.014288,0.037997,0.005547,-0.024139,0.02622,0.016222,0.042397,0.006616,0.049831,0.039665,0.066304,0.030262,-0.010955,0.018061,-0.021406,0.028995,-0.01051,-0.039283,-2.0,-104.0,-276.0,184.0,-204.0,-23.0,-334.5,-0.027789,-0.062063,0.023453,-0.022276,-0.002612,-0.04861,-0.0363,0.051359,0.000509,0.023492,-0.02595,0.088178,0.034577,0.059612,0.008685,-0.055828,-0.031222,-0.083721,0.024472,-0.027902,-0.052773,19.5,-34.0,-179.5,102.0,-183.0,42.0,500.5,-0.014714,-0.042357,0.007604,-0.025551,0.000469,0.050267,-0.02907,0.022054,-0.01432,0.014673,0.06815,0.05136,0.01279,0.043953,0.099388,-0.03947,-0.008266,0.051623,0.031011,0.091273,0.05949,3.0,129.0,146.0,320.0,128.0,41.0,-108.5,0.02278,0.019421,0.034621,0.008157,0.005561,-0.010454,-0.002422,0.015241,-0.011107,-0.017383,-0.033266,0.018188,-0.009611,-0.014994,-0.032545,-0.027765,-0.032798,-0.051408,-0.004371,-0.022897,-0.017626,-55.5,-230.0,-356.0,-447.5,-471.5,68.0,-540.5,-0.024067,-0.029929,-0.028969,-0.016879,0.02283,-0.026913,-0.007668,-0.008485,0.001822,0.04825,-0.008176,-0.001127,0.009235,0.055251,-0.001203,0.010748,0.054478,-0.00011,0.039987,-0.011092,-0.051995,-0.062853,-0.350067,0.244809,-0.254202,0.27219,-0.405141,0.169938,-0.390237,0.040338,-0.209193,-0.457179,-1.132364,1.153745,-0.229618,-0.708209,-1.34218,1.484573,-0.935769,1.484573,-0.935769,-0.064717,0.03755,-0.016654,-0.017958,0.016061,0.017425,-0.003069,0.044795,0.004133,0.032205,-0.15278,-0.411878,0.279187,-0.107712,0.177808,-0.284663,0.313546,-0.086772,0.118105,-0.044077,2
1,100401,3,1,1,1,2,0,0,1,1,0,9,9,-7500,1,-75000.0,6.039999,-10.138889,4.388889,25.041667,128.611111,13.777778,-3.847222,121.472222,0.008733,0.018453,0.019272,0.007096,0.003933,0.040691,0.009991,0.015741,0.00064,-0.002129,0.035705,0.00807,-0.007815,-0.009775,0.026218,-0.019408,-0.023146,0.015414,-0.003521,0.034913,0.036018,-46.856895,-73.968134,-147.173233,61.769409,-153.911356,-33.988992,-216.469873,-0.004021,-0.026295,0.028389,-0.003687,0.014444,-0.026842,-0.023292,0.036012,-0.000546,0.020148,-0.025416,0.058418,0.020821,0.041228,-0.003207,-0.042251,-0.017212,-0.066436,0.024829,-0.025561,-0.048549,-33.10828,-43.913289,-111.013752,132.278083,-84.460046,-30.926896,-165.91271,0.000404,-0.022373,0.033712,0.001183,0.009684,-0.023583,-0.023674,0.038133,0.000974,0.010918,-0.026086,0.060768,0.022672,0.031519,-0.003462,-0.043124,-0.030655,-0.06933,0.012211,-0.027814,-0.038265,-42.950852,-53.228946,-122.012981,106.915719,-89.786941,-31.498838,-169.823301,0.002086,-0.018717,0.033342,0.004521,0.012986,-0.017489,-0.021664,0.036131,0.003246,0.013247,-0.021171,0.057378,0.023323,0.032872,-0.000464,-0.037672,-0.025372,-0.062142,0.012145,-0.025426,-0.036445,-45.595827,-55.97812,-124.018687,85.337736,-104.907003,-29.394702,-173.400581,0.002389,-0.016815,0.030556,0.003194,0.013878,-0.015865,-0.020024,0.032637,0.001465,0.013931,-0.019728,0.052464,0.020051,0.03248,-0.000623,-0.035459,-0.020622,-0.056788,0.014738,-0.02204,-0.035919,0.5,9.0,-62.0,308.0,-12.5,-20.0,-48.0,0.004759,-0.042967,0.042078,-0.002545,-0.002141,-0.019983,-0.047897,0.04659,-0.007088,-0.006023,-0.026316,0.077601,0.031092,0.024952,0.020414,-0.06784,-0.06772,-0.075593,-0.00023,-0.016915,-0.014424,-3.0,51.0,-21.0,361.5,43.0,-23.5,14.0,0.036952,-0.012269,0.071079,0.015827,-0.000663,0.01138,-0.05009,0.051,-0.017108,-0.024754,-0.025943,0.089602,0.028438,0.006885,0.024078,-0.079158,-0.096296,-0.074066,-0.017328,-0.006837,0.008572,-3.0,49.0,23.0,189.0,28.0,-24.0,-2.0,0.03608,0.019687,0.062731,0.01461,-0.000724,0.001158,-0.016842,0.035245,-0.019898,-0.024125,-0.035452,0.049611,-0.003547,-0.012869,-0.018707,-0.058282,-0.065198,-0.065348,-0.013559,-0.014132,0.001484,12.0,145.0,73.0,427.5,202.5,-26.0,107.5,0.106496,0.053283,0.159695,0.099502,-0.008175,0.09084,-0.053765,0.069467,-0.001271,-0.070206,-0.016239,0.116044,0.050239,-0.037439,0.037482,-0.075782,-0.157093,-0.080512,-0.085487,-0.014136,0.054999,12.0,138.0,73.0,93.0,56.0,-26.0,-11.0,0.101452,0.053283,0.034533,0.022819,-0.008175,-0.023188,-0.048658,-0.061334,-0.076849,-0.067055,-0.123123,-0.014683,-0.028686,-0.037439,-0.075944,-0.013737,-0.039532,-0.053951,-0.026642,-0.043949,-0.003478,-5.5,20.0,-83.5,471.0,-24.5,7.0,51.5,0.009189,-0.021731,0.058678,-0.001694,0.002849,0.011674,-0.031775,0.057443,-0.010329,-0.005026,0.00379,0.089011,0.019352,0.024005,0.034882,-0.07771,-0.072497,-0.061402,0.005639,0.015328,0.009605,-99.0,-181.5,-224.0,184.0,-309.0,-12.0,-491.0,-0.014047,-0.014294,0.052755,-0.003937,0.029872,-0.036166,-0.000786,0.071511,0.008157,0.046087,-0.026451,0.076075,0.009398,0.048915,-0.026867,-0.069763,-0.02943,-0.10744,0.040113,-0.037541,-0.078226,106.5,46.5,213.5,287.0,367.0,-23.0,230.5,-0.0232,0.004681,0.006054,0.003657,-0.034493,-0.00459,0.028221,0.028802,0.02429,-0.01463,0.016162,0.002006,-0.000151,-0.043787,-0.010367,-0.002151,-0.045928,-0.012734,-0.042127,-0.010533,0.032766,-244.5,-123.0,39.5,161.0,12.0,37.0,-259.0,0.036071,0.059838,0.069521,0.046876,0.062107,0.026547,0.026902,0.039678,0.020198,0.027592,-0.005305,0.013792,-0.004272,0.000405,-0.032216,-0.017936,-0.013187,-0.046554,0.004493,-0.028154,-0.03184,-55.5,-230.0,-356.0,-447.5,-471.5,68.0,-540.5,-0.024067,-0.029929,-0.028969,-0.016879,0.02283,-0.026913,-0.007668,-0.008485,0.001822,0.04825,-0.008176,-0.001127,0.009235,0.055251,-0.001203,0.010748,0.054478,-0.00011,0.039987,-0.011092,-0.051995,-0.033067,-0.339289,0.215588,-0.289247,0.474134,-0.334575,0.192196,-0.383011,0.04761,-0.204247,-0.457179,-1.132364,-0.315197,-1.852972,2.25568,1.894983,1.484573,-0.935769,0.317548,-0.01259,-0.064717,0.03755,0.049441,0.000172,-0.012838,-0.000741,0.013468,0.014698,0.004803,0.03536,-0.141799,-0.369501,0.140318,-0.2896,0.58035,0.117744,0.325825,-0.055987,0.127423,0.041105,2
2,100581,3,1,1,1,2,0,0,1,1,0,9,9,-4000,3,-48000.0,1.45,-164.69697,-275.69697,-403.19697,-252.530303,-485.424242,-54.80303,-555.409091,-0.015605,-0.043642,0.031351,-0.003457,0.056612,-0.042885,-0.029883,0.050356,0.010347,0.079844,-0.031936,0.079724,0.038469,0.110105,-0.003282,-0.045823,0.035956,-0.088523,0.081535,-0.044157,-0.122077,-27.515169,-39.798716,-89.140607,87.477246,-71.668028,-27.505251,-113.982497,-0.00095,-0.017604,0.023593,0.00093,0.007921,-0.013601,-0.017343,0.028012,0.001912,0.010028,-0.01393,0.044333,0.017778,0.025135,0.002722,-0.030924,-0.020746,-0.045888,0.010107,-0.016411,-0.025337,-32.58263,-40.047134,-97.124964,124.080012,-75.249782,-30.361362,-140.287169,0.001815,-0.017179,0.031837,0.002501,0.009534,-0.017125,-0.019748,0.034897,0.0012,0.009598,-0.020488,0.053939,0.019339,0.026885,-0.001613,-0.039202,-0.028689,-0.060137,0.010325,-0.022375,-0.031328,-33.197743,-36.078723,-90.899807,113.388201,-59.143069,-29.932689,-127.311154,0.003695,-0.013555,0.030448,0.005155,0.009535,-0.012832,-0.017926,0.031502,0.002513,0.007943,-0.017662,0.049097,0.019124,0.023931,-0.000531,-0.033335,-0.026482,-0.053406,0.006706,-0.020996,-0.026798,-42.140292,-49.500834,-114.163801,97.502177,-94.3009,-29.216079,-162.276085,0.003116,-0.015727,0.031155,0.003423,0.01274,-0.015431,-0.019626,0.032731,0.001069,0.012042,-0.019962,0.052122,0.019247,0.030029,-0.001243,-0.036115,-0.022963,-0.057132,0.013036,-0.021816,-0.033951,-118.5,-66.0,-126.0,-259.5,-217.0,-35.0,-223.0,0.045899,0.018317,0.019766,0.020918,0.047037,-0.004375,-0.028245,-0.015543,-0.01794,0.020015,-0.050976,0.007676,0.007137,0.041868,-0.023237,-0.001275,0.044893,-0.029637,0.044969,-0.030068,-0.066561,-3.0,51.0,-20.0,278.0,43.0,-23.5,14.0,0.036952,-0.011545,0.043936,0.015827,-0.000663,0.01138,-0.049359,0.022655,-0.017108,-0.024754,-0.025943,0.058577,0.027783,0.006437,0.023349,-0.045779,-0.067081,-0.043675,-0.017328,-0.006837,0.008572,-3.0,59.0,23.0,189.0,77.5,-11.0,47.0,0.042585,0.019687,0.062731,0.035053,0.000456,0.037823,-0.023494,0.029189,-0.004201,-0.027012,-0.005241,0.049611,0.018174,-0.011508,0.018279,-0.035769,-0.063254,-0.033099,-0.029845,-0.000843,0.02241,7.0,145.0,73.0,427.5,202.5,-26.0,107.5,0.111316,0.058105,0.164168,0.104146,-0.00572,0.095651,-0.053765,0.069467,-0.001271,-0.070206,-0.016239,0.116044,0.050239,-0.037439,0.037482,-0.075782,-0.157093,-0.080512,-0.085487,-0.014136,0.054999,12.0,138.0,73.0,427.5,56.0,-26.0,-11.0,0.101452,0.053283,0.159695,0.022819,-0.008175,-0.023188,-0.048658,0.074067,-0.076849,-0.067055,-0.123123,0.116044,-0.028686,-0.037439,-0.075944,-0.154665,-0.157093,-0.172657,-0.026642,-0.043949,-0.003478,-262.5,-425.5,-513.5,-380.0,-718.5,-69.0,-769.0,-0.0155,-0.01362,0.042355,0.001649,0.082865,-0.025851,0.001295,0.06299,0.016208,0.10923,-0.01385,0.065129,0.015891,0.113678,-0.015664,-0.051579,0.050673,-0.083836,0.102146,-0.032505,-0.134215,-99.0,-182.0,-262.5,184.0,-154.5,-3.0,-323.0,-0.014195,-0.023836,0.052755,0.012016,0.031142,-0.014721,-0.01056,0.071643,0.026333,0.047617,-0.002569,0.085268,0.037725,0.05987,0.00767,-0.049874,-0.027958,-0.081763,0.021793,-0.032021,-0.05407,149.0,-253.5,-347.5,103.5,-183.0,-23.0,500.5,-0.110516,-0.113834,-0.029444,-0.06072,-0.047326,0.01374,-0.007878,0.073275,0.03512,0.056781,0.118001,0.084073,0.04453,0.066444,0.131647,-0.039694,-0.019006,0.051398,0.020537,0.091273,0.069859,-60.0,101.0,198.0,274.0,12.0,41.0,-259.0,0.03275,0.040431,0.04286,0.012384,0.020636,-0.01026,0.009895,0.014438,-0.015312,-0.011857,-0.042517,0.004962,-0.025894,-0.021874,-0.054536,-0.031629,-0.026432,-0.060533,0.005081,-0.028154,-0.032458,-55.5,-230.0,-356.0,-447.5,-471.5,68.0,-540.5,-0.024067,-0.029929,-0.028969,-0.016879,0.02283,-0.026913,-0.007668,-0.008485,0.001822,0.04825,-0.008176,-0.001127,0.009235,0.055251,-0.001203,0.010748,0.054478,-0.00011,0.039987,-0.011092,-0.051995,-0.090638,-0.334273,0.183524,-0.291188,0.41615,-0.319477,0.193004,-0.373726,0.048982,-0.208223,-0.454973,-1.136048,-0.315197,-1.852972,2.25568,1.894983,1.484573,-0.935769,1.484573,-0.935769,-0.064717,0.03755,-0.016654,-0.017958,-0.000264,0.003411,0.014788,0.015041,0.00384,0.032088,-0.138816,-0.426148,0.142993,-0.29638,0.566382,0.102496,0.324569,-0.054169,0.141311,-0.01406,2
3,101101,3,1,1,1,2,0,1,1,1,1,5,5,-2600,0,-31200.0,13.260001,-14.939394,0.636364,27.621212,77.030303,7.893939,-2.954545,130.893939,0.00993,0.022391,0.015451,0.008472,0.006049,0.045525,0.012832,0.010223,0.001147,-0.000614,0.039783,0.000253,-0.009777,-0.010514,0.02764,-0.01169,-0.013956,0.02597,-0.002136,0.038534,0.038246,-47.161215,-76.033294,-151.121495,58.03271,-161.051402,-33.998832,-222.530958,-0.00468,-0.027432,0.027932,-0.004729,0.014513,-0.028078,-0.023801,0.036052,-0.001125,0.020776,-0.026109,0.058928,0.020702,0.042346,-0.003412,-0.042913,-0.016502,-0.067156,0.026198,-0.025627,-0.049945,-33.770597,-45.20393,-112.981514,131.078166,-87.385735,-31.04818,-168.781368,0.000239,-0.022756,0.033807,0.000993,0.009936,-0.023999,-0.023901,0.038387,0.000908,0.011347,-0.026372,0.061222,0.022813,0.032152,-0.003528,-0.043502,-0.030433,-0.069891,0.012808,-0.028027,-0.039028,-42.330088,-52.315837,-120.482009,107.712084,-88.000645,-31.469758,-167.869358,0.002109,-0.018543,0.033221,0.004521,0.01275,-0.017373,-0.021504,0.035968,0.003225,0.012954,-0.021065,0.057062,0.023156,0.03242,-0.00051,-0.037508,-0.025529,-0.061845,0.011822,-0.025291,-0.035997,-45.309761,-55.469485,-123.335196,86.059107,-104.044956,-29.352504,-172.481277,0.002428,-0.016756,0.030543,0.0032,0.013772,-0.015812,-0.020001,0.032585,0.001436,0.013774,-0.019706,0.052385,0.019998,0.032292,-0.000624,-0.035434,-0.020748,-0.056707,0.014588,-0.021985,-0.035718,-46.5,-20.0,-11.0,-138.0,-12.5,-20.0,-48.0,0.024768,0.029419,0.006492,0.024712,0.019441,0.014277,0.004232,-0.011302,0.007137,0.005746,-0.009744,-0.013455,0.003341,0.002557,-0.013611,0.023123,0.023185,0.004349,-0.00023,-0.016915,-0.014424,-3.0,51.0,-20.0,361.5,43.0,-23.5,14.0,0.036952,-0.011545,0.071079,0.015827,-0.000663,0.01138,-0.049359,0.051,-0.017108,-0.024754,-0.025943,0.089051,0.027783,0.006437,0.023349,-0.079158,-0.096296,-0.074066,-0.017328,-0.006837,0.008572,-3.0,49.0,23.0,189.0,28.0,-24.0,-2.0,0.03608,0.019687,0.062731,0.01461,-0.000724,0.001158,-0.016842,0.035245,-0.019898,-0.024125,-0.035452,0.049611,-0.003547,-0.012869,-0.018707,-0.058282,-0.065198,-0.065348,-0.013559,-0.014132,0.001484,12.0,145.0,73.0,427.5,202.5,-26.0,107.5,0.106496,0.053283,0.159695,0.099502,-0.008175,0.09084,-0.053765,0.069467,-0.001271,-0.070206,-0.016239,0.116044,0.050239,-0.037439,0.037482,-0.075782,-0.157093,-0.080512,-0.085487,-0.014136,0.054999,12.0,138.0,73.0,93.0,56.0,-26.0,-11.0,0.101452,0.053283,0.034533,0.022819,-0.008175,-0.023188,-0.048658,-0.061334,-0.076849,-0.067055,-0.123123,-0.014683,-0.028686,-0.037439,-0.075944,-0.013737,-0.039532,-0.053951,-0.026642,-0.043949,-0.003478,-5.5,20.0,-83.5,471.0,-24.5,7.0,51.5,0.009189,-0.021731,0.058678,-0.001694,0.002849,0.011674,-0.031775,0.057443,-0.010329,-0.005026,0.00379,0.089011,0.019352,0.024005,0.034882,-0.07771,-0.072497,-0.061402,0.005639,0.015328,0.009605,-99.0,-182.0,-224.0,184.0,-309.0,-11.5,-491.0,-0.014195,-0.014294,0.052755,-0.003937,0.02994,-0.036166,-0.000637,0.071643,0.008288,0.046299,-0.026311,0.076075,0.009398,0.048994,-0.026867,-0.069763,-0.029348,-0.10744,0.040194,-0.037541,-0.078308,106.5,60.5,241.5,287.0,360.5,-23.0,228.0,-0.019764,0.009812,0.006054,0.003413,-0.034493,-0.00484,0.030202,0.025487,0.02105,-0.017988,0.012707,-0.003438,-0.005617,-0.049265,-0.015977,-0.002495,-0.045928,-0.013044,-0.041867,-0.010503,0.032461,-244.5,-123.0,39.5,161.0,12.0,41.0,-259.0,0.036071,0.059838,0.069521,0.046876,0.062716,0.026547,0.026902,0.039678,0.020198,0.028232,-0.005305,0.013792,-0.004272,0.001044,-0.032216,-0.017936,-0.01256,-0.046554,0.005081,-0.028154,-0.032458,-55.5,-230.0,-356.0,-447.5,-471.5,68.0,-540.5,-0.024067,-0.029929,-0.028969,-0.016879,0.02283,-0.026913,-0.007668,-0.008485,0.001822,0.04825,-0.008176,-0.001127,0.009235,0.055251,-0.001203,0.010748,0.054478,-0.00011,0.039987,-0.011092,-0.051995,-0.033067,-0.339289,0.215588,-0.289247,0.474134,-0.334575,0.191111,-0.382774,0.045807,-0.205288,-0.457179,-1.132364,-0.315197,-1.852972,2.25568,1.894983,1.484573,-0.935769,0.317548,-0.01259,-0.064717,0.03755,0.049441,0.000172,-0.012838,-0.000741,0.013468,0.014698,0.004803,0.03536,-0.141799,-0.369501,0.140318,-0.2896,0.58035,0.117744,0.32484,-0.055841,0.126476,0.03839,2
4,101236,3,1,1,1,2,0,0,1,0,0,5,5,-12000,1,-14000.0,0.75,-53.857143,-88.542857,-111.785714,-78.628571,-155.4,-40.057143,-88.185714,-0.006368,-0.010192,0.008708,-0.004386,0.014595,0.004125,-0.004076,0.01549,0.001108,0.021989,0.010648,0.019814,0.004997,0.026536,0.014861,-0.01593,0.008281,-0.005948,0.024069,0.009835,-0.013897,-43.28314,-80.85,-157.323256,38.322674,-174.061628,-34.174419,-228.562791,-0.008718,-0.031663,0.023307,-0.008717,0.012874,-0.031435,-0.024057,0.034295,-0.001945,0.022269,-0.025775,0.057283,0.020131,0.044134,-0.002808,-0.041838,-0.012791,-0.064811,0.028838,-0.024384,-0.051289,-35.631335,-50.18274,-119.333333,130.47075,-95.682016,-30.25934,-178.762525,-0.000768,-0.023989,0.034239,0.000401,0.010735,-0.02555,-0.024152,0.039788,0.001149,0.013108,-0.027064,0.062854,0.023317,0.03422,-0.003969,-0.044895,-0.029996,-0.072105,0.014652,-0.028986,-0.041701,-34.406753,-40.581325,-102.252674,117.112772,-67.622567,-30.478928,-142.486983,0.002472,-0.0167,0.031492,0.004224,0.009815,-0.015691,-0.019945,0.033659,0.002549,0.009257,-0.019569,0.053208,0.021041,0.027129,-0.000494,-0.035621,-0.027266,-0.057681,0.008197,-0.022989,-0.030209,-44.377604,-53.057406,-119.754155,91.183177,-98.896836,-28.87539,-168.35115,0.002831,-0.01616,0.030973,0.003517,0.013466,-0.01547,-0.019781,0.032682,0.001425,0.013065,-0.019715,0.052272,0.019791,0.031336,-0.000841,-0.035525,-0.021657,-0.056788,0.013765,-0.021975,-0.034896,-7.5,24.5,39.5,-199.0,50.5,-20.0,109.5,0.018974,0.028797,-0.024408,0.013271,0.001388,0.050358,0.009799,-0.043997,-0.001168,-0.012175,0.034265,-0.050301,-0.009231,-0.018523,0.024392,0.053763,0.037921,0.078692,-0.01519,0.033105,0.041445,-3.0,51.0,-20.0,154.0,43.0,-23.5,14.0,0.036952,-0.011545,0.026228,0.015827,-0.000663,0.01138,-0.049359,0.001533,-0.017108,-0.024754,-0.025943,0.03851,0.027783,0.006437,0.023349,-0.019598,-0.039724,-0.02215,-0.017328,-0.006837,0.008572,-3.0,59.0,23.0,189.0,77.5,-11.0,47.0,0.042585,0.019687,0.062731,0.035053,0.000456,0.037823,-0.023494,0.029189,-0.004201,-0.027012,-0.005241,0.049611,0.018174,-0.011508,0.018279,-0.035769,-0.063254,-0.033099,-0.029845,-0.000843,0.02241,12.0,145.0,73.0,427.5,202.5,-26.0,107.5,0.106496,0.053283,0.159695,0.099502,-0.008175,0.09084,-0.053765,0.069467,-0.001271,-0.070206,-0.016239,0.116044,0.050239,-0.037439,0.037482,-0.075782,-0.157093,-0.080512,-0.085487,-0.014136,0.054999,12.0,138.0,73.0,427.5,56.0,-26.0,-11.0,0.101452,0.053283,0.159695,0.022819,-0.008175,-0.023188,-0.048658,0.074067,-0.076849,-0.067055,-0.123123,0.116044,-0.028686,-0.037439,-0.075944,-0.154665,-0.157093,-0.172657,-0.026642,-0.043949,-0.003478,-181.5,-199.5,-300.5,179.0,-220.5,-4.0,-97.5,0.010401,-0.008928,0.084962,0.027424,0.058591,0.045966,-0.019752,0.083018,0.019951,0.054429,0.03911,0.104469,0.039533,0.074962,0.059372,-0.068841,-0.030794,-0.04914,0.038059,0.019348,-0.018525,-2.0,-104.0,-274.0,184.0,-204.0,-23.0,-318.0,-0.027789,-0.061572,0.023453,-0.022276,-0.002612,-0.045754,-0.035795,0.051359,0.000509,0.023492,-0.022948,0.087709,0.03411,0.059128,0.011244,-0.055828,-0.031222,-0.080618,0.024472,-0.024796,-0.049629,103.5,-34.0,-179.5,184.0,176.5,19.0,500.5,-0.042103,-0.068662,-0.005708,-0.009734,-0.027475,0.026791,-0.02907,0.03374,0.026959,0.01116,0.06815,0.06355,0.05583,0.040334,0.099388,-0.005554,-0.024403,0.03918,-0.018509,0.044845,0.063137,3.0,158.0,265.0,274.0,12.0,41.0,-259.0,0.02782,0.035047,0.028772,0.000224,0.005561,-0.023283,0.009186,0.004091,-0.024654,-0.022562,-0.052342,-0.005013,-0.035264,-0.031905,-0.064231,-0.031629,-0.026432,-0.060533,0.005081,-0.028154,-0.032458,-55.5,-230.0,-356.0,-447.5,-471.5,68.0,-540.5,-0.024067,-0.029929,-0.028969,-0.016879,0.02283,-0.026913,-0.007668,-0.008485,0.001822,0.04825,-0.008176,-0.001127,0.009235,0.055251,-0.001203,0.010748,0.054478,-0.00011,0.039987,-0.011092,-0.051995,-0.062853,-0.350067,0.255025,-0.25236,0.363957,-0.367007,0.191189,-0.385291,0.049205,-0.207326,-0.457179,-1.132364,1.153745,-0.229618,1.858879,1.707094,1.484573,-0.935769,1.484573,-0.935769,-0.064717,0.03755,-0.016654,-0.017958,-0.000264,0.003411,0.014788,0.015041,0.004133,0.032205,-0.15278,-0.411878,0.28187,-0.109645,0.397214,-0.054775,0.328725,-0.063053,0.142149,-0.01336,2


In [17]:
# DV as Quantiles
#df['pscores_2011'] = pd.qcut(df['pscores_2011'], 3, labels=False)
#df['pscores_2011'].value_counts()
df['pscores_bin'] = df['pscores'] < 0

In [18]:
#df = df.drop(['pscores_2011'], axis=1)

In [19]:
# Keep only 2011 columns, but include viirs_2012
## VIIRS Columns
df_viirs = df.filter(regex='viirs').filter(regex='_2km')
df_landsat = df.filter(regex='^b').filter(regex='_1km')
df_y = df.filter(regex='^pscores_bin$')

df_all = df_y.join(df_viirs).join(df_landsat)
df_all.head()

Unnamed: 0,pscores_bin,viirs_spatialmean_monthlymean_buff_2km,viirs_spatialmean_monthlysd_buff_2km,viirs_spatialmax_monthlymean_buff_2km,viirs_spatialmax_monthlysd_buff_2km,viirs_spatialmin_monthlymean_buff_2km,viirs_spatialmin_monthlysd_buff_2km,viirs_spatialsd_monthlymean_buff_2km,viirs_spatialsd_monthlysd_buff_2km,b1_buff_1km_mean,b2_buff_1km_mean,b3_buff_1km_mean,b4_buff_1km_mean,b5_buff_1km_mean,b6_buff_1km_mean,b7_buff_1km_mean,b12_buff_1km_mean,b13_buff_1km_mean,b14_buff_1km_mean,b15_buff_1km_mean,b16_buff_1km_mean,b17_buff_1km_mean,b23_buff_1km_mean,b24_buff_1km_mean,b25_buff_1km_mean,b26_buff_1km_mean,b27_buff_1km_mean,b34_buff_1km_mean,b35_buff_1km_mean,b36_buff_1km_mean,b37_buff_1km_mean,b45_buff_1km_mean,b46_buff_1km_mean,b47_buff_1km_mean,b56_buff_1km_mean,b57_buff_1km_mean,b67_buff_1km_mean,b1_buff_1km_min,b2_buff_1km_min,b3_buff_1km_min,b4_buff_1km_min,b5_buff_1km_min,b6_buff_1km_min,b7_buff_1km_min,b12_buff_1km_min,b13_buff_1km_min,b14_buff_1km_min,b15_buff_1km_min,b16_buff_1km_min,b17_buff_1km_min,b23_buff_1km_min,b24_buff_1km_min,b25_buff_1km_min,b26_buff_1km_min,b27_buff_1km_min,b34_buff_1km_min,b35_buff_1km_min,b36_buff_1km_min,b37_buff_1km_min,b45_buff_1km_min,b46_buff_1km_min,b47_buff_1km_min,b56_buff_1km_min,b57_buff_1km_min,b67_buff_1km_min,b1_buff_1km_max,b2_buff_1km_max,b3_buff_1km_max,b4_buff_1km_max,b5_buff_1km_max,b6_buff_1km_max,b7_buff_1km_max,b12_buff_1km_max,b13_buff_1km_max,b14_buff_1km_max,b15_buff_1km_max,b16_buff_1km_max,b17_buff_1km_max,b23_buff_1km_max,b24_buff_1km_max,b25_buff_1km_max,b26_buff_1km_max,b27_buff_1km_max,b34_buff_1km_max,b35_buff_1km_max,b36_buff_1km_max,b37_buff_1km_max,b45_buff_1km_max,b46_buff_1km_max,b47_buff_1km_max,b56_buff_1km_max,b57_buff_1km_max,b67_buff_1km_max
0,False,0.244809,-0.254202,1.153745,-0.229618,-0.016654,-0.017958,0.279187,-0.107712,-38.13466,-54.520337,-124.118536,126.720947,-98.793725,-30.003341,-184.361418,-0.001151,-0.024045,0.034539,0.000952,0.011668,-0.025312,-0.023822,0.040444,0.002119,0.014496,-0.026433,0.063285,0.024047,0.035483,-0.003654,-0.044497,-0.029081,-0.072213,0.01519,-0.029438,-0.04274,-3.0,59.0,23.0,189.0,77.5,-10.5,47.0,0.042585,0.019687,0.062731,0.035053,0.000501,0.037823,-0.023494,0.029189,-0.004201,-0.02696,-0.005241,0.049611,0.018174,-0.011457,0.018279,-0.035769,-0.063184,-0.033099,-0.02978,-0.000843,0.02236,19.5,-34.0,-179.5,102.0,-183.0,42.0,500.5,-0.014714,-0.042357,0.007604,-0.025551,0.000469,0.050267,-0.02907,0.022054,-0.01432,0.014673,0.06815,0.05136,0.01279,0.043953,0.099388,-0.03947,-0.008266,0.051623,0.031011,0.091273,0.05949
1,False,0.215588,-0.289247,-0.315197,-1.852972,0.049441,0.000172,0.140318,-0.2896,-33.10828,-43.913289,-111.013752,132.278083,-84.460046,-30.926896,-165.91271,0.000404,-0.022373,0.033712,0.001183,0.009684,-0.023583,-0.023674,0.038133,0.000974,0.010918,-0.026086,0.060768,0.022672,0.031519,-0.003462,-0.043124,-0.030655,-0.06933,0.012211,-0.027814,-0.038265,-3.0,49.0,23.0,189.0,28.0,-24.0,-2.0,0.03608,0.019687,0.062731,0.01461,-0.000724,0.001158,-0.016842,0.035245,-0.019898,-0.024125,-0.035452,0.049611,-0.003547,-0.012869,-0.018707,-0.058282,-0.065198,-0.065348,-0.013559,-0.014132,0.001484,106.5,46.5,213.5,287.0,367.0,-23.0,230.5,-0.0232,0.004681,0.006054,0.003657,-0.034493,-0.00459,0.028221,0.028802,0.02429,-0.01463,0.016162,0.002006,-0.000151,-0.043787,-0.010367,-0.002151,-0.045928,-0.012734,-0.042127,-0.010533,0.032766
2,False,0.183524,-0.291188,-0.315197,-1.852972,-0.016654,-0.017958,0.142993,-0.29638,-32.58263,-40.047134,-97.124964,124.080012,-75.249782,-30.361362,-140.287169,0.001815,-0.017179,0.031837,0.002501,0.009534,-0.017125,-0.019748,0.034897,0.0012,0.009598,-0.020488,0.053939,0.019339,0.026885,-0.001613,-0.039202,-0.028689,-0.060137,0.010325,-0.022375,-0.031328,-3.0,59.0,23.0,189.0,77.5,-11.0,47.0,0.042585,0.019687,0.062731,0.035053,0.000456,0.037823,-0.023494,0.029189,-0.004201,-0.027012,-0.005241,0.049611,0.018174,-0.011508,0.018279,-0.035769,-0.063254,-0.033099,-0.029845,-0.000843,0.02241,149.0,-253.5,-347.5,103.5,-183.0,-23.0,500.5,-0.110516,-0.113834,-0.029444,-0.06072,-0.047326,0.01374,-0.007878,0.073275,0.03512,0.056781,0.118001,0.084073,0.04453,0.066444,0.131647,-0.039694,-0.019006,0.051398,0.020537,0.091273,0.069859
3,False,0.215588,-0.289247,-0.315197,-1.852972,0.049441,0.000172,0.140318,-0.2896,-33.770597,-45.20393,-112.981514,131.078166,-87.385735,-31.04818,-168.781368,0.000239,-0.022756,0.033807,0.000993,0.009936,-0.023999,-0.023901,0.038387,0.000908,0.011347,-0.026372,0.061222,0.022813,0.032152,-0.003528,-0.043502,-0.030433,-0.069891,0.012808,-0.028027,-0.039028,-3.0,49.0,23.0,189.0,28.0,-24.0,-2.0,0.03608,0.019687,0.062731,0.01461,-0.000724,0.001158,-0.016842,0.035245,-0.019898,-0.024125,-0.035452,0.049611,-0.003547,-0.012869,-0.018707,-0.058282,-0.065198,-0.065348,-0.013559,-0.014132,0.001484,106.5,60.5,241.5,287.0,360.5,-23.0,228.0,-0.019764,0.009812,0.006054,0.003413,-0.034493,-0.00484,0.030202,0.025487,0.02105,-0.017988,0.012707,-0.003438,-0.005617,-0.049265,-0.015977,-0.002495,-0.045928,-0.013044,-0.041867,-0.010503,0.032461
4,False,0.255025,-0.25236,1.153745,-0.229618,-0.016654,-0.017958,0.28187,-0.109645,-35.631335,-50.18274,-119.333333,130.47075,-95.682016,-30.25934,-178.762525,-0.000768,-0.023989,0.034239,0.000401,0.010735,-0.02555,-0.024152,0.039788,0.001149,0.013108,-0.027064,0.062854,0.023317,0.03422,-0.003969,-0.044895,-0.029996,-0.072105,0.014652,-0.028986,-0.041701,-3.0,59.0,23.0,189.0,77.5,-11.0,47.0,0.042585,0.019687,0.062731,0.035053,0.000456,0.037823,-0.023494,0.029189,-0.004201,-0.027012,-0.005241,0.049611,0.018174,-0.011508,0.018279,-0.035769,-0.063254,-0.033099,-0.029845,-0.000843,0.02241,103.5,-34.0,-179.5,184.0,176.5,19.0,500.5,-0.042103,-0.068662,-0.005708,-0.009734,-0.027475,0.026791,-0.02907,0.03374,0.026959,0.01116,0.06815,0.06355,0.05583,0.040334,0.099388,-0.005554,-0.024403,0.03918,-0.018509,0.044845,0.063137


In [16]:
# Drop columns where the label is missing
#df = df.loc[~pd.isnull(df['hhinc_2011'])]

#df.shape

## 2. Split data into test/train

In [20]:
LABEL = 'pscores_bin'
TEST_SIZE = 0.3

# Separate feature sets from label sets
x_df = df_all.drop(labels=[LABEL], axis=1)
y_df = df_all[LABEL]

# Split into test and train sets for features and labels
x_train, x_test, y_train, y_test =  train_test_split(x_df, y_df, test_size=TEST_SIZE)

## 3. Preprocess data

All vars are numeric - impute missing data with mean

In [32]:
x_train.head()

Unnamed: 0,dmspols_2011,l7_2011_1,l7_2011_2,l7_2011_3,l7_2011_4,l7_2011_5,l7_2011_6,l7_2011_7,viirs_2012,uid,dmspols_2011_imputed,viirs_2012_imputed,ratio_1_2,ratio_1_3,ratio_1_4,ratio_1_5,ratio_1_6,ratio_1_7,ratio_2_3,ratio_2_4,ratio_2_5,ratio_2_6,ratio_2_7,ratio_3_4,ratio_3_5,ratio_3_6,ratio_3_7,ratio_4_5,ratio_4_6,ratio_4_7,ratio_5_6,ratio_5_7,ratio_6_7
374,24.0,901.910885,1276.550363,1411.094775,2996.116401,2546.972714,3031.464877,1907.712192,1.229348,4900320,0,0,0.171974,0.220139,0.537248,0.476984,0.541406,0.357984,0.05006,0.402457,0.332265,0.40736,0.198213,0.359643,0.286978,0.364738,0.149637,0.081028,0.005864,0.22195,0.086851,0.143503,0.227518
2388,31.0,1010.649942,1344.68576,1589.68199,2445.176189,2589.602233,3018.334252,2046.523202,4.102749,25703397,0,0,0.141821,0.222676,0.415104,0.438567,0.49831,0.338834,0.083492,0.290377,0.316427,0.383599,0.206958,0.212026,0.239256,0.310036,0.125637,0.028686,0.104907,0.088753,0.076451,0.117141,0.191873
3461,6.5,1016.671152,1320.956019,1479.582899,2359.837384,2234.348958,3026.724826,1685.98452,0.30112,37700984,0,0,0.130168,0.185443,0.397797,0.374553,0.49712,0.24765,0.056642,0.282244,0.25691,0.39234,0.121395,0.229268,0.203226,0.343328,0.065202,0.027315,0.123806,0.166555,0.150611,0.139877,0.284495
3662,0.0,617.041618,787.066415,809.077871,2105.904002,1763.348318,2938.720708,1204.052204,0.109788,39800512,0,0,0.121091,0.134656,0.546784,0.481563,0.652934,0.32234,0.01379,0.455877,0.382793,0.577503,0.209423,0.444883,0.370961,0.568238,0.196199,0.088533,0.16509,0.272466,0.249969,0.18848,0.418722
83,18.0,880.414847,1152.104803,1318.878894,2572.97933,2284.983406,3030.307132,1783.694032,1.105853,1702700,0,0,0.133672,0.199366,0.490116,0.443726,0.549743,0.339055,0.067493,0.381434,0.329604,0.449072,0.215134,0.322237,0.268075,0.393505,0.149816,0.059283,0.081618,0.181167,0.140223,0.123207,0.258956


In [33]:
x_test.head()

Unnamed: 0,dmspols_2011,l7_2011_1,l7_2011_2,l7_2011_3,l7_2011_4,l7_2011_5,l7_2011_6,l7_2011_7,viirs_2012,uid,dmspols_2011_imputed,viirs_2012_imputed,ratio_1_2,ratio_1_3,ratio_1_4,ratio_1_5,ratio_1_6,ratio_1_7,ratio_2_3,ratio_2_4,ratio_2_5,ratio_2_6,ratio_2_7,ratio_3_4,ratio_3_5,ratio_3_6,ratio_3_7,ratio_4_5,ratio_4_6,ratio_4_7,ratio_5_6,ratio_5_7,ratio_6_7
3145,6.0,1067.911645,1347.568656,1496.182648,2448.161935,2328.447567,3004.75898,1737.813586,0.337368,34902063,0,0,0.115777,0.167026,0.392554,0.371143,0.475572,0.238762,0.05226,0.289956,0.266832,0.38076,0.126482,0.241353,0.217607,0.335169,0.074716,0.025063,0.102073,0.169697,0.126811,0.145252,0.267143
3888,0.0,909.066599,1054.835316,1108.65148,1493.869704,1341.081399,2903.133198,1080.304266,0.1739,43301419,0,0,0.074224,0.098916,0.24337,0.191994,0.523075,0.086076,0.024875,0.172258,0.119472,0.466981,0.011928,0.148017,0.09488,0.447303,0.01295,0.053895,0.320505,0.160659,0.368043,0.107697,0.457602
1117,7.666667,843.911082,1138.947636,1333.943429,2591.361184,2285.129243,3031.391065,1782.293734,0.276674,14502621,0,0,0.148794,0.225007,0.508679,0.460594,0.564467,0.357315,0.078853,0.389355,0.334742,0.453787,0.22023,0.320336,0.262826,0.388847,0.143876,0.062798,0.078259,0.184987,0.140367,0.123625,0.259489
3348,10.0,1188.81887,1759.164822,2251.561736,3211.930984,3842.226267,3058.202097,3118.564065,0.925885,36901045,0,0,0.19347,0.308903,0.45972,0.527407,0.440163,0.448009,0.12277,0.292243,0.371883,0.269657,0.278695,0.175779,0.261031,0.151916,0.161449,0.089351,0.024518,0.014749,0.11362,0.103963,0.009772
5245,2.0,1543.693146,2214.106303,2833.584665,3405.777374,4622.612692,3047.386146,3561.18603,0.158197,61200110,0,0,0.178406,0.294679,0.376219,0.499313,0.327525,0.395209,0.122725,0.212046,0.35229,0.158373,0.233249,0.091707,0.239938,0.036355,0.113781,0.151567,0.055537,0.022307,0.205375,0.129699,0.077747


In [34]:
# check that lengths match
print(len(x_train) == len(y_train))
print(len(x_test) == len(y_test))

True
True


In [34]:
DAY_FEATURES = df_all.filter(regex='^b', axis=1).columns.tolist()
NIGHT_FEATURES = df_all.filter(regex='viirs', axis=1).columns.tolist()
ALL_FEATURES = DAY_FEATURES + NIGHT_FEATURES

print("Day-only:", DAY_FEATURES)
print("-----")
print("Night-only:", NIGHT_FEATURES)

Day-only: ['b1_buff_1km_mean', 'b2_buff_1km_mean', 'b3_buff_1km_mean', 'b4_buff_1km_mean', 'b5_buff_1km_mean', 'b6_buff_1km_mean', 'b7_buff_1km_mean', 'b12_buff_1km_mean', 'b13_buff_1km_mean', 'b14_buff_1km_mean', 'b15_buff_1km_mean', 'b16_buff_1km_mean', 'b17_buff_1km_mean', 'b23_buff_1km_mean', 'b24_buff_1km_mean', 'b25_buff_1km_mean', 'b26_buff_1km_mean', 'b27_buff_1km_mean', 'b34_buff_1km_mean', 'b35_buff_1km_mean', 'b36_buff_1km_mean', 'b37_buff_1km_mean', 'b45_buff_1km_mean', 'b46_buff_1km_mean', 'b47_buff_1km_mean', 'b56_buff_1km_mean', 'b57_buff_1km_mean', 'b67_buff_1km_mean', 'b1_buff_1km_min', 'b2_buff_1km_min', 'b3_buff_1km_min', 'b4_buff_1km_min', 'b5_buff_1km_min', 'b6_buff_1km_min', 'b7_buff_1km_min', 'b12_buff_1km_min', 'b13_buff_1km_min', 'b14_buff_1km_min', 'b15_buff_1km_min', 'b16_buff_1km_min', 'b17_buff_1km_min', 'b23_buff_1km_min', 'b24_buff_1km_min', 'b25_buff_1km_min', 'b26_buff_1km_min', 'b27_buff_1km_min', 'b34_buff_1km_min', 'b35_buff_1km_min', 'b36_buff_1km_m

## 5. Train and Evaluate Regressors

### 5.1 Training

In [35]:
x_all = x_test.append(x_train)
y_all = y_test.append(y_train)

In [36]:
# Define a TrainedRegressor object to hold key results information
class TrainedRegressor:
    
    def __init__(self, method, params, features, regressor):
        self.method = method
        self.params = params
        self.regressor = regressor
        self.features = features
    
    def __repr__(self):
        return f'Trained {self.method} on feature set {self.features} with params {self.params}'

In [37]:
# Use GRID_MAIN for full grid search
# parameters = cf.GRID_TEST_CLASS
parameters = GRID_TEST_CLASS

results_df = pd.DataFrame()
results_df_all = pd.DataFrame()
results_df_trainedonly_all = pd.DataFrame()

x_trainedonly_all = x_all.copy()

trained_list = []
trained_list_all = []
count = 0
# print('Training model ', end='')
for i in parameters['regressors']:
    for j in parameters[i]:
        for k in ('DAY_FEATURES', 'NIGHT_FEATURES', 'ALL_FEATURES'):
        
            print(f'Model {count}: Training {i} on {k} with params {str(j)}')

            # A. Train ------------------------------------
            # Initialize regressor, fit data, then append TrainedRegressor object to list
            # 1. Train Data
            regressor = eval(i)(**j)
            trained = regressor.fit(x_train[eval(k)], y_train)
            trained_list.append(TrainedRegressor(i, str(j), k, trained))

            # 2. All Data
            trained_all = regressor.fit(x_all[eval(k)], y_all)
            trained_list_all.append(TrainedRegressor(i, str(j), k, trained_all))
            
            
            
            
            
            
            
            # B. Results -------------------------------------
            # 1. Trained Model on Test Data - - - - - - - - - -
            pred_labels = trained_list[count].regressor.predict(x_test[eval(k)])

            pred_dict = {
                'regressor': trained_list[count].method,
                'features': trained_list[count].features,
                'params': trained_list[count].params,
                'accuracy_score': accuracy_score(y_true=y_test, y_pred=pred_labels)        
            }
    
            results_df = results_df.append(pred_dict, ignore_index=True) \
                .sort_values(by='accuracy_score', ascending=False, axis=0) \
                [['regressor', 'params', 'features', 'accuracy_score']]
        
            results_df.to_csv("/Users/robmarty/Desktop/pov_results_r13.csv")
            
            x_test['y_true'] = y_test
            x_test['y_predict_' + str(count)] = pred_labels
            x_test.to_csv(os.path.join(final_data_file_path, 'Data with Predicted Income', 'pov_opm_data_with_predictions_traineddatamodel_testdatapredict_r13.csv'))
            
            
            
            
            
            
            
            # 2. Trained All Model on All Data - - - - - - - - - -
            pred_labels_all = trained_list_all[count].regressor.predict(x_all[eval(k)])

            # Append results to dataframe and sort by R^2
            pred_dict = {
                'regressor': trained_list_all[count].method,
                'features': trained_list_all[count].features,
                'params': trained_list_all[count].params,
                'accuracy_score': accuracy_score(y_true=y_all, y_pred=pred_labels_all)        
            }
    
            results_df_all = results_df_all.append(pred_dict, ignore_index=True) \
                .sort_values(by='accuracy_score', ascending=False, axis=0) \
                [['regressor', 'params', 'features', 'accuracy_score']]
        
            results_df_all.to_csv("/Users/robmarty/Desktop/pov_results_all_r13.csv")

            # ALL
            x_trainedonly_all['y_true'] = y_all
            x_trainedonly_all['y_predict_' + str(count)] = trained_list_all[count].regressor.predict(x_all[eval(k)])
            x_trainedonly_all.to_csv(os.path.join(final_data_file_path, 'Data with Predicted Income', 'pov_opm_data_with_predictions_alldatamodel_alldatapredict_r13.csv'))
            
            
            
            
            
            
            # 3. Trained Model on All Data - - - - - - - - - -
            pred_labels_trainedonly_all = trained_list[count].regressor.predict(x_all[eval(k)])

            # Append results to dataframe and sort by R^2
            pred_dict = {
                'regressor': trained_list[count].method,
                'features': trained_list[count].features,
                'params': trained_list[count].params,
                'accuracy_score': accuracy_score(y_true=y_all, y_pred=pred_labels_trainedonly_all)        
            }
    
            results_df_trainedonly_all = results_df_trainedonly_all.append(pred_dict, ignore_index=True) \
                .sort_values(by='accuracy_score', ascending=False, axis=0) \
                [['regressor', 'params', 'features', 'accuracy_score']]
        
            results_df_trainedonly_all.to_csv("/Users/robmarty/Desktop/pov_results_trainedonly_all_r13.csv")

            # ALL
            x_all['y_true'] = y_all
            x_all['y_predict_' + str(count)] = trained_list[count].regressor.predict(x_all[eval(k)])
            x_all.to_csv(os.path.join(final_data_file_path, 'Data with Predicted Income', 'pov_opm_data_with_predictions_testdatamodel_alldatapredict_r13.csv'))

            ####
            count += 1


Model 0: Training LinearSVC on DAY_FEATURES with params {'penalty': 'l2', 'C': 0.01, 'loss': 'epsilon_insensitive', 'max_iter': 1000.0, 'random_state': 0}
Model 1: Training LinearSVC on NIGHT_FEATURES with params {'penalty': 'l2', 'C': 0.01, 'loss': 'epsilon_insensitive', 'max_iter': 1000.0, 'random_state': 0}
Model 2: Training LinearSVC on ALL_FEATURES with params {'penalty': 'l2', 'C': 0.01, 'loss': 'epsilon_insensitive', 'max_iter': 1000.0, 'random_state': 0}
Model 3: Training LinearSVC on DAY_FEATURES with params {'penalty': 'l2', 'C': 0.01, 'loss': 'epsilon_insensitive', 'max_iter': 100000.0, 'random_state': 0}
Model 4: Training LinearSVC on NIGHT_FEATURES with params {'penalty': 'l2', 'C': 0.01, 'loss': 'epsilon_insensitive', 'max_iter': 100000.0, 'random_state': 0}
Model 5: Training LinearSVC on ALL_FEATURES with params {'penalty': 'l2', 'C': 0.01, 'loss': 'epsilon_insensitive', 'max_iter': 100000.0, 'random_state': 0}
Model 6: Training LinearSVC on DAY_FEATURES with params {'pe

KeyboardInterrupt: 

In [None]:
#y_test
#pred_labels
#parameters

### 5.2 Prediction and Evaluation

In [None]:
results_df = pd.DataFrame()
for i in trained_list:
    
    # Get predicted results from test data
    features = eval(i.features)
    pred_labels = i.regressor.predict(x_test[features])
    
    # Append results to dataframe and sort by R^2
    pred_dict = {
        'regressor': i.method,
        'features': i.features,
        'params': i.params,
        'r2': r2_score(y_true=y_test, y_pred=pred_labels)        
    }
    
    results_df = results_df.append(pred_dict, ignore_index=True) \
        .sort_values(by='r2', ascending=False, axis=0) \
        [['regressor', 'params', 'features', 'r2']]

results_df.shape

In [None]:
results_df