In [11]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [12]:
# Run on the GPU when torch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [31]:
# Read the processed 2010 and 2015 atlas tables from the processed-data folder.
atlas_2010, atlas_2015 = map(
    pd.read_csv,
    (
        '../data/processed/processed_atlas_2010.csv',
        '../data/processed/processed_atlas_2015.csv',
    ),
)

In [32]:
# Column selection (and order) applied to both the 2010 and 2015 atlas tables.
ordered_columns = [
    # tract key
    "CensusTract",
    # tract-level columns
    "Urban", "PovertyRate", "MedianFamilyIncome",
    "TractLOWI", "TractKids", "TractSeniors", "TractHUNV", "TractSNAP",
    # la*1share columns
    "lapop1share", "lalowi1share", "lakids1share",
    "laseniors1share", "lahunv1share",
    # population plus the ratio / index / weighted columns
    "POP2010", "LOWIRatio", "SNAPRatio", "HUNVRatio",
    "FoodInsecurityIndex", "LOWIWeighted",
]


In [33]:
# Build the per-year working frames, restricted to `ordered_columns` and in
# that column order. Selecting by that list already leaves out 'Unnamed: 0',
# so no separate drop is needed. (The earlier `.drop(columns=['Unnamed: 0'])`
# calls threw away their return value, so they never changed the frames.)
# `.copy()` keeps the working frames independent of the frames loaded from CSV.
atlas_dataset_2010 = atlas_2010[ordered_columns].copy()
atlas_dataset_2015 = atlas_2015[ordered_columns].copy()

# Show every column when wide frames are printed. This is a process-wide
# pandas option, so it also affects later cells.
pd.set_option('display.max_columns', None)
print(atlas_dataset_2010.tail())
print(atlas_dataset_2015.tail())

       CensusTract  Urban  PovertyRate  MedianFamilyIncome  TractLOWI  \
72526  56043000200      0         2.41            0.261634   0.060898   
72527  56043000301      1         9.48            0.249101   0.061614   
72528  56043000302      1         5.22            0.271133   0.062808   
72529  56045951100      0         7.69            0.290911   0.076023   
72530  56045951300      1         8.10            0.300357   0.087168   

       TractKids  TractSeniors  TractHUNV  TractSNAP  lapop1share  \
72526   0.074631      0.034335   0.010068   0.029425     0.815997   
72527   0.056902      0.023102   0.014524   0.018851     0.000000   
72528   0.051836      0.029877   0.003796   0.029425     0.061109   
72529   0.055298      0.028892   0.007757   0.015632     0.688747   
72530   0.077501      0.037635   0.005611   0.050575     0.361196   

       lalowi1share  lakids1share  laseniors1share  lahunv1share   POP2010  \
72526      0.172932      0.809707         0.795535      0.008969  0.

In [34]:
# Report, column by column, whether the 2010 and 2015 tables hold the same
# values. CensusTract is skipped. The element-wise comparison requires both
# columns to carry identical index labels, so rows are paired by index; this
# presumes both frames list the tracts in the same order (not verified here).
for col in ordered_columns:
    if col == "CensusTract":
        continue
    values_2010 = atlas_dataset_2010[col]
    values_2015 = atlas_dataset_2015[col]
    # NaN never compares equal to NaN, so a plain `==` would report a column
    # as different whenever the same cell is missing in both years. Count
    # "missing in both" as a match.
    same = (values_2010 == values_2015) | (values_2010.isna() & values_2015.isna())
    all_equal = same.all()
    print(f"{col}: {'Identical' if all_equal else 'Different'}")

Urban: Identical
PovertyRate: Different
MedianFamilyIncome: Identical
TractLOWI: Identical
TractKids: Identical
TractSeniors: Identical
TractHUNV: Identical
TractSNAP: Identical
lapop1share: Different
lalowi1share: Different
lakids1share: Different
laseniors1share: Different
lahunv1share: Different
POP2010: Identical
LOWIRatio: Different
SNAPRatio: Different
HUNVRatio: Different
FoodInsecurityIndex: Different
LOWIWeighted: Identical


In [23]:
# Load the saved per-tract cluster labels and preview the first rows.
labels_path = '../data/processed/clustered_atlas_labels.csv'
cluster_labels = pd.read_csv(labels_path)
cluster_labels.head()

Unnamed: 0,CensusTract,Cluster
0,1001020100,0
1,1001020200,0
2,1001020300,0
3,1001020400,2
4,1001020500,2


In [24]:
# Row counts of the label table and of both atlas tables, one per line.
for frame in (cluster_labels, atlas_dataset_2010, atlas_dataset_2015):
    print(len(frame))
# The cluster labels were built from the 2019 dataset, which was already
# aligned, so no further alignment step is performed here.
# NOTE(review): matching lengths alone do not show that the rows are in the
# same tract order — confirm by comparing the CensusTract columns.

72531
72531
72531


In [27]:
# Keep only the numeric columns of each year, then take the per-column change
# from 2010 to 2015 (2015 minus 2010). The subtraction pairs rows by index
# label, not by CensusTract value.
# NOTE(review): CensusTract is numeric, so it is differenced as well and shows
# up as 0 in the preview — confirm whether the tract key should be excluded
# or carried through instead.
training_atlas_2010, training_atlas_2015 = (
    frame.select_dtypes(include='number')
    for frame in (atlas_dataset_2010, atlas_dataset_2015)
)
training_dataset = training_atlas_2015.sub(training_atlas_2010)
training_dataset.head()

Unnamed: 0,CensusTract,Urban,PovertyRate,MedianFamilyIncome,TractLOWI,TractKids,TractSeniors,TractHUNV,TractSNAP,lapop1share,lalowi1share,lakids1share,laseniors1share,lahunv1share,POP2010,LOWIRatio,SNAPRatio,HUNVRatio,FoodInsecurityIndex,LOWIWeighted
0,0,0.0,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,-1.086098e-11,0.040171,-0.527048,-0.650294,-0.031467,0.0,0.066946,0.037657,-0.012552,0.09205,0.0
1,0,0.0,3.44,0.0,0.0,0.0,0.0,0.0,0.0,-4.68883e-10,-0.011842,-0.207861,-0.214718,-0.017174,0.0,0.037327,0.063134,-0.012903,0.087558,0.0
2,0,0.0,11.06,0.0,0.0,0.0,0.0,0.0,0.0,-2.66436e-10,0.093641,-0.310036,-0.376475,-0.004658,0.0,0.160984,0.002668,0.002075,0.165728,0.0
3,0,0.0,-3.02,0.0,0.0,0.0,0.0,0.0,0.0,-0.1244433,0.022441,-0.38458,-0.359846,0.00282,0.0,0.063839,-0.007752,0.0,0.056088,0.0
4,0,0.0,2.54,0.0,0.0,0.0,0.0,0.0,0.0,-0.01026874,0.023189,-0.169473,-0.298401,-0.003129,0.0,0.1175,0.032324,-0.002322,0.147501,0.0
