In [1]:
import matplotlib.pyplot as plt
from copy import deepcopy
import pandas as pd
import numpy as np
import importlib
import sklearn
import random
import torch
import math

# Project-local model module. It is reloaded right after import so that edits
# made to socialSig.py are picked up without restarting the kernel.
import socialSig
importlib.reload(socialSig)
# NOTE(review): `update_function` and `mae`, used by later cells, are not
# defined or imported anywhere else in this notebook, so they presumably come
# from this star import -- confirm against helpers.py.
from helpers import *

In [2]:
####### Load our Data
from sklearn import preprocessing

# Read the raw table and clean it in a single chain:
#   1. discard every column whose name starts with "Unnamed",
#   2. coerce each remaining column to numeric (unparseable cells become NaN),
#   3. drop any column that still contains at least one NaN.
devSet = (
    pd.read_csv("./us_migration.csv")
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
    .apply(pd.to_numeric, errors='coerce')
    .dropna(axis=1)
)

# Target vector as a float tensor; feature matrix built from all other columns.
y = torch.Tensor(devSet['US_MIG_05_10'].to_numpy())
X = devSet.drop(columns='US_MIG_05_10').to_numpy()

# Min-max scale every feature column (scaler is fit on the full feature matrix).
mMScale = preprocessing.MinMaxScaler()
X = mMScale.fit_transform(X)

In [5]:
# (rows, feature columns) of the cleaned table once the target column is removed.
devSet.drop(columns="US_MIG_05_10").shape

(2322, 30)

In [44]:
# Show the column labels of the cleaned frame (feature columns plus the target).
devSet.columns

Index(['sending', 'sending_citizen_unspecified', 'sending_citizenship_unknown',
       'sending_household_not_owned', 'sending_household_owned',
       'sending_household_owned_unknown', 'sending_indigeneity',
       'sending_internet', 'sending_internet_unknown',
       'sending_marriage_unknown', 'sending_married', 'sending_no_indigeneity',
       'sending_no_internet', 'sending_not_citizen', 'sending_rural',
       'sending_salary_worker', 'sending_self_employed', 'sending_separated',
       'sending_single', 'sending_sum_income', 'sending_total_pop',
       'sending_unknown_employment_status', 'sending_unknown_indigeneity',
       'sending_unpaid_worker', 'sending_urban', 'sending_weighted_avg_income',
       'sending_weighted_avg_income_abroad',
       'sending_weighted_avg_no_income_abroad',
       'sending_weighted_avg_unknown_income_abroad', 'sending_widowed',
       'US_MIG_05_10'],
      dtype='object')

In [38]:
# y - number moved: the 'US_MIG_05_10' column of devSet
# X - every other column that is, or can be represented as, a float


####### Build and fit the Model
# Seed the RNGs before anything stochastic happens so that Restart & Run All
# reproduces the same run: `random` drives the mini-batch sampling in the
# training loop; the torch seed is intended to fix parameter initialisation.
# NOTE(review): numpy is seeded defensively in case socialSig/helpers draw
# from np.random -- confirm whether that is needed.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

lr = 1e-7          # learning rate, shared by SGD and the manual SocialSig.W update
batchSize = 200    # number of rows sampled per training step
# NOTE(review): outDim is tied to the batch size here; confirm against
# socialSig.SocialSigNet what outDim is expected to represent.
model = socialSig.SocialSigNet(X=X, outDim=batchSize)

# Sum-reduced squared error: the loss grows with the batch size, so the
# learning rate above has to be read on that scale.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [39]:
# Display the module's repr to inspect its layer structure.
model

SocialSigNet(
  (SocialSig): bilinearImputation()
  (conv2d): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxPool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (block1): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (block2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(1

In [41]:
# NOTE(review): the traceback recorded under this cell ("min() received an
# invalid combination of arguments - got (out=NoneType, axis=NoneType, )") is
# what a NumPy reduction such as np.min raises when handed a torch.Tensor.
# No min() is called in this cell, so the offending call presumably lives in
# the model's forward pass, update_function or mae -- confirm there.

# Number of optimisation steps. Each step draws ONE random mini-batch, so the
# "EPOCH" label printed below really counts steps (label kept for log parity).
numSteps = 20

for t in range(numSteps):

    # Prep the batch for a forward pass: batchSize distinct row indices.
    batchObs = random.sample(range(len(X)), batchSize)
    # X is a NumPy array, so hand the slice to torch directly instead of going
    # through a Python list of row arrays (slow, and PyTorch warns about it).
    # requires_grad=True because the input gradient feeds the manual
    # SocialSig.W update below.
    modelX = torch.tensor(X[batchObs], dtype=torch.float32, requires_grad=True)
    # y is already a tensor; torch.tensor(<tensor>) is a copy-construct that
    # emits a UserWarning, so copy it the recommended way.
    modely = y[batchObs].clone().detach().to(torch.float32)

    # Forward pass
    y_pred = model(modelX, t)
    loss = criterion(y_pred, modely)

    # Zero gradients, perform a backward pass, and update the weights.
    # modelX is a leaf tensor with requires_grad=True, so this single backward
    # pass also fills modelX.grad with d(loss)/d(modelX); a separate
    # torch.autograd.grad(..., retain_graph=True) pass is not needed.
    optimizer.zero_grad()
    loss.backward()
    inputGrad = modelX.grad
    optimizer.step()

    # Update the coordinate weights
    # https://discuss.pytorch.org/t/updatation-of-parameters-without-using-optimizer-step/34244/4
    # Done under no_grad so the in-place copy is not tracked by autograd.
    with torch.no_grad():
        for name, p in model.named_parameters():
            if name == 'SocialSig.W':
                new_val = update_function(p, inputGrad, loss, lr)
                p.copy_(new_val)


    print("EPOCH: ", t)
    print("    Loss:     ", loss.item(), "     MAE: ", mae(y_pred, modely).item())
    print("\n")

    # A sum-reduced MSE can never be negative, so the former `loss < 0` exit
    # was unreachable. Stop instead once the loss has diverged to inf/NaN,
    # since further updates cannot recover from that.
    if not math.isfinite(loss.item()):
        break



TypeError: min() received an invalid combination of arguments - got (out=NoneType, axis=NoneType, ), but expected one of:
 * ()
 * (name dim, bool keepdim)
      didn't match because some of the keywords were incorrect: out, axis
 * (Tensor other)
 * (int dim, bool keepdim)
      didn't match because some of the keywords were incorrect: out, axis


In [6]:
# Inspect the predictions produced by the most recent forward pass of the training loop.
y_pred

tensor([241.8838, 173.8015, 303.2863, 268.8087, 223.1893, 349.8376, 167.5740,
        170.3517, 258.2311, 179.5870, 474.5044, 412.7544, 380.2326, 317.9936,
        199.3725, 120.4909, 248.3409, 181.9856, 242.8311, 279.9229, 198.3945,
        134.1983, 202.8821, 162.2353, 135.9135, 143.3357, 221.7710, 145.8651,
         93.0308, 223.3796, 227.9713, 224.0450, 580.7751, 241.8367, 358.9258,
        233.3301, 304.1076, 161.2498, 266.7221, 326.9606,  83.7689, 254.0115,
        284.1503, 205.0828, 240.9863, 171.1255, 257.1509, 206.8205, 143.9474,
        194.6531, 448.3197, 174.8288, 204.6130, 166.6397, 200.8628, 132.3166,
        189.0469, 339.0652, 133.4465, 132.7342, 207.0270, 541.8690, 346.6262,
        127.5920, 200.4330, 392.9853, 300.8218, 624.6135, 172.5773, 165.9590,
        249.8744, 172.1671, 149.4151, 342.9680, 228.3910, 247.0848, 148.5243,
        204.7357, 169.7732, 184.5657, 313.5667, 132.4714, 621.5056, 181.2315,
        158.4976, 419.6691, 174.3665, 290.5771, 111.0200, 111.19

In [7]:
# Inspect the target values of the most recent training batch (same rows as y_pred).
modely

tensor([ 545.,  214.,  313.,    9.,  604.,  197.,   25.,   12.,  115.,  297.,
         107.,  263.,  147.,    6.,  418.,  286.,  125.,   75.,  105.,  101.,
         683.,   94.,  387.,   56.,   41.,  622.,   41.,  316.,    7.,  224.,
         189.,  533.,  487.,  272.,  485.,  199.,  198.,  541.,  277.,  220.,
         599.,   22.,   82.,  205.,  439.,  326.,  237.,  411.,  471.,  143.,
         414.,  861.,  313.,  103.,  280.,   92.,  497.,  101.,  192.,   28.,
         193.,   19.,   29.,  708.,  481.,  284.,    0.,  453.,  176.,   37.,
          56.,   11.,   52.,  149.,  585.,  287.,  122.,  317.,   64.,  123.,
          25.,  182.,   31.,  780.,   92.,  178.,  493.,   59.,    8.,  321.,
           0.,  264.,  236.,  599.,  194.,   92.,  283., 1061.,   16.,  276.,
        1777.,   98.,  396.,   17.,    8.,  448.,  446.,  365.,  591.,  296.,
         326.,  848.,  124.,  184.,  251.,   43.,  265.,   26.,  136.,   78.,
         123.,   64.,  637.,   43.,  205.,    0.,  221.,  273., 

In [9]:
# Put the last batch's targets and predictions side by side. The tensors are
# detached and copied before conversion so the frame shares no memory with
# (and holds no autograd reference to) the training tensors.
preds_df = pd.DataFrame({
    'true_vals': list(modely.detach().clone().numpy()),
    'preds': list(y_pred.detach().clone().numpy()),
})
# Per-row absolute error between target and prediction.
preds_df['abs_error'] = (preds_df['true_vals'] - preds_df['preds']).abs()
preds_df.head()

Unnamed: 0,true_vals,preds,abs_error
0,545.0,241.883759,303.116241
1,214.0,173.801529,40.198471
2,313.0,303.286255,9.713745
3,9.0,268.808685,259.808685
4,604.0,223.189285,380.810715


In [12]:
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.stats` is loaded on every SciPy version, in which case
# `scipy.stats.linregress` raises AttributeError unless some other package
# happened to import it first.
import scipy.stats

# Ordinary least-squares line of predictions against true values.
# r_value is the Pearson correlation coefficient; p_value tests slope == 0.
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(preds_df['true_vals'], preds_df['preds'])

In [13]:
# Correlation coefficient returned by the linregress call on true values vs. predictions.
r_value

-0.059308415018243556

In [42]:
# p-value returned by the same linregress call.
p_value

0.40415421463719203

In [14]:
from sklearn.metrics import r2_score

# R^2 of the predictions against the true values (argument order: y_true, y_pred).
# A negative value means the predictions fit worse than always predicting the mean.
coefficient_of_determination = r2_score(preds_df['true_vals'], preds_df['preds'])
# Backward-compatible alias for the earlier, misspelled variable name.
coefficient_of_dermination = coefficient_of_determination
coefficient_of_determination

-0.1688302130404873

In [10]:
# Mean absolute error over the rows collected in preds_df.
mean_abs_error = preds_df['abs_error'].mean()
print("MAE : ", mean_abs_error)

MAE :  227.4593829727173


In [15]:
# Persist the predictions. index=False keeps the default RangeIndex out of the
# file; writing it produces a nameless first column that comes back as
# "Unnamed: 0" on read (the kind of column the loading cell has to strip).
preds_df.to_csv("./socialSigPreds.csv", index=False)