In [239]:
import matplotlib.pyplot as plt
from copy import deepcopy
import rasterio as rio
import pandas as pd
import numpy as np
import importlib
import sklearn
import random
import pickle
import torch
import math
import copy

import resnet18
importlib.reload(resnet18)
from helpers import *

In [194]:
import warnings
warnings.filterwarnings('ignore')


In [195]:
class dataLoader():

    def __init__(self, dir, df):
        self.data = []
        self.labels = []
        df = pd.read_csv(df)

        for i in os.listdir(dir):
            fname = os.path.join(dir, i)
            im = np.array(rio.open(fname).read(1))
            im = torch.from_numpy(im)
            im = torch.reshape(im, (1, 224, 224)).numpy()
            num_mig = df[df['sending'] == int(fname.split("m")[1].split(".")[0])]['US_MIG_05_10'].to_list()[0]

            self.data.append(im)
            self.labels.append(num_mig)

    def train_val_split(self, split):
        train_num = int(len(self.data) * split)
        train_indices = random.sample(range(0, len(self.data)), train_num)
        val_indices = [i for i in range(0, len(self.data)) if i not in train_indices]
        x_train, y_train = [self.data[i] for i in train_indices], [self.labels[i] for i in train_indices]
        x_val, y_val = [self.data[i] for i in val_indices], [self.labels[i] for i in val_indices]
        return x_train, y_train, x_val, y_val 


In [196]:
d = dataLoader("./final_pics", "./us_migration.csv")

In [197]:
x_train, y_train, x_val, y_val = d.train_val_split(split = .80)

In [287]:
#y - 'number_moved'
#x - 'everything else that is or can be represented as a float.'

####### Build and fit the Model
lr = 1e-7
batchSize = 50
model = resnet18.resnet18(outDim = batchSize)

In [288]:
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr = lr)

In [289]:
def train_model(model, criterion, optimizer, lr, batchSize, num_epochs):

    losses = []
    maes = []
    best_mae = 100000000
    i_to_print = torch.tensor([0,1,2,3,4])

    for t in range(num_epochs):

        for phase in ['train', 'val']:

            if phase == 'train':

                #Batches
                batchObs = random.sample(range(0, len(y_train)), batchSize)
                # print(batchObs)
                modelX = [x_train[i] for i in batchObs]
                modelX = torch.tensor(list(modelX), requires_grad = True, dtype = torch.float32)
                modely = torch.tensor([y_train[i] for i in batchObs], dtype = torch.float32)  # MADE A CHANGE HERE 
                y_pred = model(modelX, t)
                
                loss = criterion(y_pred, modely)  
                
                # Zero gradients, perform a backward pass, and update the weights.
                optimizer.zero_grad()
                grad = torch.autograd.grad(outputs=loss, inputs=modelX, retain_graph = True)
                # print("    GRADIENT: ", grad[0][0].shape)
                # print("    GRADIENT: ", grad[0])
                loss.backward()
                optimizer.step()
                # https://discuss.pytorch.org/t/updatation-of-parameters-without-using-optimizer-step/34244/4
                # with torch.no_grad():
                #     for name, p in model.named_parameters():
                #         if name == 'SocialSig.W':
                #         # print("    In with:        ", p.data)
                #             new_val = socialSigLayers.update_function(p, grad[0], loss, lr)
                #         # print("NEW WEIGHTS: ", new_val)
                #             p.copy_(new_val)

                print("EPOCH: ", t)
                # print("    Train")
                epoch_mae = mae(y_pred, modely).item()
                print("    TRAIN    Loss:   ", loss.item(), "     MAE:   ", epoch_mae)
                print("    Train Preds:     ", torch.index_select(y_pred, 0, i_to_print))
                print("    Train True:       ", torch.index_select(modely, 0, i_to_print))

            if phase == 'val':

                #Batches
                batchObs = random.sample(range(0, len(y_train)), batchSize)
                # print(batchObs)
                modelX = [x_train[i] for i in batchObs]
                modelX = torch.tensor(list(modelX), requires_grad = True, dtype = torch.float32)
                modely = torch.tensor([y_train[i] for i in batchObs], dtype = torch.float32)  # MADE A CHANGE HERE 
                
                # Perform evaluations of the batch predictions
                y_pred = model(modelX, t)
                
                loss = criterion(y_pred, modely)  
                epoch_mae = mae(y_pred, modely).item()
                print("    VAL      Loss:   ", loss.item(), "     MAE:   ", epoch_mae)
                print("    Val Preds:       ", torch.index_select(y_pred, 0, i_to_print))
                print("    Val True:       ", torch.index_select(modely, 0, i_to_print))

                if epoch_mae < best_mae:
                    
                    best_mae = epoch_mae
                    best_model_wts = copy.deepcopy(model.state_dict())
                    model_name = './models/test_Epoch' + str(t) + '.sav'
                    pickle.dump(model, open(model_name, 'wb'))

                losses.append(loss.item())
                maes.append(epoch_mae)
                
        print("\n")

    print("TRAINING COMPLETE")
    print("Best MAE: ", best_mae)


    for name, p in model.named_parameters():
        if name == 'SocialSig.W':
            print(p.data)


    # print(best_model_wts)

    # return model.load_state_dict(best_model_wts)
    return best_model_wts, losses, maes, y_pred, modely

In [290]:
num_epochs = 20

best_model_wts, losses, maes, ypreds, ytrue = train_model(model, criterion, optimizer, lr, batchSize, num_epochs)

EPOCH:  0
    TRAIN    Loss:    10487957.0      MAE:    316.443359375
    Train Preds:      tensor([ 0.3873,  0.8914, -0.8172, -0.1712,  0.1006],
       grad_fn=<IndexSelectBackward>)
    Train True:        tensor([482.,  34., 270.,   0., 549.])
    VAL      Loss:    4579889.0      MAE:    224.42959594726562
    Val Preds:        tensor([29.8557,  2.5505, 16.2873, -0.4924, 33.7161],
       grad_fn=<IndexSelectBackward>)
    Val True:        tensor([194., 283.,   8., 278., 446.])


EPOCH:  1
    TRAIN    Loss:    11222692.0      MAE:    259.5580139160156
    Train Preds:      tensor([29.7198,  2.3270, 15.8842,  0.2193, 33.4683],
       grad_fn=<IndexSelectBackward>)
    Train True:        tensor([  28.,  444., 2435.,  139.,   37.])
    VAL      Loss:    33742160.0      MAE:    371.6769104003906
    Val Preds:        tensor([ 33.2029,  29.8107, 165.8751,   8.8219,  38.3747],
       grad_fn=<IndexSelectBackward>)
    Val True:        tensor([  28.,  248., 1777.,  777.,   61.])


EPOCH:  2

In [291]:
ypreds

tensor([239.4654, 230.8529, 388.7881, 205.3544, 116.7692, 209.8984, 320.1414,
        220.0085, 262.6922, 220.5902, 108.7135, 207.8001, 224.9085, 188.0542,
        126.9061, 103.1261, 271.0197,  85.6989, 298.4025, 352.7805, 182.3047,
        254.9855, 262.8445, 334.5716, 196.8987, 284.6664, 394.3165, 144.0518,
        121.1913, 236.6511, 251.5620, 192.1383, 248.8824, 122.9638, 281.4716,
        348.8988, 240.2830, 214.0232, 394.4469, 203.0864, 276.6667, 243.5764,
        253.3100, 160.2995, 334.8738, 193.6625, 161.0563, 183.8346, 183.0976,
        160.3300], grad_fn=<AddBackward0>)

In [328]:
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn import preprocessing
from sklearn import tree
import pandas as pd
import random

In [None]:
ytruelist = list(ytrue.detach().numpy())
ypredlist = list(ypreds.detach().numpy())

In [330]:
mae = mean_absolute_error(ytruelist, ypredlist)
mae

185.35498

In [331]:
mape = mean_absolute_percentage_error(ytruelist, ypredlist)
mape

13.829773

In [332]:
r2 = r2_score(ytruelist, ypredlist)
r2

-0.10854503498266865

In [334]:
abs(sum(ytruelist) - sum(ypredlist) / len(ypredlist))

11574.141670684814

In [317]:
from sklearn.metrics import mean_absolute_percentage_error


In [321]:
ytruelist = list(ytrue.detach().numpy())
ypredlist = list(ypreds.detach().numpy())

In [323]:
mean_absolute_percentage_error(ypredlist, ytruelist)

0.86736137

In [327]:
mean_absolute_percentage_error(ytruelist, ypredlist)

13.829773