In [73]:
import torch
import torch.nn as nn
from torch.nn import init
import torch.nn.functional as F

import numpy as np
import random
import json

from aggregator import *
from graphsage import *
from encoder import *

In [74]:
with open("./data/graph.json") as g:
    graph = json.load(g)

x, adj_lists, y = [], {}, []

a = 0
for muni_id, dta in graph.items():
    x.append(dta["x"])
    y.append(dta["label"])
    adj_lists[str(a)] = dta["neighbors"]
    a += 1
    
x = np.array(x)
y = np.expand_dims(np.array(y), 1)

In [75]:
agg = MeanAggregator(features = x,
                    gcn = False)
enc = Encoder(features = x, 
              feature_dim = x.shape[1], 
              embed_dim = 128, 
              adj_lists = adj_lists,
              aggregator = agg)

  init.xavier_uniform(self.weight1)


In [76]:
model = SupervisedGraphSage(num_classes = 1,
                            enc = enc)

optimizer = torch.optim.Adam(filter(lambda p : p.requires_grad, model.parameters()), lr=0.01)

  init.xavier_uniform(self.weight)


In [77]:
n = 64 # batch_size

train_num = int(x.shape[0] * .70)
train_indices = random.sample(range(0, x.shape[0]), train_num)
val_indices = [i for i in range(0, x.shape[0]) if i not in train_indices]

train_indices_b = [train_indices[i * n:(i + 1) * n] for i in range((len(train_indices) + n - 1) // n )] 
val_indices_b   = [val_indices[i * n:(i + 1) * n] for i in range((len(val_indices) + n - 1) // n )] 

In [78]:
for epoch in range(0, 200):
    
    running_train_loss, running_val_loss = 0, 0
    
    for batch in train_indices_b:
        
        model.train()
                
        batch_nodes = [str(i) for i in batch]
        batch_ys = torch.tensor([y[int(i)] for i in batch])

        optimizer.zero_grad()
        loss = model.loss(batch_nodes, batch_ys)
        
        running_train_loss += loss.item()
        loss.backward()
        optimizer.step()
        
    for batch in val_indices_b:
        
        model.eval()
                
        batch_nodes = [str(i) for i in batch]
        batch_ys = torch.tensor([y[int(i)] for i in batch])

        loss = model.loss(batch_nodes, batch_ys)
        
        running_val_loss += loss.item()
        
    t_loss = running_train_loss / len(train_indices)
    v_loss = running_val_loss / len(val_indices)
        
    print("Epoch: ", epoch, "Training Loss: ", t_loss, " Validation Loss: ", v_loss)

Epoch:  0 Training Loss:  30.17127169744932  Validation Loss:  26.133861418693296
Epoch:  1 Training Loss:  29.102245452163295  Validation Loss:  25.433693227460306
Epoch:  2 Training Loss:  28.469685408089667  Validation Loss:  25.104756902879284
Epoch:  3 Training Loss:  27.76006970333031  Validation Loss:  24.438416807113157
Epoch:  4 Training Loss:  26.575773922404476  Validation Loss:  23.78987480901903
Epoch:  5 Training Loss:  25.345523311884076  Validation Loss:  23.28914578345514
Epoch:  6 Training Loss:  24.56903540478043  Validation Loss:  22.850752996629286
Epoch:  7 Training Loss:  24.054647654088864  Validation Loss:  22.499888315508443
Epoch:  8 Training Loss:  23.662631761648512  Validation Loss:  22.142512955204133
Epoch:  9 Training Loss:  23.335095839546593  Validation Loss:  21.81769079392956
Epoch:  10 Training Loss:  23.04156958446793  Validation Loss:  21.52873274280179
Epoch:  11 Training Loss:  22.772364150768823  Validation Loss:  21.30716365691154
Epoch:  12 

In [79]:
model.scores

tensor([[  87.4780],
        [  25.6122],
        [  16.7415],
        [  13.9004],
        [  99.0445],
        [ 153.3100],
        [  56.2089],
        [  47.8745],
        [ 107.6732],
        [ 124.8682],
        [  73.5030],
        [ 142.2534],
        [  62.9483],
        [  58.3465],
        [ 129.6899],
        [  79.9416],
        [ 318.7852],
        [  61.5948],
        [ 102.2355],
        [ 103.2169],
        [  97.1065],
        [  18.7762],
        [ 110.2240],
        [ 146.3541],
        [  73.0353],
        [ 126.9746],
        [  36.9999],
        [ 110.0958],
        [ 103.7611],
        [  53.2191],
        [ 853.3050],
        [ 566.6194],
        [1088.6333],
        [ 747.7380],
        [3478.5649],
        [1309.6831],
        [3827.7168],
        [  47.8296],
        [1014.8962],
        [ 955.1300],
        [1619.0576],
        [1080.9343],
        [ 513.8354],
        [1110.3741]], grad_fn=<TBackward>)

In [80]:
import pandas as pd

In [81]:
trues, preds = [], []

for index in val_indices:
    
    try:
    
        input = [str(index)]
        output = torch.tensor(y[index])

        model.eval()

        loss = model.loss(input, output)

        trues.append(y[index][0])
        preds.append(model.scores.item())
        
    except:
        
        print(index)
    
    
preds_df = pd.DataFrame()
preds_df['true'], preds_df['pred'] = trues, preds
preds_df["abs_diff"] = abs(preds_df['true'] - preds_df['pred'])
preds_df

  return F.l1_loss(input, target, reduction=self.reduction)


115
122
126
152
153
1608
2027
2043


Unnamed: 0,true,pred,abs_diff
0,988.0,4377.952637,3389.952637
1,1446.0,1027.879883,418.120117
2,0.0,150.970917,150.970917
3,955.0,137.917358,817.082642
4,37.0,118.052299,81.052299
...,...,...,...
607,974.0,1405.406494,431.406494
608,1918.0,1608.859375,309.140625
609,1551.0,1456.220337,94.779663
610,717.0,615.752930,101.247070


In [82]:
preds_df.to_csv("./predictions/graph_preds_v2_validation.csv")