# Case Prediction
Case prediction for different FB datasets

In [1]:
# Imports
import torch
import torch.nn.functional as F
import torch.nn as nn 
from torch_geometric.nn import GCNConv
import pandas as pd
import os
from torch_geometric.data import Data
from datetime import timedelta

In [2]:
# Paramters
d = 7 # days back
d_ahead = 5 # Days Ahead
datasets_folder = '/home/minigonche/Dropbox/Projects/covid_fb_pipeline/data_repo/data/data_stages'
dataset = 'bogota/agglomerated/geometry'

In [3]:
# Loads the graph
edges = pd.read_csv(os.path.join(datasets_folder,dataset,'movement.csv'),parse_dates = ['date_time'])
nodes = pd.read_csv(os.path.join(datasets_folder,dataset,'polygons.csv'))
cases = pd.read_csv(os.path.join(datasets_folder,dataset,'cases.csv'),parse_dates = ['date_time'])

In [4]:
# Constructs the ordered nodes
nodes['node_id'] = nodes.index.values
nodes.index = nodes.poly_id

edges.start_poly_id = edges.start_poly_id.apply(lambda i: nodes.loc[i,'node_id'])
edges.end_poly_id = edges.end_poly_id.apply(lambda i: nodes.loc[i,'node_id'])

cases.poly_id = cases.poly_id.apply(lambda i: nodes.loc[i,'node_id'])
cases = cases.sort_values(['date_time','poly_id'])


In [5]:

final_date = pd.to_datetime("2020-12-01")
dataset = []

d_i = pd.to_datetime('2020-03-30')
while d_i <= final_date:
    
    #print(d_i)
    
    edges_i = edges[edges.date_time == d_i]

    nodes_attributes = nodes[['poly_id','node_id']].merge(cases.loc[cases.date_time == d_i, ['poly_id','num_cases']].rename(columns = {'poly_id':'node_id'}),on='node_id', how = 'left')
    nodes_attributes = nodes_attributes.fillna(0).rename(columns={'num_cases':"num_cases_0"})
    
    d_j = d_i - timedelta(days = 1)
    i = 1
    while d_j > d_i - timedelta(days = d):
        temp_cases = cases.loc[cases.date_time == d_j, ['poly_id','num_cases']].copy()
        nodes_attributes[f'num_cases_{i}'] = 0
        nodes_attributes.loc[temp_cases.poly_id.values, f'num_cases_{i}'] = temp_cases['num_cases'].values

        d_j -= timedelta(days = 1)
        i += 1

    # Target
    df_y = nodes[['poly_id','node_id']].merge(cases.loc[cases.date_time == (d_i + timedelta(days = d_ahead)), ['poly_id','num_cases']].rename(columns = {'poly_id':'node_id'}),on='node_id', how = 'left')
    y = torch.tensor(df_y.fillna(0).num_cases.astype(float).values.tolist())
    
    edge_index_tensor = torch.tensor([edges_i.start_poly_id.values.tolist(),edges_i.end_poly_id.values.tolist()], dtype=torch.long)
    edge_weight_tensor =  torch.tensor(edges_i.movement.astype(float).values.tolist())
        
    nodes_attributes_tensor = torch.tensor([nodes_attributes.loc[2, [f"num_cases_{j}" for j in range(d)]].values.tolist() for ind in nodes_attributes.index ], dtype=torch.float)


    data = Data(x=nodes_attributes_tensor, 
                edge_index=edge_index_tensor, 
                edge_weight = edge_weight_tensor,
                y = y)    
    
    dataset.append(data)
    
    d_i += timedelta(days = 1)

display(nodes_attributes.sample(5))

Unnamed: 0,poly_id,node_id,num_cases_0,num_cases_1,num_cases_2,num_cases_3,num_cases_4,num_cases_5,num_cases_6
5,colombia_bogota_localidad_ciudad_bolivar,5,92,63,58,84,71,84,89
0,colombia_bogota_localidad_los_martires,0,17,15,15,7,15,18,23
11,colombia_bogota_localidad_suba,11,248,229,157,199,222,223,214
16,colombia_bogota_localidad_barrios_unidos,16,32,22,22,22,29,22,29
7,colombia_bogota_localidad_teusaquillo,7,55,38,23,28,36,32,36


In [6]:
class Net(torch.nn.Module):
    def __init__(self, num_nodes, node_features):
        super(Net, self).__init__()
        
        
        self.mlp_embedding = nn.Sequential(
            nn.Linear(node_features, 64),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(64, 1)
        )
                
        self.conv1 = GCNConv(1, 32)
        self.conv2 = GCNConv(32 + 1, 32)
        
        self.mlp_predict = nn.Sequential(
            nn.Linear(32, 32),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(32, 1)
        )

    def forward(self, data):
        x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
        
        # Embeds
        H_0 = self.mlp_embedding(x)
        
        print(H_0.size())
        
        # First Jump
        H_1 = self.conv1(H_0, edge_index, edge_weight)
        H_1 = F.relu(H_1)
        H_1 = F.dropout(H_1, training=self.training)
        
        # Concats
        H_1 = torch.cat([H_1 , H_0], dim=-1)
        
        # Second Jump
        H_2 = self.conv2(H_1, edge_index, edge_weight)
        H_2 = F.relu(H_2)
        H_2 = F.dropout(H_2, training=self.training)
        
        p = self.mlp_predict(H_2)
        
        return p



In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net(dataset[0].num_nodes, d).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=5e-4)
loss_fun = nn.MSELoss()
model.train()
for epoch in range(200):
    for data in dataset:
        #data = dataset[0]
        optimizer.zero_grad()
        out = torch.squeeze(model(data))
        loss = loss_fun(out, data.y)
        #loss = loss_fun(out, torch.tensor([0.0 for i in range(19)])) 
        loss.backward()
        optimizer.step()
        
    print(loss)

torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])
torch.Size([19, 1])


KeyboardInterrupt: 

In [None]:
from torch.nn import Linear, ReLU, Dropout
from torch_geometric.nn import Sequential, GCNConv, JumpingKnowledge
from torch_geometric.nn import global_mean_pool

model = Sequential('x, edge_index, batch', [
    (Dropout(p=0.5), 'x -> x'),
    (GCNConv(dataset.num_features, 64), 'x, edge_index -> x1'),
    ReLU(inplace=True),
    (GCNConv(64, 64), 'x1, edge_index -> x2'),
    ReLU(inplace=True),
    (lambda x1, x2: [x1, x2], 'x1, x2 -> xs'),
    (JumpingKnowledge("cat", 64, num_layers=2), 'xs -> x'),
    (global_mean_pool, 'x, batch -> x'),
    Linear(2 * 64, dataset.num_classes),
])