# Running Neural Networks

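Remember to set the `ver` variable (in the "Unwrap and Set General Parameters" section) before running: it selects the folder from which `params.json` is loaded.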

In [None]:
from __future__ import division
from __future__ import print_function

import time
import argparse
import numpy as np

import torch
import torch.optim as optim
import torch.nn.functional as F

In [None]:
import sys
sys.path.append('C:/Users/chetai/Desktop/pytorch/')

import pdb
import json
import pickle
import numpy as np

# Neural network structure imports
from GCN.GCN import GCN
# from GAT.GAT import GAT
from Dense.Dense import Dense

# Processing imports
from sub_data_process import SubGraphProcess
from full_data_process import GraphDataProcess

# Utility imports
from utils.utils import *
from utils.label_functions import *
from utils.feature_functions import *
from utils.adjacency_functions import *
from utils.train_test_functions import *

## Processing Wrappers

In [None]:
def full_graph_process(param_dict, full_processed_path, full_redo):
    """
    Wrapper for processing data on the full mined data-set.

    If a pickled result already exists at `full_processed_path` and
    `full_redo` is falsy, that pickle is loaded and returned. Otherwise a
    GraphDataProcess object is built from `param_dict`, executed with
    `run_all()` and pickled to `full_processed_path`.

    Input(s):
    - param_dict (dict): Parsed parameter file. When processing is (re)run it
      must provide 'gen_params' (with 'raw_data_path' and 'data_dir'),
      'full_names_dict' and 'full_redo_dict'; it is not read otherwise
    - full_processed_path (string): Save path for processed version of full data
    - full_redo (bool): Whether or not to re-compute even if a saved result exists

    Output(s):
    GraphDataProcess object (freshly computed, or whatever `load_pickle`
    returns for the cached file)
    """
    # Imported locally: the file never imports `os` itself, so the existence
    # check would otherwise only work if a wildcard utility import leaked it.
    import os

    # Fast path: reuse the cached result unless a recompute was requested
    if os.path.exists(full_processed_path) and not full_redo:
        return load_pickle(full_processed_path)

    gen_params = param_dict['gen_params']

    # Path to the mined data
    raw_data_path = gen_params['raw_data_path']

    # Save directory and names of intermediate files
    data_dir = gen_params['data_dir']
    full_names_dict = param_dict['full_names_dict']

    # Per-step flags for redoing intermediate calculations
    full_redo_dict = param_dict['full_redo_dict']

    # Get processing object and execute
    graph_data_obj = GraphDataProcess(raw_data_path, data_dir, full_names_dict, full_redo_dict)
    graph_data_obj.run_all()

    # Cache the result so later runs can take the fast path above
    save_pickle(graph_data_obj, full_processed_path)

    return graph_data_obj


def sub_graph_process(param_dict, data_path, full_processed_path, sub_processed_path, sub_redo):
    """
    Wrapper for sampling data subset and organizing model input features.

    If a pickled result already exists at `sub_processed_path` and `sub_redo`
    is falsy, that pickle is loaded and returned. Otherwise a SubGraphProcess
    object is built from `param_dict`, executed with `run_all()` and pickled
    to `sub_processed_path`.

    Input(s):
    - param_dict (dict): Parsed parameter file. When sampling is (re)run it
      must provide 'sampling_params', 'sub_names_dict', 'sub_redo_dict' and
      'sub_functions_dict'; it is not read otherwise
    - data_path (string): Path to intermediate save files (for sub-sampling)
    - full_processed_path (string): Path to access full processed data
    - sub_processed_path (string): Path to save or load sub-processed object
    - sub_redo (bool): Whether or not to re-compute even if a saved result exists

    Output(s):
    SubGraphProcess object (freshly computed, or whatever `load_pickle`
    returns for the cached file)
    """
    # Imported locally: the file never imports `os` itself, so the existence
    # check would otherwise only work if a wildcard utility import leaked it.
    import os

    # Fast path: reuse the cached result unless a recompute was requested
    if os.path.exists(sub_processed_path) and not sub_redo:
        return load_pickle(sub_processed_path)

    # Parse parameters
    sampling_params = param_dict['sampling_params']
    sub_names_dict = param_dict['sub_names_dict']
    sub_redo_dict = param_dict['sub_redo_dict']

    # Dictionary of processing functions, resolved by get_func_dict from the
    # 'sub_functions_dict' entry of the parameter file
    sub_functions_dict = get_func_dict(param_dict['sub_functions_dict'])

    # Get sampling object and execute
    subgraph_data_obj = SubGraphProcess(
        full_processed_path,
        data_path,
        sub_names_dict,
        sub_redo_dict,
        sub_functions_dict,
        sampling_params
    )
    subgraph_data_obj.run_all()

    # Cache the result so later runs can take the fast path above
    save_pickle(subgraph_data_obj, sub_processed_path)

    return subgraph_data_obj

## Unwrap and Set General Parameters

In [None]:
# Load parameters
# `ver` only selects which folder's params.json is read here
ver = 'dense'
param_path = 'C:/Users/chetai/Desktop/' + ver + '/params.json'

# Use a context manager so the file handle is closed as soon as the JSON is
# parsed, instead of being left open for the lifetime of the kernel
with open(param_path, 'r') as param_file:
    param_dict = json.load(param_file)

In [None]:
# All general settings live under the 'gen_params' section of the parameter file
gen_params = param_dict['gen_params']

# Model type and version tag (from here on `ver` holds the value from the file)
model_type = gen_params['model_type']
ver = gen_params['ver']

# Root directories for data and results
data_dir = gen_params['data_dir']
result_dir = gen_params['result_dir']

# Descriptive save paths derived by set_paths from model type / version
data_path, result_path = set_paths(model_type, ver, data_dir, result_dir)

# File names of the processed objects
full_processed_name = gen_params['full_processed_name']
sub_processed_name = gen_params['sub_processed_name']

# The full-data result sits directly in data_dir; the sub-sampled result sits
# under the descriptive data_path
full_processed_path = data_dir + full_processed_name
sub_processed_path = data_path + sub_processed_name

# Flags that force re-computation of the full / sub-sampled processing
full_redo = gen_params['full_redo']
sub_redo = gen_params['sub_redo']

In [None]:
# Full mined data-set: computed, or loaded from full_processed_path
graph_data_obj = full_graph_process(
    param_dict=param_dict,
    full_processed_path=full_processed_path,
    full_redo=full_redo,
)

# Sampled subset with model input features: computed, or loaded from sub_processed_path
subgraph_data_obj = sub_graph_process(
    param_dict=param_dict,
    data_path=data_path,
    full_processed_path=full_processed_path,
    sub_processed_path=sub_processed_path,
    sub_redo=sub_redo,
)

In [None]:
# Train / dev / test split ratios from the parameter file
split_ratio_dict = param_dict['split_ratio_dict']

# Switch between binary and multi-class classification
# NOTE(review): presumably -1 selects multi-class - confirm against
# sample_and_load_pytorch_data
target_grade = -1

# Model inputs, labels and split indexes for PyTorch training
pytorch_data = sample_and_load_pytorch_data(
    subgraph_data_obj, split_ratio_dict, result_path, target_grade, sub_redo
)
features, adj, labels, idx_train, idx_dev, idx_test = pytorch_data

# Number of distinct label values
num_labels = len(set(np.asarray(labels)))

In [None]:
# Destination files for the train / dev / test indexes, all under data_path
idx_train_path, idx_dev_path, idx_test_path = (
    data_path + fname
    for fname in ('train_idxs.pickle', 'dev_idxs.pickle', 'test_idxs.pickle')
)

# Pickle each index set as a NumPy array (converted via .numpy())
for idx_tensor, idx_path in zip(
    (idx_train, idx_dev, idx_test),
    (idx_train_path, idx_dev_path, idx_test_path),
):
    save_pickle(idx_tensor.numpy(), idx_path)

## Specify Neural Network Settings

In [None]:
# Dense network settings; the model is only built when the 'on' flag is set
dense_params = param_dict['dense_params']
if dense_params['on']:
    num_epochs = dense_params['num_epochs']

    # Input width comes from the feature matrix, output width from the label count
    dense_kwargs = {
        'nfeatures': features.shape[1],
        'nhidden_layer_list': dense_params['hidden'],
        'nclass': num_labels,
        'dropout': dense_params['dropout'],
    }
    model = Dense(**dense_kwargs)

    # Adam over all model parameters with the configured learning rate / weight decay
    adam_kwargs = {
        'lr': dense_params['lr'],
        'weight_decay': dense_params['weight_decay'],
    }
    optimizer = optim.Adam(model.parameters(), **adam_kwargs)

In [None]:
# GCN settings; the model is only built when the 'on' flag is set.
# When this cell runs with the flag set, it rebinds `num_epochs`, `model`
# and `optimizer`, replacing any values assigned earlier.
gcn_params = param_dict['gcn_params']
if gcn_params['on']:
    num_epochs = gcn_params['num_epochs']

    # Input width comes from the feature matrix, output width from the label count
    gcn_kwargs = {
        'nfeatures': features.shape[1],
        'nhidden_layer_list': gcn_params['hidden'],
        'nclass': num_labels,
        'dropout': gcn_params['dropout'],
    }
    model = GCN(**gcn_kwargs)

    # Adam over all model parameters with the configured learning rate / weight decay
    adam_kwargs = {
        'lr': gcn_params['lr'],
        'weight_decay': gcn_params['weight_decay'],
    }
    optimizer = optim.Adam(model.parameters(), **adam_kwargs)

In [None]:
# Show model: as the last expression of the cell, `model` is echoed by the
# notebook, so the network built by whichever settings cell was switched on
# can be inspected before training
model

In [None]:
# Train model: bundle everything run_train needs into a single dictionary.
# The dev split is handed over under the 'idx_val' key.
train_dict = {
    'optimizer': optimizer,
    'features': features,
    'adj': adj,
    'labels': labels,
    'idx_train': idx_train,
    'idx_val': idx_dev,
    'num_epochs': num_epochs,
}

# run_train returns the model, which replaces the previous binding
model = run_train(model, train_dict)

In [None]:
# Test model: same inputs as training, but with the held-out test indexes
test_dict = {
    'features': features,
    'adj': adj,
    'labels': labels,
    'idx_test': idx_test,
}

test(model, test_dict)

In [None]:
# Save model: pickle the model object under the result directory
model_name = 'model.pickle'
model_save_path = result_path + model_name
save_pickle(model, model_save_path)