# Running Neural Networks

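Remember to set the `model_type` and `ver` variables (in the "Unwrap and Set General Parameters" section) before running! Together they select which `params.json` file is loaded.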

In [1]:
from __future__ import division
from __future__ import print_function

import time
import argparse
import numpy as np

import torch
import torch.optim as optim
import torch.nn.functional as F

In [2]:
import sys
sys.path.append('C:/Users/chetai/Documents/Projects/moonGen/')

import pdb
import json
import pickle
import numpy as np

# Neural network structure imports
from GCN.GCN import GCN
from Dense.Dense import Dense

# Processing imports
from sub_data_process import SubGraphProcess
from full_data_process import GraphDataProcess

# Utility imports
from utils.utils import *
from utils.label_functions import *
from utils.feature_functions import *
from utils.adjacency_functions import *
from utils.train_test_functions import *

## Processing Wrappers

In [3]:
def full_graph_process(param_dict, full_processed_path, full_redo):
    """
    Wrapper for processing data on the full mined data-set

    The processed object is cached at full_processed_path: it is rebuilt (and
    the cache overwritten) when that file is missing or full_redo is set;
    otherwise the cached object is loaded and returned.

    Input(s):
    - param_dict (dict): Parameters; reads ['gen_params']['raw_data_path'],
      ['gen_params']['data_dir'], ['full_names_dict'] and ['full_redo_dict']
    - full_processed_path (string): Save path for processed version of full data
    - full_redo (bool): Whether or not to re-compute even if a saved version exists

    Output(s):
    GraphDataProcess object
    """
    # Imported explicitly so this wrapper does not rely on `os` leaking into the
    # namespace through one of the wildcard `utils` imports
    import os

    # Reuse the saved object unless it is missing or a re-compute was requested
    if os.path.exists(full_processed_path) and not full_redo:
        # NOTE(review): load_pickle presumably unpickles the file -- only point it at trusted caches
        return load_pickle(full_processed_path)

    gen_params = param_dict['gen_params']

    # Get processing object (mined data path, main save directory, names of
    # intermediate files, per-step redo flags) and execute
    graph_data_obj = GraphDataProcess(
        gen_params['raw_data_path'],
        gen_params['data_dir'],
        param_dict['full_names_dict'],
        param_dict['full_redo_dict']
    )
    graph_data_obj.run_all()

    # Cache the processed object for later runs
    save_pickle(graph_data_obj, full_processed_path)

    return graph_data_obj


def sub_graph_process(param_dict, data_path, full_processed_path, sub_processed_path, sub_redo):
    """
    Wrapper for sampling data subset and organizing model input features

    The sampled object is cached at sub_processed_path: it is rebuilt (and the
    cache overwritten) when that file is missing or sub_redo is set; otherwise
    the cached object is loaded and returned.

    Input(s):
    - param_dict (dict): Parameters; reads ['sampling_params'], ['sub_names_dict'],
      ['sub_redo_dict'] and ['sub_functions_dict']
    - data_path (string): Path to intermediate save files (for sub-sampling)
    - full_processed_path (string): Path to access full processed data
    - sub_processed_path (string): Path to save or load sub-processed object
    - sub_redo (bool): Whether or not to re-compute even if a saved version exists

    Output(s):
    SubGraphProcess object
    """
    # Imported explicitly so this wrapper does not rely on `os` leaking into the
    # namespace through one of the wildcard `utils` imports
    import os

    # Reuse the saved object unless it is missing or a re-compute was requested
    if os.path.exists(sub_processed_path) and not sub_redo:
        # NOTE(review): load_pickle presumably unpickles the file -- only point it at trusted caches
        return load_pickle(sub_processed_path)

    # Parse parameters
    sampling_params = param_dict['sampling_params']
    sub_names_dict = param_dict['sub_names_dict']
    sub_redo_dict = param_dict['sub_redo_dict']

    # Dictionary of processing functions, built by get_func_dict from the
    # 'sub_functions_dict' entry of the parameters
    sub_functions_dict = get_func_dict(param_dict['sub_functions_dict'])

    # Get sampling object and execute
    subgraph_data_obj = SubGraphProcess(
        full_processed_path,
        data_path,
        sub_names_dict,
        sub_redo_dict,
        sub_functions_dict,
        sampling_params
    )
    subgraph_data_obj.run_all()

    # Cache the sampled object for later runs
    save_pickle(subgraph_data_obj, sub_processed_path)

    return subgraph_data_obj

## Unwrap and Set General Parameters

In [4]:
# Load parameters
# model_type and ver pick the sub-directory that holds the params.json to use
model_type = 'GCN'
ver = 'v0'

param_path = 'C:/Users/chetai/Desktop/moonboard_data/%s/%s/params.json' % (model_type, ver)

# Read inside a context manager so the file handle is closed right after parsing
with open(param_path, 'r') as param_file:
    param_dict = json.load(param_file)

In [5]:
# All values in this cell come from the 'gen_params' section of the parameters
gen_params = param_dict['gen_params']

# Save directories
data_dir = gen_params['data_dir']
data_path = gen_params['data_subpath']
result_path = gen_params['result_subpath']

# Names of the processed result files
full_processed_name = gen_params['full_processed_name']
sub_processed_name = gen_params['sub_processed_name']

# Paths are built by plain string concatenation (directory + file name)
full_processed_path = data_dir + full_processed_name
sub_processed_path = data_path + sub_processed_name

# Redo settings for the full and sub-sampled processing steps
full_redo, sub_redo = gen_params['full_redo'], gen_params['sub_redo']

In [6]:
# Full mined data-set: processed from scratch or loaded from its saved copy
graph_data_obj = full_graph_process(
    param_dict=param_dict,
    full_processed_path=full_processed_path,
    full_redo=full_redo
)

# Sampled subset: sampled from the full processed data or loaded from its saved copy
subgraph_data_obj = sub_graph_process(
    param_dict=param_dict,
    data_path=data_path,
    full_processed_path=full_processed_path,
    sub_processed_path=sub_processed_path,
    sub_redo=sub_redo
)


Mapping nodes...
Finished mapping nodes!

Mapping adjacency...
Finished mapping adjacency!

Training TFIDF...
Finished training TFIDF!
Sampling core nodes...
Getting samples node features...
Getting samples node adjacency...
Getting samples node labels...


In [7]:
# Split ratios come straight from the parameter file
split_ratio_dict = param_dict['split_ratio_dict']

# Switch between binary and multi-class classification
# NOTE(review): -1 presumably selects multi-class -- confirm against sample_and_load_pytorch_data
target_grade = -1

# Features, adjacency, labels and the train/dev/test indexes for PyTorch training
pytorch_data = sample_and_load_pytorch_data(
    subgraph_data_obj, split_ratio_dict, result_path, target_grade, sub_redo
)
features, adj, labels, idx_train, idx_dev, idx_test = pytorch_data

# Number of distinct label values present in `labels`
num_labels = len(set(np.asarray(labels).tolist()))

In [8]:
# Destination files for the train/dev/test indexes
idx_train_path = data_path + 'train_idxs.pickle'
idx_dev_path = data_path + 'dev_idxs.pickle'
idx_test_path = data_path + 'test_idxs.pickle'

# Convert each index set with .numpy() and pickle it, in train, dev, test order
for split_idx, split_path in (
    (idx_train, idx_train_path),
    (idx_dev, idx_dev_path),
    (idx_test, idx_test_path)
):
    save_pickle(split_idx.numpy(), split_path)

## Specify Neural Network Settings

In [9]:
# Dense network settings; the model is only built when switched 'on' in the parameters
dense_params = param_dict['dense_params']
if dense_params['on']:
    num_epochs = dense_params['num_epochs']

    # Input size is the second dimension of `features`; output size is the label count
    dense_kwargs = {
        'nfeatures': features.shape[1],
        'nhidden_layer_list': dense_params['hidden'],
        'nclass': num_labels,
        'dropout': dense_params['dropout']
    }
    model = Dense(**dense_kwargs)

    # Adam over all model parameters, with learning rate and weight decay from the parameters
    optimizer = optim.Adam(model.parameters(), lr=dense_params['lr'], weight_decay=dense_params['weight_decay'])

In [10]:
# GCN settings; the model is only built when switched 'on' in the parameters.
# When built, it replaces any `model` / `optimizer` / `num_epochs` set by an earlier cell.
gcn_params = param_dict['gcn_params']
if gcn_params['on']:
    num_epochs = gcn_params['num_epochs']

    # Input size is the second dimension of `features`; output size is the label count
    gcn_kwargs = {
        'nfeatures': features.shape[1],
        'nhidden_layer_list': gcn_params['hidden'],
        'nclass': num_labels,
        'dropout': gcn_params['dropout']
    }
    model = GCN(**gcn_kwargs)

    # Adam over all model parameters, with learning rate and weight decay from the parameters
    optimizer = optim.Adam(model.parameters(), lr=gcn_params['lr'], weight_decay=gcn_params['weight_decay'])

| Kaiming Initialization
| Kaiming Initialization


In [11]:
# Show model: the bare expression makes the notebook display the model's repr,
# which lists the layers that were built by the settings cells
model

GCN(
  (gc_list): ModuleList(
    (0): GraphConvolution (1240 -> 32)
    (1): GraphConvolution (32 -> 12)
  )
)

In [12]:
# Bundle the training inputs for run_train; note the dev split is passed under the 'idx_val' key
train_dict = {
    'optimizer': optimizer,
    'features': features,
    'adj': adj,
    'labels': labels,
    'idx_train': idx_train,
    'idx_val': idx_dev,
    'num_epochs': num_epochs
}

# Train; `model` is rebound to whatever run_train returns
model = run_train(model, train_dict)

Epoch: 0001 loss_train: 2.4872 acc_train: 0.0903 loss_val: 2.4807 acc_val: 0.1146 time: 0.0890s
Epoch: 0002 loss_train: 2.4800 acc_train: 0.0931 loss_val: 2.4796 acc_val: 0.1338 time: 0.0160s
Epoch: 0003 loss_train: 2.4728 acc_train: 0.1203 loss_val: 2.4782 acc_val: 0.1274 time: 0.0160s
Epoch: 0004 loss_train: 2.4678 acc_train: 0.1074 loss_val: 2.4766 acc_val: 0.1210 time: 0.0150s
Epoch: 0005 loss_train: 2.4624 acc_train: 0.1203 loss_val: 2.4745 acc_val: 0.1210 time: 0.0140s
Epoch: 0006 loss_train: 2.4554 acc_train: 0.1318 loss_val: 2.4723 acc_val: 0.1210 time: 0.0160s
Epoch: 0007 loss_train: 2.4511 acc_train: 0.1318 loss_val: 2.4699 acc_val: 0.1210 time: 0.0140s
Epoch: 0008 loss_train: 2.4427 acc_train: 0.1404 loss_val: 2.4672 acc_val: 0.1338 time: 0.0130s
Epoch: 0009 loss_train: 2.4374 acc_train: 0.1447 loss_val: 2.4642 acc_val: 0.1338 time: 0.0150s
Epoch: 0010 loss_train: 2.4321 acc_train: 0.1490 loss_val: 2.4612 acc_val: 0.1146 time: 0.0150s
Epoch: 0011 loss_train: 2.4266 acc_train

In [13]:
# Bundle the evaluation inputs; only the test split indexes are handed over
test_dict = {
    'features': features,
    'adj': adj,
    'labels': labels,
    'idx_test': idx_test
}

# Evaluate the trained model on the test split
test(model, test_dict)

Test set results: loss= 2.3166 accuracy= 0.1429


In [14]:
# Pickle the trained model object into the result directory
model_name = 'model.pickle'
model_save_path = result_path + model_name
save_pickle(model, model_save_path)