In [11]:
# library imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils

from sklearn.metrics import ConfusionMatrixDisplay, classification_report, recall_score, precision_score, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split,  GridSearchCV

# custom script imports
from NN_Defs import get_n_params, train, validate, BaseMLP

# Select the compute device: first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f'Running on : {device}')

# Directory prefix used when building paths to saved model files.
datadir = 'Saved_Final_Data/'

Running on : cpu


In [12]:
import numpy as np
# custom made libraries
from custom_dataloader import replicate_data
# ---- Load the full input and target arrays from disk ----
X = np.load("Input_Class_AllClasses_Sep.npy")
Y = np.load("Target_Class_AllClasses_Sep.npy")
# Y = np.load("Pred_Class_AllClasses_Sep.npy") # For predicted targets from CM21

# ---- Split configuration ----
# Fixed seed passed to the custom splitter so results are replicable (for now).
seed_val = 1111

# Each list gives, per class, how many objects go into the training set and
# the validation set; whatever is left over is given to the testing set.
# Keep exactly one (amounts_train, amounts_val) pair active below.

# CM21 Split
amounts_train = [331, 1141, 231, 529, 27, 70, 1257]
amounts_val = [82, 531, 104, 278, 6, 17, 4359]
# amounts_train = [331,1141,231+529+27+70+1257] # C-targets
# amounts_val = [82, 531, 104+278+6+17+4359]

# 300s Split
# amounts_train = [300,300,300,300,27,70,300]
# amounts_val = [82, 531, 104, 278, 6, 17, 4359]
# amounts_train = [300,300,300+300+27+70+300]
# amounts_val = [82, 531, 104+278+6+17+4359]

# # 75/25 Split
# amounts_train = [311,1994,391,1043,25,66,21796] #75/25 train
# amounts_val = [103,665,130,348,9,22,5449] #75/25 val
# amounts_train = [311,1994,391+1043+25+66+21796] #75/25 train
# amounts_val = [103,665,130+348+9+22+5449] #75/25 val


# ---- Draw the train / validation / test splits with the custom data grabber ----
split_arrays = replicate_data(X, Y, 'seven', amounts_train, amounts_val, seed_val)
inp_tr, tar_tr, inp_va, tar_va, inp_te, tar_te = split_arrays

# Scale every split with statistics learned from the training inputs only,
# so no information from validation/testing data leaks into the scaler.
scaler_S = StandardScaler().fit(inp_tr)
inp_tr = scaler_S.transform(inp_tr)
inp_va = scaler_S.transform(inp_va)
# Only transform the testing inputs when there are any. This replaces the old
# manual step of commenting the line out for the 75/25 split (presumably that
# split leaves no samples for testing, and transform() rejects an empty array
# -- TODO confirm what replicate_data returns in that case).
if len(inp_te) > 0:
    inp_te = scaler_S.transform(inp_te)

# Sanity-check printout: shapes of the inputs and targets of each split.
divider = '------------------------------------'
print('Sizes of Datasets : Inputs , Targets')
print(divider)
print(
    f'Training set: {inp_tr.shape} , {tar_tr.shape} \n'
    f'Validation set: {inp_va.shape} , {tar_va.shape} \n'
    f'Testing Set: {inp_te.shape}, {tar_te.shape}'
)
print(divider)

Sizes of Datasets : Inputs , Targets
------------------------------------
Training set: (3586, 8) , (3586,) 
Validation set: (5377, 8) , (5377,) 
Testing Set: (17940, 8), (17940,)
------------------------------------


In [13]:
# Stack the three splits back together in a fixed order: train, validation, test.
inputs = np.concatenate((inp_tr, inp_va, inp_te))
targets = np.concatenate((tar_tr, tar_va, tar_te))
# Keep a copy of the original (uncollapsed) labels, in the stacked order.
np.save("MLP_Val_G-targets7.npy", targets)

# Collapse the labels: 0 and 1 are kept, every label >= 2 becomes 2.
# np.where here is idempotent, so re-running this cell does not change the result.
tar_tr = np.where(tar_tr < 2, tar_tr, 2)
tar_va = np.where(tar_va < 2, tar_va, 2)
tar_te = np.where(tar_te < 2, tar_te, 2)

# Same stacking as above, now with the collapsed labels, converted to tensors.
inputs1 = torch.tensor(np.concatenate((inp_tr, inp_va, inp_te)))
targets1 = torch.tensor(np.concatenate((tar_tr, tar_va, tar_te)))
all_data = data_utils.TensorDataset(inputs1, targets1)

# shuffle=False: this loader is only used for evaluation. Shuffling does not
# affect the metrics, but it would put the model outputs in a random order that
# no longer lines up row-for-row with the target arrays saved in this cell.
all_loader = torch.utils.data.DataLoader(all_data, batch_size=25, shuffle=False)
np.save("MLP_Val_G-targets2.npy", targets1)


In [14]:
# Path of the saved model weights to evaluate.
loadpath = datadir+'Final_CSplit_4e-2_Settings'

# BaseMLP(8, 20, 3): presumably (n_inputs, hidden_width, n_outputs) -- 8 matches
# the feature count of the inputs and 3 the number of collapsed classes;
# TODO confirm the argument order against NN_Defs.BaseMLP.
# Move the model to the selected device so it matches the device passed to
# validate(); on a CPU-only machine this is a no-op.
BaseNN = BaseMLP(8, 20, 3).to(device)

# Put the model in inference mode explicitly (affects layers such as dropout or
# batch norm, if BaseMLP has any); harmless if validate() also does this.
BaseNN.eval()

# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True if the installed torch supports it).
# Kept as the last expression so the cell still displays the key-matching result.
BaseNN.load_state_dict(torch.load(loadpath, map_location=device))

<All keys matched successfully>

In [15]:
# Run the loaded model over the full loader and unpack what validate() returns:
# loss, accuracy, predictions and the matching ground-truth values.
validation_outputs = validate(BaseNN, all_loader, device)
val_loss, val_accuracy, val_predictions, val_truth_values = validation_outputs


[array([1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2]), array([2, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2]), array([2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 0, 2,
       2, 2, 2]), array([2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0,
       2, 2, 2]), array([1, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2,
       1, 1, 2]), array([2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 0, 2, 1, 2, 2, 2, 2, 2, 2, 1,
       2, 2, 2]), array([2, 2, 2, 2, 2, 1, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2]), array([1, 2, 2, 1, 2, 2, 2, 2, 0, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2,
       2, 2, 2]), array([2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 1, 2, 2, 2, 2, 2,
       2, 2, 2]), array([2, 2, 1, 0, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2,
       1, 2, 2]), array([2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 1])

In [8]:
# Persist the model predictions returned by validate().
predictions_file = "MLP_CM21_G-targets_VPred.npy"
np.save(predictions_file, val_predictions)

In [7]:
# Per-class precision / recall / F1 of the predictions against the ground truth.
report_text = classification_report(val_truth_values, val_predictions)
print(report_text)

              precision    recall  f1-score   support

           0       0.78      0.93      0.85       414
           1       0.87      0.94      0.90      2659
           2       1.00      0.98      0.99     23830

    accuracy                           0.98     26903
   macro avg       0.88      0.95      0.91     26903
weighted avg       0.98      0.98      0.98     26903



In [9]:
# Persist the ground-truth values returned by validate(), alongside the predictions.
truth_values_file = "MLP_CM21_G-targets_VTruthVals.npy"
np.save(truth_values_file, val_truth_values)

In [17]:
# NOTE(review): this prints only the repr of the enumerate wrapper (its type and
# memory address); no batches are read from the loader. Looks like leftover
# debugging -- confirm whether it is still needed.
batch_enumerator = enumerate(all_loader)
print(batch_enumerator)

<enumerate object at 0x7f8b395b7e40>
