In [5]:
import numpy as np
import random
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score
import torch
import torch.utils.data as data_utils

from custom_dataloader import replicate_data
from NN_Defs import TwoLayerMLP, validate

import xgboost as xgb

# All tensors and models in this notebook are placed on the CPU.
device = torch.device("cpu")

# data load
X = np.load("Input_Class_AllClasses_Sep.npy")
Y = np.load("Target_Class_AllClasses_Sep.npy")

# Fixed seed for the single reference split drawn below, so that the cells
# which scale the data and build the loaders work on a fresh kernel and give
# replicable results.
seed_val = 1111

# CM21 Split
# NOTE(review): presumably one sample count per class (seven entries each) -- confirm
# against replicate_data.
amounts_train = [300,300,300,300,27,70,300]
amounts_val = [82, 531, 104, 278, 6, 17, 4359]

# calling custom datagrabber here
# Later cells read inp_tr/tar_tr/inp_va/tar_va/inp_te/tar_te at module level;
# with this call commented out they only existed as leftover kernel state.
inp_tr, tar_tr, inp_va, tar_va, inp_te, tar_te = replicate_data(X, Y, 'seven', amounts_train, amounts_val, seed_val)


In [9]:
def bootstrap_estimate(estimator, scoring_func=None, random_seed=0, n_splits=200):
    """Estimate a validation metric by refitting ``estimator`` on repeated splits.

    On each of ``n_splits`` iterations a new split is drawn with
    ``replicate_data`` from the module-level ``X``, ``Y``, ``amounts_train`` and
    ``amounts_val``; a ``StandardScaler`` fitted on the training inputs is
    applied to the training and validation inputs; ``estimator`` is refitted
    and ``scoring_func`` is evaluated on the validation predictions.

    Parameters
    ----------
    estimator : object
        Model exposing ``fit(X, y)`` and ``predict(X)``. It is refitted in
        place on every iteration.
    scoring_func : callable
        Metric called as ``scoring_func(y_true, y_pred)`` when it is
        ``accuracy_score``; any other metric is called with
        ``average=None, zero_division=1`` and is expected to return one score
        per class.
    random_seed : int
        Seeds a private ``random.Random`` that draws the per-split seed
        (an integer in [0, 1000]) handed to ``replicate_data``. Calls with the
        same ``random_seed`` therefore use the same sequence of splits.
    n_splits : int
        Number of split/fit/score iterations.

    Returns
    -------
    estimate, stderr
        Mean and standard deviation (``np.std``) of the scores, both
        multiplied by 100. Scalars for ``accuracy_score``; lists with one
        entry per class otherwise.

    Raises
    ------
    TypeError
        If ``scoring_func`` is ``None``.
    """
    if scoring_func is None:
        raise TypeError("scoring_func must be a metric callable, e.g. accuracy_score")

    overall_metric = scoring_func is accuracy_score
    # Private generator: honours random_seed without touching the global RNG state.
    rng = random.Random(random_seed)

    scores = []
    for _ in range(n_splits):
        inp_tr, tar_tr, inp_va, tar_va, _inp_te, _tar_te = replicate_data(
            X, Y, 'seven', amounts_train, amounts_val, rng.randint(0, 1000))

        # scaling data according to training inputs
        scaler_S = StandardScaler().fit(inp_tr)
        inp_tr = scaler_S.transform(inp_tr)
        inp_va = scaler_S.transform(inp_va)

        estimator.fit(inp_tr, tar_tr.ravel())
        predictions = estimator.predict(inp_va)
        if overall_metric:
            scores.append(scoring_func(tar_va, predictions))
        else:
            scores.append(scoring_func(tar_va, predictions, average=None, zero_division=1))

    if overall_metric:
        estimate = np.mean(scores) * 100.
        stderr = np.std(scores) * 100.
    else:
        # Transpose so each entry gathers one class's scores across all splits;
        # this works for any number of classes.
        per_class = list(zip(*scores))
        estimate = [np.mean(class_scores) * 100. for class_scores in per_class]
        stderr = [np.std(class_scores) * 100. for class_scores in per_class]

    return estimate, stderr



def bootstrap_estimate_MLP(NN, valid_loader, device,  scoring_func=None, random_seed=0, 
                               alpha=0.05, n_splits=200):
    """Score an already-trained network repeatedly on a validation loader.

    On each of ``n_splits`` iterations ``validate(NN, valid_loader, device)``
    is called and ``scoring_func`` is evaluated on the returned truth values
    and predictions.

    Parameters
    ----------
    NN : object
        Network passed through to ``validate``.
    valid_loader : object
        Validation data loader passed through to ``validate``.
    device : object
        Device passed through to ``validate``.
    scoring_func : callable
        Metric called as ``scoring_func(y_true, y_pred)`` when it is
        ``accuracy_score``; any other metric is called with
        ``average=None, zero_division=1`` and is expected to return one score
        per class.
    random_seed : int
        Accepted for signature compatibility; not used.
    alpha : float
        Accepted for signature compatibility; not used.
    n_splits : int
        Number of validate/score iterations.

    Returns
    -------
    estimate, stderr
        Mean and standard deviation (``np.std``) of the scores, both
        multiplied by 100. Scalars for ``accuracy_score``; lists with one
        entry per class otherwise.

    Raises
    ------
    TypeError
        If ``scoring_func`` is ``None``.
    """
    if scoring_func is None:
        raise TypeError("scoring_func must be a metric callable, e.g. accuracy_score")

    overall_metric = scoring_func is accuracy_score

    # NOTE(review): NN is not refitted and no resampling happens in this function,
    # so any spread between iterations can only come from validate() or the
    # loader -- confirm this is the intended "bootstrap".
    scores = []
    for _ in range(n_splits):
        # Always evaluate on the loader that was passed in; the per-class branch
        # previously read a module-level `val_loader` instead of this argument.
        _val_loss, val_predictions, val_truth_values = validate(NN, valid_loader, device)
        if overall_metric:
            scores.append(scoring_func(val_truth_values, val_predictions))
        else:
            scores.append(scoring_func(val_truth_values, val_predictions, average=None, zero_division=1))

    if overall_metric:
        estimate = np.mean(scores) * 100.
        stderr = np.std(scores) * 100.
    else:
        # Transpose so each entry gathers one class's scores across all
        # iterations; this works for any number of classes.
        per_class = list(zip(*scores))
        estimate = [np.mean(class_scores) * 100. for class_scores in per_class]
        stderr = [np.std(class_scores) * 100. for class_scores in per_class]

    return estimate, stderr

## MLP


In [2]:

# Standardise every split with a scaler fitted on the training inputs only.
scaler_S = StandardScaler().fit(inp_tr)
inp_tr = scaler_S.transform(inp_tr)
inp_va = scaler_S.transform(inp_va)
inp_te = scaler_S.transform(inp_te) # Comment out for 75/25 split

# Sanity check: show the shape of each split's inputs and targets.
print('Sizes of Datasets : Inputs , Targets')
print('------------------------------------')
print(f'Training set: {inp_tr.shape} , {tar_tr.shape} \nValidation set: {inp_va.shape} , {tar_va.shape} \nTesting Set: {inp_te.shape}, {tar_te.shape}')
print('------------------------------------')


# Convert all six arrays to tensors, in the same order as before.
inp_tr, tar_tr, inp_va, tar_va, inp_te, tar_te = map(
    torch.as_tensor,
    (inp_tr, tar_tr, inp_va, tar_va, inp_te, tar_te),
)

# Pair inputs with targets, one TensorDataset per split.
train_data = data_utils.TensorDataset(inp_tr, tar_tr)
val_data = data_utils.TensorDataset(inp_va, tar_va)
test_data = data_utils.TensorDataset(inp_te, tar_te)

# Shuffled loaders with batches of 25 (data_utils is the torch.utils.data alias).
train_loader = data_utils.DataLoader(train_data, batch_size=25, shuffle=True)
val_loader = data_utils.DataLoader(val_data, batch_size=25, shuffle=True)
test_loader = data_utils.DataLoader(test_data, batch_size=25, shuffle=True)

Sizes of Datasets : Inputs , Targets
------------------------------------
Training set: (1597, 8) , (1597,) 
Validation set: (5377, 8) , (5377,) 
Testing Set: (19929, 8), (19929,)
------------------------------------


In [13]:
# Instantiate the two-layer MLP with constructor arguments (8, 20, 7).
TwoNN = TwoLayerMLP(8, 20, 7)

# Location of the saved network state.
loadpath = (
    'MLP_Runs_Results/Two_Layer/'
    'FINAL_TwoLayers_300s_Mo09_10kepochs_lr4e1_Settings'
)  # Path to model

# Restore the saved state onto the configured device.
TwoNN.load_state_dict(torch.load(loadpath, map_location=device))

# Recall, precision and accuracy estimates, evaluated in that order with
# identical bootstrap settings.
(estR, stderrR), (estP, stderrP), (estA, stderrA) = (
    bootstrap_estimate_MLP(
        TwoNN, val_loader, device,
        scoring_func=metric, random_seed=0, alpha=0.05, n_splits=200,
    )
    for metric in (recall_score, precision_score, accuracy_score)
)
                            

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

In [14]:
# Write the MLP recall / precision / accuracy estimates as one table row per class.
classes = ["Class I", "Class II", "Galaxies", "AGNs", "Shocks", "PAHs", "Stars"]

# The context manager closes the file even if formatting or writing a row fails.
with open("PRAScores_2LayerMLP_7Classes.txt", "w") as f:
    f.write("TwoLayerMLP Recall & Precision & Accuracy\n")
    for i, cl in enumerate(classes):
        # "\\pm" emits the same two characters the original "\pm" produced, without
        # relying on an unrecognised escape sequence (which newer Pythons warn about).
        f.write(f"{cl}& ${estR[i]:.3f}\\pm{stderrR[i]:.3f}$ & ${estP[i]:.3f}\\pm{stderrP[i]:.3f}$")
        if i == 3:
            # The single overall accuracy value is written on the row with index 3 only.
            f.write(f" & ${estA:.3f}\\pm{stderrA:.3f}$ // \n")
        else:
            f.write("&// \n")

## XGBoost

In [7]:
# XGBoost classifier configuration used by the XGBoost cells below.
xgbcl = xgb.XGBClassifier(
    max_depth=7,
    gamma=2,
    subsample=1.0,
    sampling_method='uniform',
    use_label_encoder=False,
    eval_metric='mlogloss',
)

# Disabled alternative: restore a previously saved model instead.
# xgbcl.load_model("XGB_Settings.json")



In [10]:
# Recall, precision and accuracy estimates for the XGBoost classifier,
# evaluated in that order with identical bootstrap settings.
(estR, stderrR), (estP, stderrP), (estA, stderrA) = (
    bootstrap_estimate(xgbcl, scoring_func=metric, random_seed=0, n_splits=200)
    for metric in (recall_score, precision_score, accuracy_score)
)


In [11]:
# Write the XGBoost recall / precision / accuracy estimates as one table row per class.
classes = ["Class I", "Class II", "Galaxies", "AGNs", "Shocks", "PAHs", "Stars"]

# The context manager closes the file even if formatting or writing a row fails.
with open("PRAScores_XGB_7Classes.txt", "w") as f:
    f.write("XGB Recall & Precision & Accuracy\n")
    for i, cl in enumerate(classes):
        # "\\pm" emits the same two characters the original "\pm" produced, without
        # relying on an unrecognised escape sequence (which newer Pythons warn about).
        f.write(f"{cl}& ${estR[i]:.3f}\\pm{stderrR[i]:.3f}$ & ${estP[i]:.3f}\\pm{stderrP[i]:.3f}$")
        if i == 3:
            # The single overall accuracy value is written on the row with index 3 only.
            f.write(f" & ${estA:.3f}\\pm{stderrA:.3f}$ // \n")
        else:
            f.write("&// \n")

In [9]:
# Single fit on the current training split, then per-class recall on the
# current validation split.
xgbcl.fit(X=inp_tr, y=tar_tr.ravel())

pred_va = xgbcl.predict(inp_va)

print(recall_score(y_true=tar_va, y_pred=pred_va, average=None))

[0.8902439  0.85499058 0.94230769 0.92805755 0.16666667 0.17647059
 0.91534756]
