In [2]:
import numpy as np
import random
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score
import torch
import torch.utils.data as data_utils
import torch.optim as optim
import multiprocess as mp

from custom_dataloader import replicate_data
from NN_Defs import TwoLayerMLP, BaseMLP, train, validate

import xgboost as xgb

# All tensors/models in this notebook are placed on the CPU; `device` is read
# as a module-level global by `main` when it calls `train` / `validate`.
device = torch.device("cpu")

# data load
# X (inputs) and Y (targets) are module-level globals: `bootstrap_estimate`
# reads them directly instead of taking them as arguments.
X = np.load("Input_Class_AllClasses_Sep.npy")
Y = np.load("Target_Class_AllClasses_Sep.npy")

# CM21 Split
# Seven entries each, passed straight to `replicate_data` together with the
# 'seven' mode string. Presumably per-class sample counts for the training and
# validation sets, in the class order used by the result tables -- TODO confirm
# against custom_dataloader.replicate_data.
amounts_train = [300,300,300,300,27,70,300]
amounts_val = [82, 531, 104, 278, 6, 17, 4359]


  from pandas import MultiIndex, Int64Index


In [3]:
def bootstrap_estimate(estimator, scoring_func=None, random_seed=0, n_splits=200):
    """Estimate per-class recall/precision and overall accuracy over repeated splits.

    For each of ``n_splits`` iterations a new split is drawn with
    ``replicate_data`` from the module-level ``X`` / ``Y`` (using the
    module-level ``amounts_train`` / ``amounts_val``), the inputs are
    standardised with statistics fitted on the training inputs only, the
    estimator is refitted, and the validation predictions are scored.

    Parameters
    ----------
    estimator : object
        Anything exposing ``fit(inputs, targets)`` and ``predict(inputs)``.
        It is refitted on every split.
    scoring_func : optional
        Unused; kept so existing callers that pass it keep working.
    random_seed : int or None
        Seeds a private ``random.Random`` that generates the per-split seed
        handed to ``replicate_data``, so the same value always reproduces the
        same sequence of splits. Pass ``None`` to seed from OS entropy
        (non-reproducible). The global ``random`` state is left untouched.
    n_splits : int
        Number of resampled splits; must be at least 1.

    Returns
    -------
    tuple
        ``(estimateR, stderrR, estimateP, stderrP, estimateA, stderrA)``.
        The recall/precision entries are lists with one value per class
        reported by the metric functions; the accuracy entries are scalars.
        Every value is in percent. The ``stderr*`` values are ``np.std``
        (population standard deviation) of the per-split scores.

    Raises
    ------
    ValueError
        If ``n_splits`` is smaller than 1.
    """
    if n_splits < 1:
        raise ValueError("n_splits must be at least 1")

    # Private generator: honours `random_seed` without reseeding the global
    # `random` module that other cells also draw from.
    seed_source = random.Random(random_seed)

    scoresA = []
    scoresP = []
    scoresR = []

    for n in range(n_splits):
        inp_tr, tar_tr, inp_va, tar_va, _, _ = replicate_data(
            X, Y, 'seven', amounts_train, amounts_val, seed_source.randint(0, 1000))
        # scaling data according to training inputs
        scaler_S = StandardScaler().fit(inp_tr)
        inp_tr = scaler_S.transform(inp_tr)
        inp_va = scaler_S.transform(inp_va)
        estimator.fit(inp_tr, tar_tr.ravel())
        pred_va = estimator.predict(inp_va)
        scoresA.append(accuracy_score(tar_va, pred_va))
        scoresR.append(recall_score(tar_va, pred_va, average=None, zero_division=1))
        scoresP.append(precision_score(tar_va, pred_va, average=None, zero_division=1))
        print(f'n = {n}')

    # Transpose from one row per split to one row per class.
    scoresR = list(map(list, zip(*scoresR)))
    scoresP = list(map(list, zip(*scoresP)))

    estimateA = np.mean(scoresA)*100.
    stderrA = np.std(scoresA)*100.

    # One entry per class, however many classes the metrics reported
    # (previously hard-coded to exactly seven indices).
    estimateR = [np.mean(per_class)*100. for per_class in scoresR]
    stderrR = [np.std(per_class)*100. for per_class in scoresR]

    estimateP = [np.mean(per_class)*100. for per_class in scoresP]
    stderrP = [np.std(per_class)*100. for per_class in scoresP]

    return estimateR, stderrR, estimateP, stderrP, estimateA, stderrA



def bootstrap_estimate_MLP(NN, X, Y, n_splits=200, epochs =10000, OptInstance=None):
    """Estimate per-class recall/precision and overall accuracy of an MLP over repeated splits.

    For each of ``n_splits`` iterations fresh loaders are built with
    ``MLP_data_setup(X, Y)``, the network is trained for ``epochs`` epochs via
    ``main`` and the validation predictions from ``main`` are scored.

    The network weights and the optimizer state are restored to the values
    they had on entry before every split. Without this, each split would
    continue training the weights left by the previous one, so the splits
    would not be independent and earlier training data could overlap a later
    validation set.

    Parameters
    ----------
    NN : network instance
        Must provide ``state_dict()`` / ``load_state_dict()``
        (assumed to be a ``torch.nn.Module`` -- TODO confirm against NN_Defs).
        After the call it holds the weights trained on the last split.
    X, Y : array-like
        Inputs and targets, forwarded unchanged to ``MLP_data_setup``.
    n_splits : int
        Number of resampled splits; must be at least 1.
    epochs : int
        Training epochs per split, forwarded to ``main``.
    OptInstance : optimizer, optional
        Optimizer bound to ``NN``'s parameters. When omitted, the module-level
        ``optimizer`` is used, as before.

    Returns
    -------
    tuple
        ``(estimateR, stderrR, estimateP, stderrP, estimateA, stderrA)``.
        The recall/precision entries are lists with one value per class
        reported by the metric functions; the accuracy entries are scalars.
        Every value is in percent. The ``stderr*`` values are ``np.std``
        (population standard deviation) of the per-split scores.

    Raises
    ------
    ValueError
        If ``n_splits`` is smaller than 1.
    """
    import copy  # local import keeps this definition self-contained

    if n_splits < 1:
        raise ValueError("n_splits must be at least 1")

    opt = optimizer if OptInstance is None else OptInstance

    # Snapshots taken once, before any training, and restored for every split.
    initial_weights = copy.deepcopy(NN.state_dict())
    initial_opt_state = copy.deepcopy(opt.state_dict())

    scoresA = []
    scoresP = []
    scoresR = []

    for n in range(n_splits):
        NN.load_state_dict(initial_weights)
        # Hand over a copy so in-place optimizer updates (e.g. momentum
        # buffers) can never leak back into the snapshot.
        opt.load_state_dict(copy.deepcopy(initial_opt_state))

        train_loader, val_loader = MLP_data_setup(X, Y)
        val_predictions, val_truth_values = main(epochs, NN, opt, train_loader, val_loader)
        scoresA.append(accuracy_score(val_truth_values, val_predictions))
        scoresR.append(recall_score(val_truth_values, val_predictions, average=None, zero_division=1))
        scoresP.append(precision_score(val_truth_values, val_predictions, average=None, zero_division=1))
        print(f'n = {n}')

    # Transpose from one row per split to one row per class.
    scoresR = list(map(list, zip(*scoresR)))
    scoresP = list(map(list, zip(*scoresP)))

    estimateA = np.mean(scoresA)*100.
    stderrA = np.std(scoresA)*100.

    # One entry per class, however many classes the metrics reported
    # (previously hard-coded to exactly seven indices).
    estimateR = [np.mean(per_class)*100. for per_class in scoresR]
    stderrR = [np.std(per_class)*100. for per_class in scoresR]

    estimateP = [np.mean(per_class)*100. for per_class in scoresP]
    stderrP = [np.std(per_class)*100. for per_class in scoresP]

    return estimateR, stderrR, estimateP, stderrP, estimateA, stderrA

## MLP


In [4]:

def main(epochs, NetInstance, OptInstance, train_loader, val_loader, ScheduleInstance=None):
    """Train a network for ``epochs`` epochs and return its final validation output.

    Every epoch calls ``train`` followed by ``validate`` (both imported from
    ``NN_Defs``) on the module-level ``device``; if a scheduler is given its
    ``step()`` is called once per epoch, after validation.

    Parameters
    ----------
    epochs : int
        Number of training epochs. With ``epochs <= 0`` no training happens
        and the untrained network is validated once, instead of failing with
        ``UnboundLocalError`` as before.
    NetInstance, OptInstance :
        Network and optimizer, forwarded to ``train`` / ``validate``.
    train_loader, val_loader :
        Data loaders, forwarded to ``train`` and ``validate`` respectively.
    ScheduleInstance : optional
        Object with a no-argument ``step()`` method.

    Returns
    -------
    tuple
        ``(val_predictions, val_truth_values)`` as returned by the last
        ``validate`` call.
    """
    for epoch in range(0, epochs):
        train(epoch, NetInstance, OptInstance, train_loader, device)
        # Validation runs every epoch, as in the original; only the last
        # result is returned.
        _, val_predictions, val_truth_values = validate(NetInstance, val_loader, device)

        if ScheduleInstance is not None:
            ScheduleInstance.step()

    if epochs <= 0:
        # The loop body never ran, so there is no validation result yet.
        _, val_predictions, val_truth_values = validate(NetInstance, val_loader, device)

    return val_predictions, val_truth_values
    




def MLP_data_setup(X, Y, batch_size=25, seed=None):
    """Draw one train/validation split and wrap it in shuffling data loaders.

    The split comes from ``replicate_data`` (mode ``'seven'``, sizes from the
    module-level ``amounts_train`` / ``amounts_val``). Inputs are standardised
    with a ``StandardScaler`` fitted on the training inputs only. The test
    part returned by ``replicate_data`` is not used here.

    Parameters
    ----------
    X, Y : array-like
        Inputs and targets, forwarded to ``replicate_data``.
    batch_size : int
        Batch size of both loaders (default 25, the previously hard-coded value).
    seed : int, optional
        Seed handed to ``replicate_data``. When omitted, one is drawn with
        ``random.randint(0, 1000)``, as before.

    Returns
    -------
    tuple
        ``(train_loader, val_loader)``, both built with ``shuffle=True``.
    """
    if seed is None:
        seed = random.randint(0, 1000)

    inp_tr, tar_tr, inp_va, tar_va, _, _ = replicate_data(
        X, Y, 'seven', amounts_train, amounts_val, seed)

    # scaling data according to training inputs
    scaler_S = StandardScaler().fit(inp_tr)
    inp_tr = scaler_S.transform(inp_tr)
    inp_va = scaler_S.transform(inp_va)

    # pass tensors into TensorDataset instances
    train_data = data_utils.TensorDataset(torch.as_tensor(inp_tr), torch.as_tensor(tar_tr))
    val_data = data_utils.TensorDataset(torch.as_tensor(inp_va), torch.as_tensor(tar_va))

    # constructing data loaders
    train_loader = data_utils.DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = data_utils.DataLoader(val_data, batch_size=batch_size, shuffle=True)
    return train_loader, val_loader

### Two layer MLP

In [None]:
# create nn instance
# Positional arguments 8, 20, 7 are passed to TwoLayerMLP as-is; presumably
# input features, hidden units and output classes -- TODO confirm in NN_Defs.
TwoNN = TwoLayerMLP(8, 20, 7, weight_initialize=True)
## load settings in
# NOTE: `optimizer` must stay a module-level name -- bootstrap_estimate_MLP does
# not receive it as an argument but reads this global when it calls `main`.
optimizer = optim.SGD(TwoNN.parameters(), lr=4e-1, momentum=0.9)
# Long-running: 200 splits x 3000 epochs. The six results are consumed by the
# cell that writes PRAScores_2LayerMLP_7Classes.txt.
estR, stderrR, estP, stderrP, estA, stderrA  = bootstrap_estimate_MLP(TwoNN, X, Y, n_splits=200, epochs =3000)
                            

In [6]:
# Write the two-layer MLP recall / precision / accuracy summary as table rows.
# Reads estR, stderrR, estP, stderrP, estA, stderrA from the notebook namespace.
classes = ["Class I", "Class II", "Galaxies", "AGNs", "Shocks", "PAHs", "Stars"]

# `with` guarantees the file is closed even if formatting a row raises
# (e.g. a missing class entry); the old open()/close() pair leaked the handle.
with open("PRAScores_2LayerMLP_7Classes.txt", "w") as f:
    f.write("TwoLayerMLP Recall & Precision & Accuracy\n")
    for i, cl in enumerate(classes):
        # "\\pm" writes the same two characters as the old "\pm", without
        # relying on an invalid escape sequence that newer Python warns about.
        row = f"{cl}& ${estR[i]:.1f}\\pm{stderrR[i]:.1f}$ & ${estP[i]:.1f}\\pm{stderrP[i]:.1f}$"
        if i==3:
            # The single overall accuracy value is attached to the fourth row only.
            row += f" & ${estA:.1f}\\pm{stderrA:.1f}$ // \n"
        else:
            row += "&// \n"
        # NOTE(review): rows end in "//", not the LaTeX row terminator "\\";
        # output is kept byte-identical here -- confirm whether that is intended.
        f.write(row)

### One layer MLP

In [9]:
import multiprocess as mp
import random

def bootstrap(NN,epochs):
    """Run one bootstrap replicate: fresh split, train, score on validation data.

    The replicate trains a private deep copy of ``NN`` with a new optimizer
    built on that copy's parameters. The module-level ``optimizer`` serves
    only as a template (its class and ``defaults``). Previously the global
    optimizer itself was handed to ``main``; it is bound to the parameters of
    the network it was created from, so whenever ``NN`` was a different object
    (for instance a copy received by a pool worker) the optimizer was not
    updating the network being trained. Copying also keeps replicates that
    share one ``NN`` object independent of each other.

    Parameters
    ----------
    NN : network instance
        Template network; it is not modified.
    epochs : int
        Training epochs, forwarded to ``main``.

    Returns
    -------
    tuple
        ``(ScoresR, ScoresP, ScoresA)``: per-class recall, per-class precision
        and overall accuracy on the validation split.
    """
    import copy  # local import keeps this definition self-contained

    model = copy.deepcopy(NN)
    # Same optimizer class and default hyper-parameters as the template, but
    # attached to the parameters that will actually be trained.
    # Per-parameter-group overrides on the template are not carried over.
    local_optimizer = type(optimizer)(model.parameters(), **optimizer.defaults)

    train_loader, val_loader = MLP_data_setup(X, Y)
    pred_va, tar_va = main(epochs, model, local_optimizer, train_loader, val_loader)
    ScoresA = accuracy_score(tar_va,pred_va)
    ScoresR = recall_score(tar_va,pred_va,average=None,zero_division=1)
    ScoresP = precision_score(tar_va,pred_va,average=None,zero_division=1)

    return ScoresR, ScoresP, ScoresA

# Reload the data so this cell can be run on its own.
X = np.load("Input_Class_AllClasses_Sep.npy")
Y = np.load("Target_Class_AllClasses_Sep.npy") # For original targets via Gutermuth 2009 Method

# NOTE: this rebinds the module-level `amounts_train` (300 -> 331 for the large
# entries) for every cell executed afterwards, not only for this experiment.
amounts_train = [331,331,331,331,27,70,331]
amounts_val = [82, 531, 104, 278, 6, 17, 4359]

BaseNN = BaseMLP(8, 20, 7, weight_initialize=True)
## load settings in
optimizer = optim.SGD(BaseNN.parameters(), lr=4e-1, momentum=0.9)

# 4 batches x 50 replicates of 50000 epochs each, spread over 12 worker processes.
iters = [(BaseNN,50000)] * 50
ans = []
for n in [0,1,2,3]:

    with mp.Pool(12) as pool:
        # One list of 50 (ScoresR, ScoresP, ScoresA) tuples is appended per batch,
        # so `ans` is a list of per-batch lists.
        ans.append(pool.starmap(bootstrap, iters))
    # Checkpoint everything gathered so far (each file is cumulative).
    # The results mix per-class arrays with scalar accuracies, so the nesting is
    # ragged: it must be stored as an explicit object array, because recent
    # NumPy versions refuse to build an array from ragged input implicitly.
    np.save(f"intermediatesave_onelayerMLP_{n}.npy", np.array(ans, dtype=object))
    




In [None]:

# Aggregate the one-layer MLP bootstrap results held in `ans`.
#
# `ans` is filled with one list per pool batch, each holding
# (ScoresR, ScoresP, ScoresA) tuples. The previous code transposed `ans`
# directly, which grouped results by position within a batch instead of by
# score type. Flatten the batches first; a flat list of tuples is accepted too.
replicates = [
    result
    for batch in ans
    for result in (batch if isinstance(batch, list) else [batch])
]

scoresR = [recall for recall, _, _ in replicates]
scoresP = [precision for _, precision, _ in replicates]
scoresA = [accuracy for _, _, accuracy in replicates]

# Transpose from one row per replicate to one row per class.
scoresR = list(map(list, zip(*scoresR)))
scoresP = list(map(list, zip(*scoresP)))

# All values in percent; the "stderr" values are np.std (population standard
# deviation) of the per-replicate scores.
estA = np.mean(scoresA)*100.
stderrA = np.std(scoresA)*100.

# One entry per class, however many classes were scored
# (previously hard-coded to exactly seven indices).
estR = [np.mean(per_class)*100. for per_class in scoresR]
stderrR = [np.std(per_class)*100. for per_class in scoresR]

estP = [np.mean(per_class)*100. for per_class in scoresP]
stderrP = [np.std(per_class)*100. for per_class in scoresP]
  

In [7]:
# # create nn instance
# BaseNN = BaseMLP(8, 20, 7, weight_initialize=True)
# ## load settings in
# optimizer = optim.SGD(BaseNN.parameters(), lr=4e-1, momentum=0.9)
# estR, stderrR, estP, stderrP, estA, stderrA  = bootstrap_estimate_MLP(BaseNN, X, Y, n_splits=200, epochs =50000)

n = 0
n = 1
n = 2
n = 3
n = 4
n = 5
n = 6
n = 7
n = 8
n = 9
n = 10
n = 11
n = 12
n = 13
n = 14
n = 15
n = 16
n = 17
n = 18
n = 19
n = 20
n = 21
n = 22
n = 23
n = 24
n = 25
n = 26
n = 27
n = 28
n = 29
n = 30
n = 31
n = 32
n = 33
n = 34
n = 35
n = 36
n = 37
n = 38
n = 39
n = 40
n = 41
n = 42
n = 43
n = 44
n = 45
n = 46
n = 47
n = 48
n = 49
n = 50
n = 51
n = 52
n = 53
n = 54
n = 55
n = 56
n = 57
n = 58
n = 59
n = 60
n = 61
n = 62
n = 63
n = 64
n = 65
n = 66
n = 67
n = 68
n = 69
n = 70
n = 71
n = 72
n = 73
n = 74
n = 75
n = 76
n = 77
n = 78
n = 79
n = 80
n = 81
n = 82
n = 83
n = 84
n = 85
n = 86
n = 87
n = 88
n = 89
n = 90
n = 91
n = 92
n = 93
n = 94
n = 95
n = 96
n = 97
n = 98
n = 99
n = 100
n = 101
n = 102
n = 103
n = 104
n = 105
n = 106
n = 107
n = 108
n = 109
n = 110
n = 111
n = 112
n = 113
n = 114
n = 115
n = 116
n = 117
n = 118
n = 119
n = 120
n = 121
n = 122
n = 123
n = 124
n = 125
n = 126
n = 127
n = 128
n = 129
n = 130
n = 131
n = 132
n = 133
n = 134
n = 135
n = 136
n = 137
n = 13

In [None]:
# Write the one-layer MLP recall / precision / accuracy summary as table rows.
# Reads estR, stderrR, estP, stderrP, estA, stderrA from the notebook namespace.
classes = ["Class I", "Class II", "Galaxies", "AGNs", "Shocks", "PAHs", "Stars"]

# `with` guarantees the file is closed even if formatting a row raises
# (e.g. a missing class entry); the old open()/close() pair leaked the handle.
with open("PRAScores_1LayerMLP_7Classes.txt", "w") as f:
    f.write("OneLayerMLP & Recall & Precision & Accuracy//\n")
    for i, cl in enumerate(classes):
        # "\\pm" writes the same two characters as the old "\pm", without
        # relying on an invalid escape sequence that newer Python warns about.
        row = f"{cl}& ${estR[i]:.1f}\\pm{stderrR[i]:.1f}$ & ${estP[i]:.1f}\\pm{stderrP[i]:.1f}$"
        if i==3:
            # The single overall accuracy value is attached to the fourth row only.
            row += f" & ${estA:.1f}\\pm{stderrA:.1f}$ // \n"
        else:
            row += "&// \n"
        # NOTE(review): rows end in "//", not the LaTeX row terminator "\\";
        # output is kept byte-identical here -- confirm whether that is intended.
        f.write(row)