In [1]:
import os
import pickle
import sys
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from skorch import NeuralNetRegressor
from skorch.callbacks import EarlyStopping, LRScheduler
from skorch.dataset import Dataset
from skorch.helper import predefined_split
from tqdm.auto import tqdm, trange

sys.path.append('../')
from pyfiles.helpful_functions import InputLogTransformer, OutputLogTransformer, build_neural_network, make_datasets, LDIAModel
# Train on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [2]:
# --- Run configuration: everything a reader might tune lives in this cell ---
noise = 10 # ADJUST level of gaussian noise added to outputs
mod_type = 'nn'
description = mod_type + '_noise-' + str(noise)  # short tag for this run, e.g. 'nn_noise-10'
filename = '../datasets/fuchs_v3-2_seed-5_points_25000_noise_' + str(noise) + '.csv'  # CHANGE TO DESIRED DATA FILE

# Seed the global NumPy and PyTorch random number generators before any model
# is built, so that a fresh "Restart & Run All" repeats the same training runs
# and the scores of the grid-search candidates can be compared across sessions.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Load the full table once; later cells select the input/output columns from it.
df = pd.read_csv(filename)

In [3]:
# Column names used as model inputs (independent variables) and as training outputs.
input_list = ['Intensity_(W_cm2)', 'Target_Thickness (um)', 'Focal_Distance_(um)']
output_list = ['Max_Proton_Energy_(MeV)', 'Total_Proton_Energy_(MeV)', 'Avg_Proton_Energy_(MeV)']

# Pull the selected columns out of the frame as float32 arrays.
X = np.array(df.loc[:, input_list], dtype=np.float32)
y = np.array(df.loc[:, output_list], dtype=np.float32)

# Ordered (unshuffled) 80/20 split; the test part is not used in this notebook section.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, shuffle=False)

# Keep only the leading `pct` percent of the training rows for the grid search
# (5,000 of 20,000 points for the 25,000-row file).
pct = 25
len_df = int(len(X_train)*(pct/100))
X_train, y_train = X_train[:len_df], y_train[:len_df]

# make_datasets is a project helper; judging by the unpacking it hands back the
# training arrays, the validation arrays and the input/output transformers.
# NOTE(review): how it transforms/splits the data is defined in pyfiles.helpful_functions.
(X_train, y_train,
 X_val, y_val,
 input_transformer, output_transformer) = make_datasets(X_train, y_train, random_state=42)

# Wrap both parts as skorch datasets and report their sizes (training first, then validation).
train_ds, valid_ds = Dataset(X_train, y_train), Dataset(X_val, y_val)
print(len(train_ds), len(valid_ds), sep='\n')

4000
1000


In [4]:
# Callbacks shared by every fit: stop once the validation loss has not improved
# for 5 epochs, and decay the learning rate exponentially. The scheduler is
# registered under the name 'lr_scheduler' so its gamma can be addressed as
# 'callbacks__lr_scheduler__gamma'.
training_callbacks = [
    EarlyStopping(patience=5, monitor='valid_loss'),
    ('lr_scheduler', LRScheduler(policy='ExponentialLR', gamma=.9)),
]

# Baseline regressor; the grid search overrides several of these settings.
model = NeuralNetRegressor(
    # network architecture
    module=LDIAModel,
    module__n_hidden=1,
    module__n_neurons=16,
    module__activation=nn.LeakyReLU(),
    # loss and optimisation
    criterion=nn.MSELoss(),
    optimizer__lr=1e-3,
    batch_size=32,
    max_epochs=100,
    iterator_train__shuffle=True,
    # validation uses the fixed hold-out set instead of an internal split
    train_split=predefined_split(valid_ds),
    callbacks=training_callbacks,
    # runtime
    device=device,
    verbose=0,
)

In [66]:
# Every fit validates against the same fixed hold-out set.
fixed_validation_split = predefined_split(valid_ds)

# Search space: only the layer width varies (3 candidates); all other entries
# are single-valued and simply pin the setting for this run.
param_grid = {
    'module__n_hidden': [4],
    'module__n_neurons': [16, 32, 64],
    'module__activation': [nn.LeakyReLU()],
    'optimizer': [optim.Adam],
    'callbacks__lr_scheduler__gamma': [0.95],
    'batch_size': [256],
    'optimizer__lr': [1e-3],
    'train_split': [fixed_validation_split],
}

In [67]:
# 5-fold cross-validated search over param_grid, scored by negative MSE
# (values closer to zero are better). refit=False: no final model is retrained
# on the full training data, only the cross-validation results are kept.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=1,
    refit=False,
    verbose=3,
)
grid_result = grid.fit(train_ds.X, train_ds.y)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Per-candidate mean score, score spread and parameter dict, kept for inspection.
means, stds, params = (
    grid_result.cv_results_[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)
# Optional: uncomment to list every candidate.
# for mean, stdev, param in zip(means, stds, params):
#     print("%f (%f) with: %r" % (mean, stdev, param))

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV 1/5] END batch_size=256, callbacks__lr_scheduler__gamma=0.95, module__activation=LeakyReLU(negative_slope=0.01), module__n_hidden=4, module__n_neurons=16, optimizer=<class 'torch.optim.adam.Adam'>, optimizer__lr=0.001, train_split=functools.partial(<function _make_split at 0x2b30c6a7c670>, valid_ds=<skorch.dataset.Dataset object at 0x2b30d167d040>);, score=-0.009 total time=   5.3s
[CV 2/5] END batch_size=256, callbacks__lr_scheduler__gamma=0.95, module__activation=LeakyReLU(negative_slope=0.01), module__n_hidden=4, module__n_neurons=16, optimizer=<class 'torch.optim.adam.Adam'>, optimizer__lr=0.001, train_split=functools.partial(<function _make_split at 0x2b30c6a7c670>, valid_ds=<skorch.dataset.Dataset object at 0x2b30d167d040>);, score=-0.009 total time=   4.9s
[CV 3/5] END batch_size=256, callbacks__lr_scheduler__gamma=0.95, module__activation=LeakyReLU(negative_slope=0.01), module__n_hidden=4, module__n_neurons=16, opt

In [68]:
# Report the winning candidate; the score is negative MSE, so closer to zero is better.
best_score, best_params = grid_result.best_score_, grid_result.best_params_
print("Best Score: %f using \n%s" % (best_score, best_params))

# Optional: uncomment to inspect the idx-th best candidate (idx = 1 is the best).
# idxs = np.argsort(means)
# idx = 1
# print(means[idxs[-idx]])
# print(stds[idxs[-idx]])
# print(params[idxs[-idx]])

Best Score: -0.005617 using 
{'batch_size': 256, 'callbacks__lr_scheduler__gamma': 0.95, 'module__activation': LeakyReLU(negative_slope=0.01), 'module__n_hidden': 4, 'module__n_neurons': 64, 'optimizer': <class 'torch.optim.adam.Adam'>, 'optimizer__lr': 0.001, 'train_split': functools.partial(<function _make_split at 0x2b30c6a7c670>, valid_ds=<skorch.dataset.Dataset object at 0x2b30d167d040>)}


In [69]:
# Persist the cross-validation results table so it can be analysed without
# re-running the search.
results_path = 'nn_cv_results/grid_search_4.pkl'

# open(..., 'wb') does not create missing folders; make sure the target
# directory exists so a long search is not lost to a FileNotFoundError.
os.makedirs(os.path.dirname(results_path), exist_ok=True)

# NOTE: each entry of cv_results_['params'] carries the 'train_split' value from
# the grid, so that object is serialised along with the scores.
with open(results_path, 'wb') as file:
    pickle.dump(grid_result.cv_results_, file)