# Forward models

This notebook reproduces the results from the forward models presented in the paper.

It needs two datasets:
- The reconstructed pulse shapes (generated by running `pulse_reconstruction_hr.ipynb` first)
- The experimental dataset from galadriel

In [None]:
import pandas as pd
import numpy as np

import torch

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

import models.fcnn as models_fcnn

### LOADING THE DATASET

In [None]:
# HDF5 file holding the experimental dataset
filename = '../laps-ml/datasets/galadriel_dataset_24_09_18_high_res.h5'

# HDF5 file holding the reconstructed pulse data
filename_pulse = '../laps-ml/datasets/pulse_240918.h5'

In [None]:
# Dazzler input data, stored under the 'df_input' key of the experimental file
df_input = pd.read_hdf(filename, key='df_input')

# Reconstructed pulse data: the 'df_time_200' and 'df_pulse_200' tables
df_time_200 = pd.read_hdf(filename_pulse, key='df_time_200')
df_pulse_200 = pd.read_hdf(filename_pulse, key='df_pulse_200')

# Plain NumPy copy of the time table
t_200 = df_time_200.to_numpy()

### Drop shots with bad goodness

In [None]:
# Only shots whose 'goodness' is strictly above this threshold are kept.
goodness_val=1.3

# Boolean mask (one entry per row of df_input) marking the shots to keep.
good_shots_mask=(df_input['goodness']>goodness_val).to_numpy()

# Index labels of the kept shots (same content as before, kept for inspection).
good_shots_list=df_input.index[good_shots_mask].values
#print(good_shots_list)

# Select the matching rows in the pulse table by POSITION. `.iloc` is purely
# positional, so feeding it index labels (as was done previously) is only
# correct when df_input carries a default 0..n-1 index.
good_shots_positions=np.flatnonzero(good_shots_mask)

# Build fresh, re-indexed frames instead of resetting the index in place on a
# filtered slice (which triggers SettingWithCopyWarning).
df_input_reduced=df_input[good_shots_mask].reset_index(drop=True)
df_pulse_200_reduced=df_pulse_200.iloc[good_shots_positions].reset_index(drop=True)

### Define the model variables

In [None]:
# Hold out 30% of the rows for testing (X: Dazzler parameters, Y: Wizzler);
# the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df_input_reduced,
    df_pulse_200_reduced,
    test_size=0.3,
    random_state=101,
)

## Modeling

In [None]:
# Set the device we will be using to train the model.
# `torch.cuda.is_available()` can return True while the GPU is still unusable
# (e.g. busy or in exclusive mode), which only surfaces later as a CUDA error
# during training. Probe the device with a tiny allocation and fall back to
# the CPU if that fails.
device = torch.device("cpu")
if torch.cuda.is_available():
    try:
        torch.zeros(1, device="cuda")
    except RuntimeError as err:
        print("CUDA is reported as available but could not be used; falling back to CPU:", err)
    else:
        device = torch.device("cuda")
#device="cpu"
print("We are using:",device)

We are using: cuda


### Linear model

In [None]:
def train_lin_forward(X_train, X_test, y_train, y_test):
    """Fit a linear-regression forward model and score it on the test set.

    Parameters
    ----------
    X_train, X_test
        Training and test inputs passed to ``LinearRegression.fit`` / ``predict``.
    y_train
        Training targets.
    y_test
        Test targets; must provide ``reset_index`` and a 2-D ``shape``
        (e.g. a pandas DataFrame).

    Returns
    -------
    tuple
        ``(model, error)`` where ``model`` is the fitted ``LinearRegression``
        and ``error`` holds, for each test row, the sum of absolute errors
        divided by the number of target columns.
    """
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    predicted = regressor.predict(X_test)

    # Drop the shuffled index so the targets line up row by row with the
    # (positionally ordered) prediction array.
    targets = y_test.reset_index(drop=True)

    abs_residuals = (targets - predicted).abs()
    n_outputs = y_test.shape[1]
    error_per_row = abs_residuals.sum(axis=1) / n_outputs  # sum error / num columns

    return regressor, error_per_row

In [None]:
# Train the linear forward model and collect its per-row test error
(forward_model_lin,
 df_error_forward_lin) = train_lin_forward(X_train, X_test, y_train, y_test)

### Random Forest

In [None]:
def train_rf_forward(X_train, X_test, y_train, y_test, *,
                     n_estimators=300, max_features=1.0, max_depth=20,
                     random_state=18):
    """Fit a random-forest forward model and score it on the test set.

    Parameters
    ----------
    X_train, X_test
        Training and test inputs passed to ``RandomForestRegressor.fit`` /
        ``predict``.
    y_train
        Training targets.
    y_test
        Test targets; must provide ``reset_index`` and a 2-D ``shape``
        (e.g. a pandas DataFrame).
    n_estimators, max_features, max_depth, random_state
        Keyword-only hyperparameters forwarded unchanged to
        ``RandomForestRegressor``. The defaults are the values that were
        previously hard-coded, so existing calls behave exactly as before.

    Returns
    -------
    tuple
        ``(model, error)`` where ``model`` is the fitted
        ``RandomForestRegressor`` and ``error`` holds, for each test row, the
        sum of absolute errors divided by the number of target columns.
    """
    # Train the forward model with a random forest
    forward_model_rf = RandomForestRegressor(n_estimators=n_estimators,
                                             max_features=max_features,
                                             max_depth=max_depth,
                                             random_state=random_state)
    forward_model_rf.fit(X_train, y_train)
    y_predict_forward = forward_model_rf.predict(X_test)

    # Reset the index on y_test so it has the same indexes as y_predict
    y_test_reset = y_test.reset_index(drop=True)

    # Study the error distribution
    abs_error = abs(y_test_reset - y_predict_forward)
    df_error_forward_rf = abs_error.sum(axis=1) / y_test.shape[1]  # sum error / num columns

    return forward_model_rf, df_error_forward_rf

In [None]:
# Train the random-forest forward model and collect its per-row test error
(forward_model_rf,
 df_error_forward_rf) = train_rf_forward(X_train, X_test, y_train, y_test)

### Multi-Layer Perceptron

In [None]:
# Build the fully connected forward model from the train/test split and the chosen device
forward_model_mlp = models_fcnn.FWmodelNN(X_train, y_train, X_test, y_test, device)

# Positional training arguments, in order:
#   epochs, batch size, print frequency, learning rate
forward_model_mlp.train(100, 128, 20, 0.001)

# Predict on the held-out inputs and compute the model's MAE
y_predict_nn_fwd = forward_model_mlp.predict(X_test)
error_fwd_model_mlp = forward_model_mlp.error_calc_mae()

AcceleratorError: CUDA error: CUDA-capable device(s) is/are busy or unavailable
Search for `cudaErrorDevicesUnavailable' in https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html for more information.
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
