In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
import numpy as np
from skorch.callbacks import EpochScoring
from sklearn.preprocessing import StandardScaler
from skorch import NeuralNetRegressor
from sklearn.model_selection import train_test_split
import pandas as pd
from scipy.signal import savgol_filter
from scipy.stats import zscore

In [36]:
data = pd.read_csv("./train.csv")

spectrum = data.iloc[:, 6:]
spectrum_filtered = pd.DataFrame(savgol_filter(spectrum, 7, 3, deriv = 2, axis = 0))
spectrum_filtered_st = zscore(spectrum_filtered, axis = 1)

X = spectrum_filtered_st
y = data["PURITY"]

X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).reshape(-1, 1)
X_valid_tensor = torch.tensor(X_valid.values, dtype=torch.float32)
y_valid_tensor = torch.tensor(y_valid.values, dtype=torch.float32).reshape(-1, 1)

#check for nan and inf
print(X_train.isna().sum())
print(y_train.isna().sum())
print(X_valid.isna().sum())
print(y_valid.isna().sum())

print(np.isinf(X_train.values).sum())
print(np.isinf(y_train.values).sum())
print(np.isinf(X_valid.values).sum())
print(np.isinf(y_valid.values).sum())


0      0
1      0
2      0
3      0
4      0
      ..
120    0
121    0
122    0
123    0
124    0
Length: 125, dtype: int64
0
0      0
1      0
2      0
3      0
4      0
      ..
120    0
121    0
122    0
123    0
124    0
Length: 125, dtype: int64
0
0
0
0
0


In [37]:
#check for dimensions
print("Dimensions de X_train_tensor :", X_train_tensor.shape)
print("Dimensions de y_train_tensor :", y_train_tensor.shape)
print("Dimensions de X_valid_tensor :", X_valid_tensor.shape)
print("Dimensions de y_valid_tensor :", y_valid_tensor.shape)


Dimensions de X_train_tensor : torch.Size([1040, 125])
Dimensions de y_train_tensor : torch.Size([1040, 1])
Dimensions de X_valid_tensor : torch.Size([260, 125])
Dimensions de y_valid_tensor : torch.Size([260, 1])


In [55]:
class SimpleNN(nn.Module):
    def __init__(self, fc1_out_features=100, fc2_out_features=64):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(X_train_tensor.shape[1], fc1_out_features)
        self.fc2 = nn.Linear(fc1_out_features, fc2_out_features)
        self.fc3 = nn.Linear(fc2_out_features, 1)

      
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [57]:
#création du modèle
net = NeuralNetRegressor(
    SimpleNN,
    max_epochs=10,
    lr=0.1,
    iterator_train__shuffle=False,
    criterion=torch.nn.MSELoss,
    optimizer=torch.optim.Adam,
    callbacks=[EpochScoring('neg_mean_squared_error', lower_is_better=True)]
)

net.fit(X_train_tensor, y_train_tensor)
y_pred = net.predict(X_valid_tensor)
print("Predictions:", y_pred)
#mse = mean_squared_error(y_valid_tensor.numpy(), y_pred)
#print("Mean Squared Error:", mse)

NameError: name 'EpochScoring' is not defined

In [48]:
'''
params = {
    'lr': [0.01, 0.02, 0.05],
    'max_epochs': [10, 20],
    'module__fc1_out_features': [100, 128],
    'module__fc2_out_features': [64, 128],
}

grid_search = GridSearchCV(net, params, refit=True, cv=5, scoring='neg_mean_squared_error')
grid_result = grid_search.fit(X_train_tensor, y_train_tensor)

#print("Best MSE: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
''' 

'\nparams = {\n    \'lr\': [0.01, 0.02, 0.05],\n    \'max_epochs\': [10, 20],\n    \'module__fc1_out_features\': [100, 128],\n    \'module__fc2_out_features\': [64, 128],\n}\n\ngrid_search = GridSearchCV(net, params, refit=True, cv=5, scoring=\'neg_mean_squared_error\')\ngrid_result = grid_search.fit(X_train_tensor, y_train_tensor)\n\n#print("Best MSE: %f using %s" % (grid_result.best_score_, grid_result.best_params_))\n'

In [None]:
'''
# Effectuer la recherche par grille avec error_score='raise' pour des messages d'erreur détaillés
try:
    grid_search = GridSearchCV(net, params, refit=True, cv=5, scoring='neg_mean_squared_error', error_score='raise')
    grid_result = grid_search.fit(X_train_tensor, y_train_tensor)
    print("Best MSE: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
except Exception as e:
    print("Erreur lors de l'exécution de GridSearchCV :", e)
'''

# Testez un modèle simple sans GridSearchCV
try:
    net.fit(X_train_tensor, y_train_tensor)
    y_pred = net.predict(X_valid_tensor)
    mse = np.mean((y_pred - y_valid_tensor.numpy())**2)
    print("MSE sans GridSearchCV :", mse)
except Exception as e:
    print("Erreur lors de l'entraînement du modèle simple :", e)

Re-initializing module.
Re-initializing criterion.
Re-initializing optimizer.
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1           nan           nan  0.0780
      2           nan           nan  0.0753
      3           nan           nan  0.0643
      4           nan           nan  0.0552
      5           nan           nan  0.0430
      6           nan           nan  0.0405
      7           nan           nan  0.0596
      8           nan           nan  0.0685
      9           nan           nan  0.0645
     10           nan           nan  0.0697
x_train tensor([[ 1.2211,  1.0888,  0.9280,  ...,  2.4798,  2.5090,  2.4972],
        [-0.7959, -0.8517, -0.9336,  ...,  1.6534,  1.4342,  1.2044],
        [ 0.0804,  0.2835,  1.5408,  ...,  0.6895,  1.1460,  1.5403],
        ...,
        [-0.1051,  0.1027,  0.3173,  ..., -2.1049, -2.0342, -1.9146],
        [-1.6750, -1.5430, -1.7206,  ..., -3.1217, -3.4121, -3.6190],
        [ 2.4298,  2.21

In [45]:
mach2 = grid_result.best_estimator_
mach2.fit(X_train_tensor, y_train_tensor)
predictions = mach2.predict(torch.tensor(X_valid_tensor[172:, :], dtype=torch.float32))
mach2.get_params()

Re-initializing module because the following parameters were re-set: fc1_out_features, fc2_out_features.
Re-initializing criterion.
Re-initializing optimizer.
  epoch    train_loss            valid_loss     dur
-------  ------------  --------------------  ------
      1   [36m778115.0402[0m  [32m120782217241206.1562[0m  0.1782
      2           nan           nan  0.0507
      3           nan           nan  0.0527
      4           nan           nan  0.0457
      5           nan           nan  0.0487
      6           nan           nan  0.0442
      7           nan           nan  0.0306
      8           nan           nan  0.0409
      9           nan           nan  0.0408
     10           nan           nan  0.0469


  predictions = mach2.predict(torch.tensor(X_valid_tensor[172:, :], dtype=torch.float32))


{'module': __main__.SimpleNN,
 'criterion': torch.nn.modules.loss.MSELoss,
 'optimizer': torch.optim.sgd.SGD,
 'lr': 0.01,
 'max_epochs': 10,
 'batch_size': 128,
 'iterator_train': torch.utils.data.dataloader.DataLoader,
 'iterator_valid': torch.utils.data.dataloader.DataLoader,
 'dataset': skorch.dataset.Dataset,
 'train_split': <skorch.dataset.ValidSplit object at 0x0000024CCAEDF3D0>,
 'callbacks': None,
 'predict_nonlinearity': 'auto',
 'warm_start': False,
 'verbose': 1,
 'device': 'cpu',
 'compile': False,
 'use_caching': 'auto',
 '_params_to_validate': {'iterator_train__shuffle',
  'module__fc1_out_features',
  'module__fc2_out_features'},
 'iterator_train__shuffle': True,
 'module__fc1_out_features': 100,
 'module__fc2_out_features': 64,
 'callbacks__epoch_timer': <skorch.callbacks.logging.EpochTimer at 0x24ccb098100>,
 'callbacks__train_loss': <skorch.callbacks.scoring.PassthroughScoring at 0x24ccb0b35b0>,
 'callbacks__train_loss__name': 'train_loss',
 'callbacks__train_loss__l

In [None]:

predictions_tensor = torch.tensor(predictions, dtype=torch.float32)
y_valid_tensor_sliced = y_valid_tensor[172:, :]  
t_score = torch.mean((torch.abs(predictions_tensor - y_valid_tensor_sliced) <= 5).float())
print(t_score.item())

0.0
