In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
import numpy as np
from skorch.callbacks import EpochScoring
from sklearn.preprocessing import StandardScaler
from skorch import NeuralNetRegressor
from sklearn.model_selection import train_test_split
import pandas as pd

In [2]:
data = pd.read_csv("./train.csv")

spectrum = data.iloc[:, 6:]

#spectrum_filtered = pd.DataFrame(savgol_filter(spectrum, 7, 3, deriv = 2, axis = 0))
#spectrum_filtered_st = zscore(spectrum_filtered, axis = 1)

X = spectrum
y = data["PURITY"]

X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

standardizer = StandardScaler()
X_train_standardized = standardizer.fit_transform(X_train)
X_valid_standardized = standardizer.transform(X_valid)

# Convert to tensors
X_train_tensor = torch.tensor(X_train_standardized, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).reshape(-1, 1)
X_valid_tensor = torch.tensor(X_valid_standardized, dtype=torch.float32)
y_valid_tensor = torch.tensor(y_valid.values, dtype=torch.float32).reshape(-1, 1)

In [3]:
model = nn.Sequential(
    nn.Linear(X_train_tensor.shape[1], 100),
    nn.ReLU(),
    nn.Linear(100, 64),
    nn.ReLU(),
    nn.Linear(64, 1),
    nn.Sigmoid()
)
 
# loss function and optimizer
loss_fn = nn.BCELoss()  # binary cross entropy
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
params = {
    'eta': [0.001, 0.005, 0.01],
    'max_epochs': [20,25],
    'module__fc1_out_features': [100, 128],
    'module__fc2_out_features': [64, 128],
}

grid_search = GridSearchCV(model, params, refit=True, cv=5, scoring='neg_mean_squared_error',verbose=0)
grid_result = grid_search.fit(X_train_tensor, y_train_tensor)
nouveau_model = grid_result.best_estimator_

print("Best MSE: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
print(grid_result.best_params_["eta"])

InvalidParameterError: The 'estimator' parameter of GridSearchCV must be an object implementing 'fit'. Got Sequential(
  (0): Linear(in_features=125, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=1, bias=True)
  (5): Sigmoid()
) instead.

In [None]:
n_epochs = grid_result.best_params_["max_epochs"]    # number of epochs to run
batch_size = 10  # size of each batch
batches_per_epoch = len(X_train_tensor) // batch_size
train_loss = []
 
for epoch in range(n_epochs):
    for i in range(batches_per_epoch):
        start = i * batch_size
        # take a batch
        Xbatch = X_train_tensor[start:start+batch_size]
        ybatch = X_train_tensor[start:start+batch_size]
        # forward pass
        y_pred = model(Xbatch)
        loss = loss_fn(y_pred, ybatch)
        # store metrics
        train_loss.append(float(loss))
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        # update weights
        optimizer.step()
        
        

In [None]:
# evaluate trained model with test set
with torch.no_grad():
    y_pred = model(X)
# Calculer la MSE
mse = np.mean((y_pred - y_valid_tensor.numpy())**2)
print("MSE :", mse)

# Calculer le t_score
predictions_tensor = torch.tensor(y_pred, dtype=torch.float32)
t_score = torch.mean((torch.abs(predictions_tensor - y_valid_tensor) <= 5).float())
print("t_score :", t_score.item())

# Afficher la courbe d'apprentissage
import matplotlib.pyplot as plt
plt.plot(train_loss)
plt.xlabel('Steps')
plt.ylabel('Loss')
plt.title('Learning Curve')
plt.show()