# Neural Network ML-cup Dataset, Pytorch

# Import Libraries

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import ParameterGrid
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, KFold, RandomizedSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.metrics import make_scorer

In [None]:

from skorch import NeuralNetRegressor



In [None]:
from sklearn.metrics import mean_absolute_error

## Read the Dataset

In [None]:
def ReadFile(s):
    """Load an ML-CUP style CSV file into a DataFrame indexed by ``Id``.

    The first 7 lines of the file are skipped and the remaining rows are
    parsed as comma-separated values with fixed column names: ``Id``,
    ``i1``..``i10`` and ``Y1``..``Y3``.

    Args:
        s: Path (or any source accepted by ``pandas.read_csv``) of the file.

    Returns:
        DataFrame whose index is the ``Id`` column and whose 13 remaining
        columns are ``i1``..``i10`` followed by ``Y1``..``Y3``.
    """
    input_names = [f'i{k}' for k in range(1, 11)]
    target_names = [f'Y{k}' for k in range(1, 4)]
    frame = pd.read_csv(
        s,
        sep=",",
        names=['Id', *input_names, *target_names],
        skiprows=7,
    )
    return frame.set_index('Id')

In [None]:
# Load the training CSV; ReadFile indexes the rows by the 'Id' column.
data = ReadFile("Dataset_Cup/ML-CUP23-TR.csv")

## Split the dataset into features and targets

In [None]:
# Columns 0-9 are the inputs (i1..i10); columns 10-12 are the targets (Y1..Y3).
featureTrain = data.iloc[:, :10]
TargetTrain = data.iloc[:, 10:13]

In [None]:
# Hold out 25% of the rows as an internal test set; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    featureTrain.values,
    TargetTrain.values,
    test_size=0.25,
    random_state=42,
)


### Function to compute the mean Euclidean error

In [None]:
def mean_euclidean_error(y_true, y_pred):
    """Return the mean Euclidean error (MEE) between targets and predictions.

    For every row, the L2 norm of ``y_true - y_pred`` is taken along
    dimension 1; the result is the mean of those per-row norms.

    Args:
        y_true: 2-D targets, either a torch tensor or an array-like
            (NumPy array, nested list).
        y_pred: 2-D predictions with the same shape as ``y_true``.

    Returns:
        A 0-dim torch tensor when both inputs are torch tensors (unchanged
        behaviour, so the value can still be back-propagated); otherwise a
        Python ``float``.
    """
    if torch.is_tensor(y_true) and torch.is_tensor(y_pred):
        # Per-row L2 norm, then the mean over rows.
        return torch.mean(torch.norm(y_true - y_pred, p=2, dim=1))

    # scikit-learn scorers call the metric with NumPy arrays, which
    # torch.norm does not accept; convert explicitly and return a plain float.
    true_t, pred_t = (
        value.detach().cpu() if torch.is_tensor(value) else torch.as_tensor(value)
        for value in (y_true, y_pred)
    )
    distances = torch.norm(true_t.double() - pred_t.double(), p=2, dim=1)
    return distances.mean().item()


In [None]:
def custom_scoring(y_true, y_pred):
    """Score predictions by delegating to ``mean_euclidean_error``."""
    score = mean_euclidean_error(y_true, y_pred)
    return score

## Function to create the model

In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected regressor with three Tanh hidden layers.

    Architecture: ``Linear(10, h_units)`` -> Tanh -> ``Linear(h_units, h_units)``
    -> Tanh -> ``Linear(h_units, h_units)`` -> Tanh -> ``Linear(h_units, 3)``.

    Args:
        h_units: Number of units in each hidden layer.
    """

    def __init__(self, h_units):
        super().__init__()
        # Plain lists keep the iteration order used by forward(); every entry
        # is ALSO registered with add_module so that its weights appear in
        # parameters() / state_dict(). The input layer used to be missing from
        # that registration, so it was never updated by the optimizer.
        self.layers = []
        self.acts = []
        in_features = 10
        for i in range(3):
            layer = nn.Linear(in_features, h_units)
            act = nn.Tanh()
            self.add_module(f"layer{i}", layer)
            self.add_module(f"act{i}", act)
            self.layers.append(layer)
            self.acts.append(act)
            in_features = h_units
        # Linear output layer: one unit per regression target.
        self.output = nn.Linear(h_units, 3)

    def forward(self, x):
        """Forward propagation: hidden layers with Tanh, then the linear output."""
        for layer, act in zip(self.layers, self.acts):
            x = act(layer(x))
        return self.output(x)

## Model Selection
The number of hidden layers is fixed.
The search covers the learning rate, the momentum, the weight decay and the number of hidden units; the number of epochs and the batch size are fixed in the estimator definition.

In [None]:
# Hyperparameter search space. The double-underscore prefixes route each
# value to the optimizer or to the module constructor of the estimator.
# The order of the values is kept as-is because it affects random sampling.
param_grid = dict(
    optimizer__lr=[0.002, 0.01, 0.0008, 0.005, 0.03],
    optimizer__momentum=[0.0, 0.5, 0.8],
    optimizer__weight_decay=[0.0001, 0.001, 0.01, 0.1],
    module__h_units=[50, 100, 150],
)


In [None]:
# Wrap the MEE as a scikit-learn scorer. It is an error measure (lower is
# better), hence greater_is_better=False.
scorer = make_scorer(
    mean_euclidean_error,
    greater_is_better=False,
)

In [None]:
# skorch estimator wrapping NeuralNetwork so that it can be used with the
# scikit-learn search utilities; trained with SGD on the MSE loss.
model = NeuralNetRegressor(
    module=NeuralNetwork,
    criterion=nn.MSELoss,
    optimizer=optim.SGD,
    max_epochs=200,
    batch_size=8,
    verbose=True,
)

In [None]:
# Same 75/25 split with the same seed (random_state=42), so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    featureTrain.values,
    TargetTrain.values,
    test_size=0.25,
    random_state=42,
)


In [None]:
# Convert every split to float32 tensors.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(split, dtype=torch.float32)
    for split in (X_train, y_train, X_test, y_test)
)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)

## Use randomized Search

In [None]:
# Randomized search: 50 sampled configurations, each evaluated with 5-fold
# cross-validation, using all available cores.
# NOTE(review): no `scoring` argument is given, so candidates are presumably
# ranked by the estimator's own `score` method — confirm this is intended.
grid = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=50,
    cv=5,
    n_jobs=-1,
    random_state=42,
    verbose=4,
)
grid_result = grid.fit(X_train_tensor, y_train_tensor)

In [None]:
# Report the best cross-validated score and the configuration that achieved it.
best_score = grid_result.best_score_
best_params = grid_result.best_params_
print("Best Results: %f using %s" % (best_score, best_params))

# Try the model on the Data

In [None]:
# Plain PyTorch model for the final training run. This rebinds `model`,
# which previously referred to the skorch estimator.
model = NeuralNetwork(150)
# Mean squared error loss for the regression task.
criterion = nn.MSELoss()
# SGD with momentum and L2 weight decay.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.005,
    momentum=0.5,
    weight_decay=0.001,
)

# Training loop

In [None]:
# Lists to store loss values for plotting
# Per-epoch history, filled by the training loop and used for plotting.
epoch_train_losses, epoch_val_losses = [], []
epoch_train_mees, epoch_val_mees = [], []

# Second split: hold out 20% of the training data as a validation set so
# that training and validation curves can be compared.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

# Note: X_train_tensor / y_train_tensor are rebound to the reduced training split.
X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor = (
    torch.tensor(part, dtype=torch.float32)
    for part in (X_tr, y_tr, X_val, y_val)
)

dataset = TensorDataset(X_train_tensor, y_train_tensor)
valdataset = TensorDataset(X_val_tensor, y_val_tensor)

batch_size = 16
num_epochs = 200
# Both loaders iterate in a fixed order (shuffle=False).
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(valdataset, batch_size=batch_size, shuffle=False)

In [None]:
# Train for `num_epochs` epochs, recording the MSE and the mean Euclidean
# error (MEE) on the training and validation splits after every epoch.
# Metrics are accumulated per sample (not per batch), so a smaller final
# batch does not bias the epoch averages.
for epoch in range(num_epochs):
    # ---- Training phase ----
    model.train()
    train_loss_sum = 0.0
    train_mee_sum = 0.0
    n_train = 0
    for inputs, targets in train_loader:
        optimizer.zero_grad()  # Zero the gradients
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        n_batch = inputs.size(0)
        # criterion returns the batch mean; rescale to a per-batch sum.
        train_loss_sum += loss.item() * n_batch
        with torch.no_grad():
            # Sum of per-sample Euclidean distances for this batch.
            train_mee_sum += torch.norm(outputs - targets, p=2, dim=1).sum().item()
        n_train += n_batch

    epoch_train_loss = train_loss_sum / n_train
    epoch_train_losses.append(epoch_train_loss)

    # Mean MEE over the training epoch.
    epoch_train_mee = train_mee_sum / n_train
    epoch_train_mees.append(epoch_train_mee)

    # ---- Validation phase ----
    model.eval()
    val_loss_sum = 0.0
    val_mee_sum = 0.0
    n_val = 0
    with torch.no_grad():
        for val_inputs, val_targets in val_loader:
            val_outputs = model(val_inputs)
            n_batch = val_inputs.size(0)
            val_loss_sum += criterion(val_outputs, val_targets).item() * n_batch
            val_mee_sum += torch.norm(val_outputs - val_targets, p=2, dim=1).sum().item()
            n_val += n_batch

    epoch_val_loss = val_loss_sum / n_val
    epoch_val_losses.append(epoch_val_loss)

    # Mean MEE over the validation epoch.
    epoch_val_mee = val_mee_sum / n_val
    epoch_val_mees.append(epoch_val_mee)

    print(f"Epoch [{epoch + 1}/{num_epochs}], Train Loss: {epoch_train_loss:.4f}, Val Loss: {epoch_val_loss:.4f}, Train MEE: {epoch_train_mee:.4f}, Val MEE: {epoch_val_mee:.4f}")

# Plotting training and validation loss values



In [None]:
# Learning curves: MSE on the training and validation splits per epoch.
for series, legend_label in (
    (epoch_train_losses, 'Training Loss'),
    (epoch_val_losses, 'Validation Loss'),
):
    plt.plot(series, label=legend_label)

plt.title('Training and Validation MSE Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('MSE')
# Fixed axes: integer ticks 0..10 on y, 0..200 on x.
plt.yticks(np.arange(0, 11, 1))
plt.ylim(0, 10)
plt.xlim(0, 200)
plt.grid()
plt.legend(loc='center right')
plt.show()

In [None]:
# Learning curves: mean Euclidean error (MEE) on the training and validation
# splits per epoch. The legend and axis labels name the metric being plotted.
plt.plot(epoch_train_mees, label='Training MEE')
plt.plot(epoch_val_mees, label='Validation MEE')
plt.xlabel('Epochs')
plt.ylabel('Mean Euclidean Error')
plt.title('Training and Validation MEE Over Epochs')
# Fixed axes: integer ticks 0..10 on y, 0..200 on x.
plt.yticks(np.arange(0, 11, 1))
plt.ylim(0, 10)
plt.xlim(0, 200)
plt.grid()
plt.legend(loc='center right')
plt.show()

In [None]:
# Report the metrics of the trained model, i.e. the values reached at the
# last epoch. Averaging over all epochs would mix in the high errors of the
# early, untrained epochs and would not be comparable with the test metrics.
train_loss = epoch_train_losses[-1]
val_loss = epoch_val_losses[-1]

train_mee = epoch_train_mees[-1]
val_mee = epoch_val_mees[-1]

print(f"MSE Train: {train_loss:.4f}")
print(f"MSE Validation: {val_loss:.4f}")

print(f"MEE Train: {train_mee:.4f}")
print(f"MEE Validation: {val_mee:.4f}")

In [None]:
# Predict on the internal test set. Evaluation mode plus no_grad avoids
# building an autograd graph that is never used.
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor)


In [None]:
# Test-set metrics computed on NumPy copies of the tensors.
y_test_np = y_test_tensor.detach().numpy()
pred_np = predictions.detach().numpy()

MSE = mean_squared_error(y_test_np, pred_np)
# MEE = mean over samples of the per-sample Euclidean distance. The result is
# bound to MEE only, so the mean_euclidean_error function is not overwritten.
MEE = np.mean(np.linalg.norm(pred_np - y_test_np, axis=1))

In [None]:
# Print the test-set MSE and MEE with four decimal places.
print(f"MSE Test: {MSE:.4f}")
print(f"MEE Test: {MEE:.4f}")