# Installazione delle Librerie

In [None]:
%pip install numpy pandas scikit-learn torch pytorch-tabnet pytorch-tabular joblib

# Import e definizioni delle funzioni

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
import torch.nn as nn
import torch.optim as optim
from pytorch_tabnet.tab_model import TabNetRegressor
from pytorch_tabular import TabularModel
from pytorch_tabular.config import DataConfig, ModelConfig, OptimizerConfig, TrainerConfig
import joblib

# Seed shared by the train/validation split, PCA and the seeded models in this file.
random_state = 89

def save_model(model, filename):
    """Dump ``model`` with joblib to ``<filename>.pkl``.

    Args:
        model: Object to serialize.
        filename: Output path without extension; ``.pkl`` is appended.
    """
    output_path = f'{filename}.pkl'
    joblib.dump(model, output_path)

def preprocess(X, scaler_type='standard', use_pca=False, n_components=None):
    """Fit a scaler (and optionally a PCA) on ``X`` and return the transformed data.

    Args:
        X: Feature matrix to fit the transformers on.
        scaler_type: ``'standard'`` selects ``StandardScaler``, ``'minmax'``
            selects ``MinMaxScaler``.
        use_pca: Whether to apply PCA after scaling.
        n_components: Number of PCA components. PCA is skipped when this is
            falsy (``None`` or ``0``), even if ``use_pca`` is true.

    Returns:
        Tuple ``(X_scaled, scaler, pca)`` where ``pca`` is ``None`` when no
        PCA was applied.

    Raises:
        ValueError: If ``scaler_type`` is not one of the supported names.
    """
    if scaler_type == 'standard':
        scaler = StandardScaler()
    elif scaler_type == 'minmax':
        scaler = MinMaxScaler()
    else:
        # Fail loudly instead of hitting an UnboundLocalError on `scaler` below.
        raise ValueError(
            f"Unknown scaler_type {scaler_type!r}; expected 'standard' or 'minmax'"
        )
    X_scaled = scaler.fit_transform(X)

    pca = None
    if use_pca and n_components:
        pca = PCA(n_components=n_components, random_state=random_state)
        X_scaled = pca.fit_transform(X_scaled)

    return X_scaled, scaler, pca


# Caricamento dei Dati

In [None]:
# Load the training data and separate the 'Anno' target from the features
df = pd.read_csv('training_set.csv')
y = df['Anno']
X = df.drop(columns='Anno')

# Hold out 20% of the rows as the validation set
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=random_state,
)

# Funzioni di Training

In [None]:
# Train one of the classical scikit-learn regressors
def train_model(X_train, y_train, model_type='LR'):
    """Fit a scikit-learn regressor selected by ``model_type``.

    Args:
        X_train: Training feature matrix.
        y_train: Training targets.
        model_type: ``'LR'`` (LinearRegression), ``'RF'``
            (RandomForestRegressor), ``'KNR'`` (KNeighborsRegressor) or
            ``'SVR'`` (SVR).

    Returns:
        Tuple ``(model, best_params)``. ``best_params`` is always ``None``
        because no hyper-parameter search is performed here.

    Raises:
        ValueError: If ``model_type`` is not one of the supported codes.
    """
    if model_type == 'LR':
        model = LinearRegression()
    elif model_type == 'RF':
        model = RandomForestRegressor(random_state=random_state)
    elif model_type == 'KNR':
        model = KNeighborsRegressor()
    elif model_type == 'SVR':
        model = SVR()
    else:
        # Previously an unknown code fell through and crashed on an unbound `model`.
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected 'LR', 'RF', 'KNR' or 'SVR'"
        )

    model.fit(X_train, y_train)
    best_params = None
    return model, best_params

# Feed-forward neural network for single-output regression
class FeedForwardNN(nn.Module):
    """Three linear layers (input -> 64 -> 64 -> 1) with ReLU between them."""

    def __init__(self, input_dim):
        super().__init__()
        hidden_units = 64
        # Layers are created in this order so parameter initialisation is unchanged.
        self.layer1 = nn.Linear(input_dim, hidden_units)
        self.layer2 = nn.Linear(hidden_units, hidden_units)
        self.layer3 = nn.Linear(hidden_units, 1)

    def forward(self, x):
        """Return the network output for the batch ``x``."""
        hidden = nn.functional.relu(self.layer1(x))
        hidden = nn.functional.relu(self.layer2(hidden))
        return self.layer3(hidden)

def train_ffnn(X_train, y_train, input_dim, epochs=100, lr=0.001):
    """Train a ``FeedForwardNN`` with full-batch Adam on an MSE loss.

    Args:
        X_train: Training features (array-like convertible by ``np.asarray``).
        y_train: Training targets (array-like; a pandas Series or a numpy
            array both work).
        input_dim: Number of input features of the network.
        epochs: Number of full-batch optimisation steps. Defaults to 100.
        lr: Adam learning rate. Defaults to 0.001.

    Returns:
        The trained ``FeedForwardNN`` instance (left in training mode).
    """
    model = FeedForwardNN(input_dim)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # np.asarray accepts pandas objects as well as arrays, so callers are not
    # forced to pass a Series for the targets.
    X_train_tensor = torch.tensor(np.asarray(X_train), dtype=torch.float32)
    y_train_tensor = torch.tensor(np.asarray(y_train), dtype=torch.float32).view(-1, 1)

    # The mode never changes during training, so set it once outside the loop.
    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)
        loss.backward()
        optimizer.step()

    return model

# Train TabNet
def train_tabnet(X_train, y_train, X_val, y_val):
    """Fit a ``TabNetRegressor`` with early stopping on the validation set.

    Args:
        X_train: Training features.
        y_train: Training targets; 1-D inputs are reshaped to ``(n, 1)``.
        X_val: Validation features used as the evaluation set.
        y_val: Validation targets; 1-D inputs are reshaped to ``(n, 1)``.

    Returns:
        The fitted ``TabNetRegressor``.
    """
    def _as_2d_target(y):
        # TabNetRegressor requires numpy targets shaped (n_samples, n_outputs);
        # a pandas Series or 1-D array is rejected by fit().
        target = np.asarray(y)
        return target.reshape(-1, 1) if target.ndim == 1 else target

    tabnet = TabNetRegressor(verbose=1, seed=random_state)
    tabnet.fit(
        np.asarray(X_train),
        _as_2d_target(y_train),
        eval_set=[(np.asarray(X_val), _as_2d_target(y_val))],
        patience=50,
        max_epochs=1000,
    )
    return tabnet

# Train TabTransformer
def train_tabtransformer(df_train, df_val, target_col='Anno'):
    """Fit a pytorch_tabular TabTransformer regression model.

    Every column of ``df_train`` other than ``target_col`` is declared as a
    continuous feature.

    Args:
        df_train: Training frame containing the features and the target.
        df_val: Validation frame with the same columns.
        target_col: Name of the target column. Defaults to ``'Anno'``.

    Returns:
        The fitted ``TabularModel``.
    """
    # Imported locally: the base ModelConfig does not select an architecture,
    # so the TabTransformer-specific config is required.
    from pytorch_tabular.models import TabTransformerConfig

    data_config = DataConfig(
        # DataConfig documents `target` as a list of column names.
        target=[target_col],
        continuous_cols=df_train.columns.drop(target_col).tolist()
    )
    model_config = TabTransformerConfig(
        task="regression",
        learning_rate=1e-3,
        seed=random_state
    )
    # NOTE(review): newer pytorch_tabular releases appear to replace `gpus`
    # with `accelerator`/`devices` — confirm against the installed version.
    trainer_config = TrainerConfig(
        max_epochs=100,
        gpus=1 if torch.cuda.is_available() else 0
    )
    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=OptimizerConfig(),
        trainer_config=trainer_config
    )
    tabular_model.fit(train=df_train, validation=df_val)
    return tabular_model

# Preprocessing e salvataggio dei risultati

In [None]:
# Preprocessing grid per model: both scalers for everyone; PCA (52 components)
# is toggled for the classical models and the FFNN, and disabled for TabNet
# and TabTransformer.
preprocessing_options = {
    **{
        name: {'scaler_type': ['standard', 'minmax'], 'use_pca': [True, False], 'n_components': 52}
        for name in ('LR', 'RF', 'KNR', 'SVR', 'FFNN')
    },
    **{
        name: {'scaler_type': ['standard', 'minmax'], 'use_pca': [False], 'n_components': None}
        for name in ('TabNet', 'TabTransformer')
    },
}

# Fit and persist the scaler / PCA for every configured combination
def preprocess_and_save(X_train, X_val, preprocessing_options):
    """Fit, store and save the transformers for each model/scaler/PCA combination.

    For every combination a key ``"<model>_<scaler>_<PCA|NoPCA>"`` is built,
    the transformers are fitted on ``X_train`` and written to disk via
    ``save_model`` as ``scaler_<key>`` and, when PCA is used, ``pca_<key>``
    (key lower-cased).

    Args:
        X_train: Training features the transformers are fitted on.
        X_val: Unused; kept so existing callers keep working.
        preprocessing_options: Mapping of model name to a dict with the keys
            ``'scaler_type'``, ``'use_pca'`` and ``'n_components'``.

    Returns:
        Tuple ``(scalers, pcas)`` of dicts keyed by the combination key;
        ``pcas[key]`` is ``None`` when no PCA was fitted.
    """
    scalers = {}
    pcas = {}
    for clfName, options in preprocessing_options.items():
        for scaler_type in options['scaler_type']:
            for use_pca in options['use_pca']:
                key = f"{clfName}_{scaler_type}_{'PCA' if use_pca else 'NoPCA'}"
                # Only the fitted transformers are kept; the transformed
                # matrices were never returned, so they are not computed for
                # the validation set any more.
                _, scaler, pca = preprocess(
                    X_train,
                    scaler_type=scaler_type,
                    use_pca=use_pca,
                    n_components=options['n_components'],
                )
                scalers[key] = scaler
                pcas[key] = pca
                save_model(scaler, f'scaler_{key.lower()}')
                if pca is not None:
                    save_model(pca, f'pca_{key.lower()}')
    return scalers, pcas

# Fit and persist the transformers for every scaler / PCA combination
scalers, pcas = preprocess_and_save(
    X_train=X_train,
    X_val=X_val,
    preprocessing_options=preprocessing_options,
)

# Training modelli tradizionali

In [None]:
# Train the classical regressors on every scaler / PCA combination
models = {}
for clfName in ('LR', 'RF', 'KNR', 'SVR'):
    clf_options = preprocessing_options[clfName]
    for scaler_type in clf_options['scaler_type']:
        for use_pca in clf_options['use_pca']:
            key = f"{clfName}_{scaler_type}_{'PCA' if use_pca else 'NoPCA'}"

            # Re-apply the already fitted transformers to the training features
            X_train_scaled = scalers[key].transform(X_train)
            fitted_pca = pcas[key]
            if fitted_pca:
                X_train_scaled = fitted_pca.transform(X_train_scaled)

            model, params = train_model(X_train_scaled, y_train, model_type=clfName)
            models[key] = model
            save_model(
                model,
                f'model_{clfName.lower()}_{scaler_type}_{"pca" if use_pca else "nopca"}',
            )
            print(f"Trained {clfName} with {scaler_type} scaler and PCA={use_pca}, parameters: {params}")

# Training FFNN

In [None]:
# Train the FFNN on every scaler / PCA combination
for scaler_type in preprocessing_options['FFNN']['scaler_type']:
    for use_pca in preprocessing_options['FFNN']['use_pca']:
        key = f"FFNN_{scaler_type}_{'PCA' if use_pca else 'NoPCA'}"
        X_train_scaled = scalers[key].transform(X_train)
        if pcas[key]:
            X_train_scaled = pcas[key].transform(X_train_scaled)
        # Seed torch before each run so the weight initialisation is
        # reproducible, as for the other models seeded with random_state.
        torch.manual_seed(random_state)
        ffnn_model = train_ffnn(X_train_scaled, y_train, input_dim=X_train_scaled.shape[1])
        models[key] = ffnn_model
        save_model(ffnn_model, f'model_ffnn_{scaler_type}_{"pca" if use_pca else "nopca"}')
        print(f"Trained FFNN with {scaler_type} scaler and PCA={use_pca}")

# Training TabNet

In [None]:
# Train TabNet once per scaler (no PCA variants)
for scaler_type in preprocessing_options['TabNet']['scaler_type']:
    key = f"TabNet_{scaler_type}_NoPCA"
    fitted_scaler = scalers[key]
    # Scale both splits with the scaler fitted on the training data
    X_train_scaled, X_val_scaled = (
        fitted_scaler.transform(split) for split in (X_train, X_val)
    )
    tabnet_model = train_tabnet(X_train_scaled, y_train, X_val_scaled, y_val)
    models[key] = tabnet_model
    save_model(tabnet_model, f'model_tabnet_{scaler_type}')
    print(f"Trained TabNet with {scaler_type} scaler")

# Training TabTransformer

In [None]:
# Train TabTransformer once per scaler name (no PCA variants)
df_train = pd.concat([X_train, y_train], axis='columns')
df_val = pd.concat([X_val, y_val], axis='columns')

# NOTE(review): scaler_type only labels the dictionary key and the file name
# here; the frames passed to train_tabtransformer are the unscaled
# df_train / df_val. Confirm whether scaling was intended.
for scaler_type in preprocessing_options['TabTransformer']['scaler_type']:
    key = f"TabTransformer_{scaler_type}_NoPCA"
    tabtransformer_model = train_tabtransformer(
        df_train,
        df_val,
        target_col='Anno',
    )
    models[key] = tabtransformer_model
    save_model(tabtransformer_model, f'model_tabtransformer_{scaler_type}')
    print(f"Trained TabTransformer with {scaler_type} scaler")

# Validation

In [None]:
# Evaluate the models on every scaler / PCA combination
performance = {}

# Classical regressors
for clfName in ('LR', 'RF', 'KNR', 'SVR'):
    clf_options = preprocessing_options[clfName]
    for scaler_type in clf_options['scaler_type']:
        for use_pca in clf_options['use_pca']:
            key = f"{clfName}_{scaler_type}_{'PCA' if use_pca else 'NoPCA'}"

            # Apply the fitted transformers to the validation features
            X_val_scaled = scalers[key].transform(X_val)
            fitted_pca = pcas[key]
            if fitted_pca:
                X_val_scaled = fitted_pca.transform(X_val_scaled)

            y_pred = models[key].predict(X_val_scaled)
            mse, mae, r2 = (
                mean_squared_error(y_val, y_pred),
                mean_absolute_error(y_val, y_pred),
                r2_score(y_val, y_pred),
            )
            performance[key] = dict(mse=mse, mae=mae, r2=r2)
            print(f"Performance of {clfName} with {scaler_type} scaler and PCA={use_pca}: MSE={mse}, MAE={mae}, R2={r2}")

# Evaluate the FFNN models
for scaler_type in preprocessing_options['FFNN']['scaler_type']:
    for use_pca in preprocessing_options['FFNN']['use_pca']:
        key = f"FFNN_{scaler_type}_{'PCA' if use_pca else 'NoPCA'}"
        X_val_scaled = scalers[key].transform(X_val)
        if pcas[key]:
            X_val_scaled = pcas[key].transform(X_val_scaled)
        X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32)
        # Inference only: switch to eval mode and skip autograd bookkeeping.
        models[key].eval()
        with torch.no_grad():
            # reshape(-1) keeps a 1-D array even for a single validation row,
            # where a bare squeeze() would collapse to a 0-d scalar.
            y_pred_tensor = models[key](X_val_tensor).numpy().reshape(-1)
        mse = mean_squared_error(y_val, y_pred_tensor)
        mae = mean_absolute_error(y_val, y_pred_tensor)
        r2 = r2_score(y_val, y_pred_tensor)
        performance[key] = {'mse': mse, 'mae': mae, 'r2': r2}
        print(f"Performance of FFNN with {scaler_type} scaler and PCA={use_pca}: MSE={mse}, MAE={mae}, R2={r2}")

# Evaluate the TabNet models
for scaler_type in preprocessing_options['TabNet']['scaler_type']:
    key = f"TabNet_{scaler_type}_NoPCA"
    # Scale the validation features with the scaler stored for this key
    X_val_scaled = scalers[key].transform(X_val)
    y_pred = models[key].predict(X_val_scaled)
    mse, mae, r2 = (
        mean_squared_error(y_val, y_pred),
        mean_absolute_error(y_val, y_pred),
        r2_score(y_val, y_pred),
    )
    performance[key] = dict(mse=mse, mae=mae, r2=r2)
    print(f"Performance of TabNet with {scaler_type} scaler: MSE={mse}, MAE={mae}, R2={r2}")

# Evaluate the TabTransformer models
for scaler_type in preprocessing_options['TabTransformer']['scaler_type']:
    key = f"TabTransformer_{scaler_type}_NoPCA"
    # predict() returns a DataFrame; select the prediction column explicitly
    # rather than relying on squeeze(), which only collapses a frame that has
    # exactly one column. pytorch_tabular names it "<target>_prediction".
    prediction_frame = models[key].predict(df_val)
    y_pred = prediction_frame['Anno_prediction'].to_numpy()
    mse = mean_squared_error(y_val, y_pred)
    mae = mean_absolute_error(y_val, y_pred)
    r2 = r2_score(y_val, y_pred)
    performance[key] = {'mse': mse, 'mae': mae, 'r2': r2}
    print(f"Performance of TabTransformer with {scaler_type} scaler: MSE={mse}, MAE={mae}, R2={r2}")

# Write one line of metrics per evaluated model to performance.txt
with open('performance.txt', 'w') as f:
    for clfName, metrics in performance.items():
        line = f"{clfName}: MSE={metrics['mse']}, MAE={metrics['mae']}, R2={metrics['r2']}\n"
        f.write(line)
