In [8]:
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix,classification_report
import os
from sklearn.svm import SVC


# Caricamento dati
X_train = pd.read_csv("../data/splitted/X_train.csv")
X_test = pd.read_csv("../data/splitted/X_test.csv")
y_train = pd.read_csv("../data/splitted/y_train.csv").values.ravel()
y_test = pd.read_csv("../data/splitted/y_test.csv").values.ravel()

print("Colonne con solo NaN:")
print(X_train.columns[X_train.isna().all()])

print("Colonne costanti:")
print(X_train.columns[X_train.nunique() <= 1])

# Rimozione colonne inutili o problematiche
X_train = X_train.drop(columns=['source'], errors='ignore')  # elimino colonna tutta NaN
X_train = X_train.loc[:, X_train.nunique() > 1]  # elimino colonne costanti

# Allineamento test set
X_test = X_test[X_train.columns]

# Riempio NaN residui con la mediana
X_train = X_train.fillna(X_train.median())
X_test = X_test.fillna(X_train.median())

Colonne con solo NaN:
Index(['source'], dtype='object')
Colonne costanti:
Index(['source', 'country_Cambodia', 'country_Latvia', 'country_Lebanon',
       'country_Liechtenstein', 'country_Nepal', 'country_Oman',
       'country_Peru', 'country_Portugal', 'country_Tanzania',
       'country_Uzbekistan'],
      dtype='object')


In [9]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
model.fit(X_train_scaled, y_train)

y_pred_train = model.predict(X_train_scaled)
y_pred_test = model.predict(X_test_scaled)

In [15]:
train_metrics = {
    "Accuracy": accuracy_score(y_train, y_pred_train),
    "Precision": precision_score(y_train, y_pred_train),
    "Recall": recall_score(y_train, y_pred_train),
    "F1-Score": f1_score(y_train, y_pred_train),
    "Confusion Matrix": confusion_matrix(y_train, y_pred_train).tolist()
}
test_metrics = {
    "Accuracy": accuracy_score(y_test, y_pred_test),
    "Precision": precision_score(y_test, y_pred_test),
    "Recall": recall_score(y_test, y_pred_test),
    "F1-Score": f1_score(y_test, y_pred_test),
    "Confusion Matrix": confusion_matrix(y_test, y_pred_test).tolist()
}

In [18]:
import os

output_dir = "../results/classification_selfMade/SVM"
os.makedirs(output_dir, exist_ok=True)

filename = os.path.join(output_dir, f"{model}_metrics.txt")
with open(filename, "w") as f:
    f.write(f"Modello: SVM ")

    f.write("TRAIN METRICS:\n")
    for k, v in train_metrics.items():
        if k == "Confusion Matrix":
            f.write("Confusion Matrix:\n")
            f.write(f"{v}\n")
        else:
            f.write(f"{k}: {v:.4f}\n")

    f.write("\nTEST METRICS:\n")
    for k, v in test_metrics.items():
        if k == "Confusion Matrix":
            f.write("Confusion Matrix:\n")
            f.write(f"{v}\n")
        else:
            f.write(f"{k}: {v:.4f}\n")

print(f"✔ Metriche salvate in: {filename}")



✔ Metriche salvate in: ../results/classification_selfMade/SVM\SVC(random_state=42)_metrics.txt
