# In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_validate
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Data loading
# Outcome columns. OS_STATUS is the label being predicted; OS_MONTHS is listed
# with it (presumably the overall-survival time -- confirm). Neither may be
# used as a model input: feeding the label (or a direct proxy of it) to the
# classifiers is target leakage and inflates every reported metric.
outcome_cols = ['OS_MONTHS', 'OS_STATUS']
top_features = pd.read_csv("top_features.csv")['Variable'].to_list()

# Kept with its original contents (ranked features + outcome columns) for any
# other code that subsets the frames with it.
selected = top_features + outcome_cols
# Model inputs: the ranked features only, with outcome columns filtered out in
# case the feature file itself lists one of them.
feature_cols = [col for col in top_features if col not in outcome_cols]

df_train = pd.read_csv('train_data.csv')
df_test = pd.read_csv('test_data.csv')
x_train = df_train[feature_cols]
x_test = df_test[feature_cols]
# Swap the 0/1 coding of OS_STATUS (0 -> 1, 1 -> 0). Any value other than
# 0 or 1 is not in the mapping and becomes NaN.
y_train = df_train['OS_STATUS'].map({0: 1, 1: 0})
y_test = df_test['OS_STATUS'].map({0: 1, 1: 0})

# Models
# RandomForestClassifier and DecisionTreeClassifier are randomized estimators;
# a fixed seed makes the saved result tables reproducible from run to run.
RANDOM_STATE = 42

# Insertion order of this dict is the row order of the result tables.
models = {
    'SVM': SVC(),
    'RandomForest': RandomForestClassifier(random_state=RANDOM_STATE),
    'LDA': LinearDiscriminantAnalysis(),
    'DecisionTree': DecisionTreeClassifier(random_state=RANDOM_STATE),
}

# Accumulators filled by the evaluation loop, one list entry per model.
train_results = []
test_results = []

# Metric names passed to cross_validate, and the matching sklearn.metrics
# functions for the held-out test set, in the same column order.
metric_names = ['accuracy', 'precision', 'recall', 'f1']
test_scorers = (accuracy_score, precision_score, recall_score, f1_score)

for model_name, model in models.items():
    # 10-fold cross-validation on the training data; cross_validate returns
    # the per-fold scores under the key 'test_<metric>'.
    cv_results = cross_validate(model, x_train, y_train, cv=10, scoring=metric_names)

    # Average each metric over the folds.
    train_avg_accuracy, train_avg_precision, train_avg_recall, train_avg_f1 = (
        cv_results[f'test_{metric}'].mean() for metric in metric_names
    )
    train_results.append(
        [model_name, train_avg_accuracy, train_avg_precision, train_avg_recall, train_avg_f1]
    )

    # Refit on the full training set, then score once on the test set.
    model.fit(x_train, y_train)
    y_pred_test = model.predict(x_test)

    test_accuracy, test_precision, test_recall, test_f1 = (
        scorer(y_test, y_pred_test) for scorer in test_scorers
    )
    test_results.append([model_name, test_accuracy, test_precision, test_recall, test_f1])

# Tabulate the collected rows; both tables share the same column layout.
result_columns = ['Model', 'Accuracy', 'Precision', 'Recall', 'F1']
train_results_df = pd.DataFrame(train_results, columns=result_columns)
test_results_df = pd.DataFrame(test_results, columns=result_columns)

# Write each table to its CSV file without the positional index column.
for frame, csv_path in (
    (train_results_df, 'train_classification_results.csv'),
    (test_results_df, 'test_classification_results.csv'),
):
    frame.to_csv(csv_path, index=False)