First, we begin by importing the necessary libriries. We use **pandas** for loading the datasets.

In [1]:
import pandas as pd

We use **scikit-learn** to test different classifiers on our data.

In [2]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

from sklearn.linear_model import LogisticRegression, Perceptron, RidgeClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

After that, we declare some *constants* that will be helpful later on.

In [3]:
NUM_FEATURES = 224
TRAIN_DATA = '../datasets/datasetTV.csv'

Finally, we load the **train** set.

In [4]:
train = pd.read_csv(TRAIN_DATA, header=None)

We *preprocess* the data...

In [5]:
feature_columns = [f'feature_{i+1}' for i in range(NUM_FEATURES)]
train.columns = feature_columns + ['label']

# split train data into features and labels
X_train = train[feature_columns]
y_train = train['label']

# scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

...and define the **models** we want to test, along with their *parameters*.

In [6]:
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    
    "KNN(5NN)": KNeighborsClassifier(n_neighbors=5),
    "Naive Bayes": GaussianNB(),

    "Logistic Regression": LogisticRegression(max_iter=300, random_state=42),
    "Perceptron": Perceptron(max_iter=300, random_state=42),
    "Least Squares": RidgeClassifier(),
    
    "SVM": SVC(kernel='linear', random_state=42),
    
    "Neural Network": MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42),

    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    
    "AdaBoost": AdaBoostClassifier(n_estimators=100, random_state=42)
}

We then train each **model** and calculate its accuracy using **5-fold cross-validation**.

In [7]:
results = {}
for name, model in models.items():
    mean_score = cross_val_score(model, X_train_scaled, y_train, cv=5, scoring='accuracy').mean()
    results[name] = mean_score
    print(f"{name} - Cross-validation accuracy: {mean_score:.2f}")

Decision Tree - Cross-validation accuracy: 0.62
KNN(5NN) - Cross-validation accuracy: 0.81
Naive Bayes - Cross-validation accuracy: 0.70
Logistic Regression - Cross-validation accuracy: 0.77
Perceptron - Cross-validation accuracy: 0.69
Least Squares - Cross-validation accuracy: 0.75
SVM - Cross-validation accuracy: 0.76
Neural Network - Cross-validation accuracy: 0.83
Random Forest - Cross-validation accuracy: 0.80
AdaBoost - Cross-validation accuracy: 0.65
