In [31]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.decomposition import PCA




K-NN Classifier

In [32]:
data_train = pd.read_csv('datasetTV.csv', header=None)
data_test = pd.read_csv('datasetTest.csv', header=None)

X_train = data_train.iloc[:, :-1]
y_train = data_train.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
KNN = KNeighborsClassifier()

# Function to tune the model using GridSearchCV
def tune_model(model, param_grid):
    grid = GridSearchCV(model, param_grid, cv=3, n_jobs=-1) # Define the grid search
    grid.fit(X_train, y_train) # Train the model
    y_pred = grid.predict(X_test) # Make predictions
    accuracy = grid.score(X_test, y_test) # Get the accuracy
    best_model = grid.best_estimator_ # Get the best model
    return accuracy, best_model

# Define the hyperparameter grid

param_grid = {'n_neighbors': [14], 'algorithm': ['auto'], 'weights': ['distance'],'p': [1]}
accuracy, best_model = tune_model(KNN, param_grid)
print(KNN.get_params())
print(accuracy, best_model)


{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}
0.8416237850200115 KNeighborsClassifier(n_neighbors=14, p=1, weights='distance')


Naive Bayes Classifier (Gaussian version)

In [33]:
data_train = pd.read_csv('datasetTV.csv', header=None)
data_test = pd.read_csv('datasetTest.csv', header=None)

X_train = data_train.iloc[:, :-1]
y_train = data_train.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
GNB = GaussianNB() # Create a Gaussian Naive Bayes model

# Function to tune the model using GridSearchCV
def tune_model(model, param_grid):
    grid = GridSearchCV(model, param_grid, cv=3, n_jobs = -1) # Define the grid search
    grid.fit(X_train, y_train) # Train the model
    y_pred = grid.predict(X_test) # Make predictions
    accuracy = grid.score(X_test, y_test) # Get the accuracy
    best_model = grid.best_estimator_ # Get the best model
    return accuracy, best_model

# Define the hyperparameter grid

param_grid = {}
accuracy, best_model = tune_model(GNB, param_grid)
print(GNB.get_params())
print(accuracy, best_model)

{'priors': None, 'var_smoothing': 1e-09}
0.6986849628359062 GaussianNB()


Support Vector Classifier

In [34]:


SVC = SVC() # Create SVC Classifier



# Define the hyperparameter grid

param_grid = {'C': [10], 'gamma': [0.01], 'kernel': ['poly'], 'class_weight': ['balanced']}
accuracy, best_model = tune_model(SVC, param_grid)
print(SVC.get_params())
print(accuracy, best_model)

# Test model in the dataset with the best model
X_test = data_test.iloc[:, :]
y_pred = best_model.predict(X_test)
np.save('labelsX', y_pred)





{'C': 1.0, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
0.8650657518582047 SVC(C=10, class_weight='balanced', gamma=0.01, kernel='poly')
