# Import dependencies

In [None]:
import os
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import VotingClassifier

import matplotlib.pyplot as plt

from sklearn.metrics import classification_report, f1_score, make_scorer

In [None]:
# Download the training CSV from Google Drive by file ID (requires the `gdown` CLI).
! gdown 1G93DGgD2Xw58be5PNOGT8HB_YSWrWll7

Downloading...
From: https://drive.google.com/uc?id=1G93DGgD2Xw58be5PNOGT8HB_YSWrWll7
To: /content/train_data (4).csv
100% 1.04G/1.04G [00:04<00:00, 245MB/s]


In [None]:
# Download a second file from Google Drive by file ID.
# NOTE(review): presumably this is the test set read later as
# 'test_data_flatten.csv' — confirm the downloaded filename.
! gdown 1Ku1_9Y7zQaDmYv8jdDVAtD289cIrGuon

In [None]:
# Rename the downloaded file to 'train_data.csv', the path that is read into
# `data` afterwards (the Drive name contains a space and a "(4)" suffix).
! mv 'train_data (4).csv' 'train_data.csv'

In [None]:
# Load the training set into a DataFrame; it must contain a 'Label' column,
# which is split off as the target further down.
data = pd.read_csv('train_data.csv')

In [None]:
# Load the test set into a DataFrame; like the training set it must contain a
# 'Label' column.
data_test = pd.read_csv('test_data_flatten.csv')

## Training set

In [None]:
# Split the training table into the target column and the feature columns
# (everything except 'Label').
y_train = data['Label']
X_train = data.drop(labels=['Label'], axis='columns')

In [None]:
# Convert the training feature DataFrame to a NumPy array (column names are dropped).
X_train = X_train.to_numpy()

In [None]:
# Learn the label -> integer mapping from the training labels, then replace
# the training labels with their integer codes. The fitted encoder is reused
# for the test labels and for readable class names in the reports.
label_encoder = LabelEncoder().fit(y_train)
y_train = label_encoder.transform(y_train)

## Testing set

In [None]:
# Split the test table the same way as the training table: 'Label' is the
# target, all remaining columns are features.
y_test = data_test['Label']
X_test = data_test.drop(labels=['Label'], axis='columns')

In [None]:
# Convert the test feature DataFrame to a NumPy array, matching the training features.
X_test = X_test.to_numpy()

In [None]:
# Encode the test labels with the encoder fitted on the training labels so
# both splits share one integer mapping (labels unseen during fitting make
# `transform` raise).
y_test = label_encoder.transform(y_test)

In [None]:
# Model-selection metric shared by all grid searches below: F1 averaged with
# equal weight per class ("macro").
f1_macro = make_scorer(score_func=f1_score, average='macro')

# GridSearch for RFC

In [None]:
# Hyper-parameter grid for the random forest (3 x 3 x 3 = 27 combinations).
# NOTE(review): max_features is given as absolute feature counts, which
# presumably requires at least 10000 feature columns — confirm against the data.
RFC_parameters = dict(
    n_estimators=[100, 200, 500],
    max_depth=[100, 200, 500],
    max_features=[1000, 5000, 10000],
)

In [None]:
# Base random forest handed to the grid search; n_jobs=-1 trains the trees on
# all available cores.
# random_state is fixed so bootstrap sampling and per-split feature
# sub-sampling are reproducible; without it, re-running the notebook can
# select a different "best" configuration and report different scores.
RFC = RandomForestClassifier(n_jobs=-1, random_state=42)

In [None]:
# Exhaustive search over RFC_parameters, scored with macro F1. `cv` is not
# passed, so scikit-learn's default cross-validation splitting is used, and
# the best configuration is refit on the full training set (default refit).
RFC_GridSearch = GridSearchCV(
    estimator=RFC,
    param_grid=RFC_parameters,
    scoring=f1_macro,
    verbose=100,
)
RFC_GridSearch.fit(X_train, y_train)

In [None]:
# Random forest refit on the whole training set with the best-scoring parameters.
best_RFC_estimator = RFC_GridSearch.best_estimator_

In [None]:
# Evaluate the tuned random forest on the held-out test set.
y_pred = best_RFC_estimator.predict(X_test)

# Pass the full set of encoded labels explicitly. Without `labels`,
# classification_report infers the classes from y_test/y_pred and raises
# ValueError when a class is absent from both, because the inferred classes
# no longer line up with target_names. LabelEncoder codes are 0..n_classes-1.
report = classification_report(
    y_test,
    y_pred,
    labels=np.arange(len(label_encoder.classes_)),
    target_names=label_encoder.classes_,
)

print(report)

# GridSearch for KNN

In [None]:
# Hyper-parameter grid for k-NN (5 x 3 = 15 combinations): the number of
# neighbours and the Minkowski exponent p (1 = Manhattan, 2 = Euclidean).
KNN_parameters = dict(
    n_neighbors=[1, 2, 3, 4, 5],
    p=[1, 2, 3],
)

In [None]:
# Base k-nearest-neighbours classifier handed to the grid search; n_jobs=-1
# runs neighbour queries on all available cores.
KNN = KNeighborsClassifier(n_jobs=-1)

In [None]:
# Exhaustive search over KNN_parameters, scored with macro F1. `cv` is not
# passed, so scikit-learn's default cross-validation splitting is used.
KNN_GridSearch = GridSearchCV(
    estimator=KNN,
    param_grid=KNN_parameters,
    scoring=f1_macro,
    verbose=100,
)
KNN_GridSearch.fit(X_train, y_train)

In [None]:
# k-NN model refit on the whole training set with the best-scoring parameters.
best_KNN_estimator = KNN_GridSearch.best_estimator_

In [None]:
# Evaluate the tuned k-NN classifier on the held-out test set.
y_pred = best_KNN_estimator.predict(X_test)

# Pass the full set of encoded labels explicitly. Without `labels`,
# classification_report infers the classes from y_test/y_pred and raises
# ValueError when a class is absent from both, because the inferred classes
# no longer line up with target_names. LabelEncoder codes are 0..n_classes-1.
report = classification_report(
    y_test,
    y_pred,
    labels=np.arange(len(label_encoder.classes_)),
    target_names=label_encoder.classes_,
)

print(report)

# Softmax Regression

In [None]:
# Hyper-parameter grid for softmax (multinomial logistic) regression: only the
# solver iteration cap varies; the multinomial formulation is held fixed.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases —
# confirm against the installed version before upgrading.
SR_parameters = dict(
    max_iter=[100, 500, 1000, 5000],
    multi_class=['multinomial'],
)

In [None]:
# Base logistic-regression model; max_iter and multi_class are supplied by the
# grid search.
# NOTE(review): n_jobs may have no effect for the multinomial formulation —
# confirm in the scikit-learn documentation.
SR = LogisticRegression(n_jobs=-1)

In [None]:
# Exhaustive search over SR_parameters, scored with macro F1. `cv` is not
# passed, so scikit-learn's default cross-validation splitting is used.
SR_GridSearch = GridSearchCV(
    estimator=SR,
    param_grid=SR_parameters,
    scoring=f1_macro,
    verbose=100,
)
SR_GridSearch.fit(X_train, y_train)

In [None]:
# Softmax-regression model refit on the whole training set with the
# best-scoring parameters.
best_SR_estimator = SR_GridSearch.best_estimator_

In [None]:
# Evaluate the tuned softmax-regression model on the held-out test set.
y_pred = best_SR_estimator.predict(X_test)

# Pass the full set of encoded labels explicitly. Without `labels`,
# classification_report infers the classes from y_test/y_pred and raises
# ValueError when a class is absent from both, because the inferred classes
# no longer line up with target_names. LabelEncoder codes are 0..n_classes-1.
report = classification_report(
    y_test,
    y_pred,
    labels=np.arange(len(label_encoder.classes_)),
    target_names=label_encoder.classes_,
)

print(report)