In [60]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from scipy.stats import mode
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier

trainData = pd.read_csv('TrainData5.txt', sep='\\s+', header=None)
trainLabels = pd.read_csv('TrainLabel5.txt', sep='\\s+', header=None)
testData = pd.read_csv('TestData5.txt', sep='\\s+', header=None)

trainData.replace(1.00000000000000e+99, np.nan, inplace=True)
testData.replace(1.00000000000000e+99, np.nan, inplace=True)

missing_count = trainData.isnull().sum()
print(f"Missing values count in train data:\n{missing_count}")

myScaler = StandardScaler()
trainDataScaled = pd.DataFrame(myScaler.fit_transform(trainData), columns=trainData.columns)
testDataScaled = pd.DataFrame(myScaler.transform(testData), columns=testData.columns)

X_train, X_val, y_train, y_val = train_test_split(trainDataScaled, trainLabels, shuffle=True, test_size=0.2, random_state=50)

myKNN = KNeighborsClassifier(n_neighbors=5)
myNN = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=50)
myDT = DecisionTreeClassifier(random_state=50)

myKNN.fit(X_train, y_train.values.ravel())
X_valPredictionKNN = myKNN.predict(X_val)

myNN.fit(X_train, y_train.values.ravel())
X_valPredictionNN = myNN.predict(X_val)

myDT.fit(X_train, y_train.values.ravel())
X_valPredictionDT = myDT.predict(X_val)

X_valAccuracyKNN = accuracy_score(y_val, X_valPredictionKNN)
X_valAccuracyNN = accuracy_score(y_val, X_valPredictionNN)
X_valAccuracyDT = accuracy_score(y_val, X_valPredictionDT)

X_valReportKNN = classification_report(y_val, X_valPredictionKNN)
X_valReportNN = classification_report(y_val, X_valPredictionNN)
X_valReportDT = classification_report(y_val, X_valPredictionDT)

print(f"The validation set had an accuracy of {X_valAccuracyKNN} for KNN")
print("Classification report for KNN validation set:\n", X_valReportKNN)

print(f"The validation set had an accuracy of {X_valAccuracyNN} for Neural Network")
print("Classification report for Neural Network validation set:\n", X_valReportNN)

print(f"The validation set had an accuracy of {X_valAccuracyDT} for Decision Tree")
print("Classification report for Decision Tree validation set:\n", X_valReportDT)

combined_predictions = np.column_stack((X_valPredictionKNN, X_valPredictionNN, X_valPredictionDT))
final_predictions = mode(combined_predictions, axis=1).mode.flatten()

X_valAccuracyCombined = accuracy_score(y_val, final_predictions)
X_valReportCombined = classification_report(y_val, final_predictions)

print(f"The combined model had an accuracy of {X_valAccuracyCombined}")
print("Classification report for the combined model:\n", X_valReportCombined)

myKNN.fit(trainDataScaled, trainLabels.values.ravel())
myNN.fit(trainDataScaled, trainLabels.values.ravel())
myDT.fit(trainDataScaled, trainLabels.values.ravel())

testDataPredictionKNN = myKNN.predict(testDataScaled)
testDataPredictionNN = myNN.predict(testDataScaled)
testDataPredictionDT = myDT.predict(testDataScaled)

combined_test_predictions = np.column_stack((testDataPredictionKNN, testDataPredictionNN, testDataPredictionDT))
final_test_predictions = mode(combined_test_predictions, axis=1).mode.flatten()

print("\nPredictions on Test Data from the Combined Model:\n", final_test_predictions)


Missing values count in train data:
0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
dtype: int64


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


The validation set had an accuracy of 0.5714285714285714 for KNN
Classification report for KNN validation set:
               precision    recall  f1-score   support

           3       0.00      0.00      0.00         2
           4       0.00      0.00      0.00         7
           5       0.67      0.67      0.67       104
           6       0.47      0.58      0.52        81
           7       0.65      0.39      0.49        28
           8       0.00      0.00      0.00         2

    accuracy                           0.57       224
   macro avg       0.30      0.27      0.28       224
weighted avg       0.56      0.57      0.56       224

The validation set had an accuracy of 0.625 for Neural Network
Classification report for Neural Network validation set:
               precision    recall  f1-score   support

           3       0.00      0.00      0.00         2
           4       0.00      0.00      0.00         7
           5       0.75      0.74      0.74       104
       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Predictions on Test Data from the Combined Model:
 [5 5 5 6 5 5 4 6 6 5 5 5 5 5 5 5 6 5 5 6 6 5 6 5 5 5 5 6 5 5 5 5 5 5 5 6 5
 6 5 5 5 5 6 5 5 5 6 5 5 5 5 6 5 6 6 5 5 5 5 5 5 5 5 6 5 5 6 7 5 5 5 6 6 5
 5 5 5 7 6 5 5 7 6 6 7 5 6 5 5 6 5 5 5 6 5 6 6 5 6 6 7 6 7 6 5 6 6 7 7 6 6
 6 5 5 5 5 6 6 5 6 7 6 5 6 6 5 6 6 6 7 6 5 7 5 6 6 6 5 6 5 5 6 5 6 7 7 6 7
 5 7 5 6 6 6 5 6 6 6 5 6 6 6 5 6 6 5 5 5 5 6 6 5 7 5 5 5 5 5 6 6 6 5 6 5 6
 7 6 6 5 5 7 5 5 5 5 6 5 5 5 6 5 6 6 5 6 6 6 5 5 5 5 4 6 5 6 5 5 6 5 6 5 5
 5 6 5 5 5 6 5 5 5 6 5 4 5 5 4 5 5 5 5 5 5 6 5 5 5 5 5 5 6 6 6 6 6 5 6 5 5
 6 5 7 5 6 7 5 6 6 6 6 6 6 5 6 6 5 5 5 7 7 6 7 6 6 5 7 7 6 6 6 6 6 6 5 6 5
 6 5 5 5 5 6 5 6 7 7 7 7 7 6 6 7 6 6 5 5 6 6 7 6 5 6 7 7 4 5 5 7 6 6 7 6 5
 6 6 7 6 5 6 7 6 4 7 6 6 6 5 5 6 6 6 6 6 5 6 6 6 5 5 4 4 4 6 6 7 5 5 6 6 6
 6 5 6 5 6 6 5 5 6 5 5 5 6 5 6 6 5 5 5 6 5 6 7 5 5 5 4 6 6 6 5 7 6 6 5 6 5
 5 5 5 5 5 6 6 6 5 5 6 5 6 6 6 6 6 5 5 5 5 5 6 6 6 5 4 8 6 6 6 6 8 6 7 5 4
 7 6 5 6 5 6 6 6 7 5 6 5 6 5 6 6 6 6 6 6 6 6 5 6

