In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Display names of the classes. The position of a name in this list is the integer
# label of that class, so the order must stay in sync with `files` below.
values = [
    "apple", "pineapple", "banana", "t-shirt", "pencil",
    "pants", "cat", "car", "envelope", "eye",
    "star", "mouth", "umbrella", "teddy-bear", "tent",
    "guitar", "headphones", "camera", "calculator", "door",
    "birthday-cake", "bicycle", "traffic-light", "tree", "wine-glass",
    "skateboard", "pizza", "octopus", "light-bulb", "ladder",
    "hourglass", "helicopter", "donut", "ice-cream", "crown",
    "butterfly", "leaf", "laptop", "skull", "jail",
]

# File names (one .npy file per class), listed in the same order as `values`.
# Multi-word names use underscores on disk, except "t-shirt" which keeps its hyphen.
files = [
    f"{stem}.npy"
    for stem in (
        "apple", "pineapple", "banana", "t-shirt", "pencil",
        "pants", "cat", "car", "envelope", "eye",
        "star", "mouth", "umbrella", "teddy_bear", "tent",
        "guitar", "headphones", "camera", "calculator", "door",
        "birthday_cake", "bicycle", "traffic_light", "tree", "wine_glass",
        "skateboard", "pizza", "octopus", "light_bulb", "ladder",
        "hourglass", "helicopter", "donut", "ice_cream", "crown",
        "butterfly", "leaf", "laptop", "skull", "jail",
    )
]
def load(dir, files, max_samples=2000):
    """Load per-class ``.npy`` files into one image array and one label array.

    Each file is read with ``np.load`` and every row in it is reshaped to a
    28x28 image. The label assigned to a row is the position of its file in
    ``files``.

    Parameters
    ----------
    dir : str
        Directory containing the files. A trailing path separator is optional:
        the file path is built with ``os.path.join`` rather than plain string
        concatenation, so ``"data"`` and ``"data/"`` behave the same.
        (The name shadows the ``dir`` builtin; it is kept so existing keyword
        callers do not break.)
    files : sequence of str
        File names to load, one per class.
    max_samples : int or None, default 2000
        Maximum number of rows taken from the start of each file. ``None`` or a
        negative value means "take every row".

    Returns
    -------
    X : numpy.ndarray
        Images with shape ``(n_total, 28, 28)``, in file order.
    Y : numpy.ndarray
        Integer labels with shape ``(n_total,)``; ``Y[k]`` is the index in
        ``files`` of the file that ``X[k]`` came from.

    Raises
    ------
    FileNotFoundError
        If a file does not exist (raised by ``np.load``).
    ValueError
        If a file's rows cannot be reshaped to 28x28.
    """
    take_all = max_samples is None or max_samples < 0

    images = []
    labels = []
    for index, filename in enumerate(files):
        samples = np.load(os.path.join(dir, filename))
        if not take_all:
            samples = samples[:max_samples]
        # One vectorised reshape per file instead of a Python-level loop over rows.
        images.append(samples.reshape(len(samples), 28, 28))
        labels.append(np.full(len(samples), index, dtype=int))

    if not images:
        # np.concatenate rejects an empty list; return correctly shaped empty arrays.
        return np.empty((0, 28, 28)), np.empty((0,), dtype=int)
    return np.concatenate(images), np.concatenate(labels)

# Per-class cap on the number of drawings read from each file.
max_samples = 1000

# Folder that holds the per-class .npy files. It can be overridden with the
# QUICK_DRAW_DATA_DIR environment variable so the notebook is not tied to one machine.
# The default is a raw string: the Windows path contains backslash sequences
# (\V, \O, \p, \Q) that are invalid escapes in a normal string literal.
# Joining with "" guarantees a trailing separator, so the folder and the file
# name are never glued together when the path is concatenated.
DATA_DIR = os.path.join(
    os.environ.get(
        "QUICK_DRAW_DATA_DIR",
        r"C:\VisheshFiles\OneDrive - Swast Solutions Private Limited\OldProjects\projects\Quick_Draw/data",
    ),
    "",
)

X, Y = load(DATA_DIR, files, max_samples)

# Stratified 90/10 split with a fixed seed so every model sees the same partition.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, train_size=0.9, stratify=Y, random_state=0
)

# The scikit-learn estimators below expect 2-D input: one flat row per image.
X_train_flatten = X_train.reshape(X_train.shape[0], -1)
X_test_flatten = X_test.reshape(X_test.shape[0], -1)

# Print shapes to confirm
print(f"X_train shape: {X_train.shape}, Y_train shape: {Y_train.shape}")
print(f"X_test shape: {X_test.shape}, Y_test shape: {Y_test.shape}")

# Visual sanity check: show the first few drawings of every class under its name.
n_preview = 5
for label, class_name in enumerate(values):
    # Select rows by label instead of by `label * max_samples` offset arithmetic, so the
    # preview stays correct even if a class contributed fewer than max_samples drawings.
    preview_idx = np.flatnonzero(Y == label)[:n_preview]

    fig, axes = plt.subplots(1, n_preview, figsize=(10, 2))
    for ax, sample_idx in zip(axes, preview_idx):
        ax.imshow(X[sample_idx], cmap='gray')
    for ax in axes:
        # Also hides the frame of any panel left empty by a short class.
        ax.axis('off')
    fig.suptitle(class_name, fontsize=20)
    plt.show()
    # Release the figure; 40 open figures would otherwise accumulate in memory.
    plt.close(fig)

# Original Dataset

## KNN

### Accuracy = 82.9, k=3
### Accuracy = 82.3125, k=5 (setting used in the cell below)
### Accuracy = 82.3125, k=7
### Accuracy = 82.1, k=9

In [None]:
# k-nearest-neighbours classifier with a fixed k=5 (no parameter search is run here).
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_flatten, Y_train)

# Evaluate on the held-out split: overall accuracy and per-class confusion counts.
Y_pred_knn = knn.predict(X_test_flatten)
cm_knn = confusion_matrix(Y_test, Y_pred_knn)
accuracy_knn = accuracy_score(Y_test, Y_pred_knn)

print(f"KNN Accuracy: {accuracy_knn}")

# Confusion matrix as an annotated heat map.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm_knn, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title("KNN Confusion Matrix")
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
plt.show()

## Decision Trees

### Accuracy = 58.8, depth=13
### Accuracy = 59.8, depth=13, min_impurity_decrease=0.00014 (setting used in the cell below)

In [None]:
# Depth-limited decision tree with a small impurity threshold for pruning weak splits.
# random_state is pinned to 0 (the seed used elsewhere in this notebook) because
# scikit-learn permutes the candidate features at each split, so ties between
# equally good splits would otherwise be broken differently on every run.
decision_tree = DecisionTreeClassifier(
    max_depth=13,
    min_impurity_decrease=0.00014,
    random_state=0,
)

# Train and predict using Decision Tree
decision_tree.fit(X_train_flatten, Y_train)
Y_pred_dt = decision_tree.predict(X_test_flatten)

# Accuracy and Confusion Matrix for Decision Tree
accuracy_dt = accuracy_score(Y_test, Y_pred_dt)
cm_dt = confusion_matrix(Y_test, Y_pred_dt)

print(f"Decision Tree Accuracy: {accuracy_dt}")

# Plot confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm_dt, annot=True, fmt='d', cmap='Blues')
plt.title("Decision Tree Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

## Random Forests

### Accuracy = 82, depth=21

In [None]:
# Random forest tuned through GridSearchCV. The grid currently holds a single
# candidate, so the search amounts to a 5-fold cross-validation of that one
# configuration followed by a refit on the full training split.
param_grid_rf = dict(
    n_estimators=[100],
    max_depth=[21],
    min_samples_leaf=[1],
    random_state=[0],
)
random_forest = RandomForestClassifier()

# Run the search on all available cores.
grid_search_rf = GridSearchCV(random_forest, param_grid_rf, cv=5, n_jobs=-1)
grid_search_rf.fit(X_train_flatten, Y_train)

# Report the winning configuration.
best_params_rf = grid_search_rf.best_params_
print(f"Best parameters found: {best_params_rf}")

# Score the refitted best estimator on the held-out split.
best_rf = grid_search_rf.best_estimator_
Y_pred_rf = best_rf.predict(X_test_flatten)
cm_rf = confusion_matrix(Y_test, Y_pred_rf)
accuracy_rf = accuracy_score(Y_test, Y_pred_rf)

print(f"Random Forest Accuracy: {accuracy_rf}")

# Confusion matrix as an annotated heat map.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm_rf, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title("Random Forest Confusion Matrix")
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
plt.show()

## Softmax/Logistic Regression

### Accuracy = 69.3

In [None]:
# LogisticRegression, accuracy_score, confusion_matrix, sns and plt are already imported
# in the notebook's first cell, so they are not imported again here.

# Softmax Regression model.
# The deprecated `multi_class='multinomial'` argument is dropped: with the default
# lbfgs solver scikit-learn already fits a multinomial (softmax) model when there are
# more than two classes, and passing the argument triggers a FutureWarning on recent
# releases and is scheduled for removal.
softmax_regression = LogisticRegression()

# Train and predict using Softmax Regression
softmax_regression.fit(X_train_flatten, Y_train)
Y_pred_sr = softmax_regression.predict(X_test_flatten)

# Accuracy and Confusion Matrix for Softmax Regression
accuracy_sr = accuracy_score(Y_test, Y_pred_sr)
cm_sr = confusion_matrix(Y_test, Y_pred_sr)

print(f"Softmax Regression Accuracy: {accuracy_sr}")

# Plot confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm_sr, annot=True, fmt='d', cmap='Blues')
plt.title("Softmax Regression Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

## SVM Classifier

### Accuracy = 69.5, Linear kernel
### Accuracy = 84.625, Polynomial kernel, degree=2
### Accuracy = TODO: result not yet recorded

In [None]:
# Support vector classifier with a linear kernel, fitted on the flattened images.
linear_svm = SVC(kernel='linear')
linear_svm.fit(X_train_flatten, Y_train)

# Evaluate on the held-out split: overall accuracy and per-class confusion counts.
Y_pred_linear = linear_svm.predict(X_test_flatten)
cm_linear = confusion_matrix(Y_test, Y_pred_linear)
accuracy_linear = accuracy_score(Y_test, Y_pred_linear)

print(f"Linear SVM Accuracy: {accuracy_linear}")

# Confusion matrix as an annotated heat map.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm_linear, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title("Linear SVM Confusion Matrix")
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
plt.show()

In [None]:
# Support vector classifier with a degree-2 polynomial kernel.
poly_svm = SVC(kernel='poly', degree=2)
poly_svm.fit(X_train_flatten, Y_train)

# Evaluate on the held-out split: overall accuracy and per-class confusion counts.
Y_pred_poly = poly_svm.predict(X_test_flatten)
cm_poly = confusion_matrix(Y_test, Y_pred_poly)
accuracy_poly = accuracy_score(Y_test, Y_pred_poly)

print(f"Polynomial SVM Accuracy: {accuracy_poly}")

# Confusion matrix as an annotated heat map.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm_poly, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title("Polynomial SVM Confusion Matrix")
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
plt.show()