In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import os
import numpy as np
import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import random
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Class names; a label's position in this list is its integer class id.
labels = ['pyramid', 'Tetrahedron','cow','cube','sphere']
# Edge length (in pixels) every image is resized to.
img_size = 600


def get_data(data_dir, data_size=None):
    """Load grayscale, resized images for every class in ``labels``.

    Parameters
    ----------
    data_dir : str
        Directory containing one sub-directory per entry of ``labels``.
    data_size : int or None, optional
        Maximum number of images to load per class. ``None`` loads every
        file found in the class directory. If a directory holds fewer files
        than requested, a notice is printed and all available files are used.

    Returns
    -------
    numpy.ndarray
        Object array of shape ``(n_loaded, 2)``. Each row is
        ``[image, class_num]`` where ``image`` is the resized
        ``(img_size, img_size)`` array and ``class_num`` is the index of the
        class in ``labels``. Files that cannot be read are skipped.
    """
    data = []
    for class_num, label in enumerate(labels):
        path = os.path.join(data_dir, label)
        # List the directory once per class (not once per image) and sort it
        # so the selected files do not depend on filesystem ordering.
        files = sorted(os.listdir(path))
        if data_size is not None:
            if len(files) < data_size:
                print('{}: requested {} images but only {} available'.format(
                    path, data_size, len(files)))
            files = files[:max(data_size, 0)]
        for img in files:
            img_path = os.path.join(path, img)
            try:
                img_arr = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img_arr is None:
                    # cv2.imread reports an unreadable file by returning None.
                    print('could not read image: {}'.format(img_path))
                    continue
                # Reshaping images to preferred size
                resized_arr = cv2.resize(img_arr, (img_size, img_size))
                data.append((resized_arr, class_num))
            except Exception as e:
                # Best effort: report the problem and continue with the next file.
                print(e)

    # Build the (n, 2) object array explicitly: np.array() on ragged
    # [image, int] rows raises ValueError on recent NumPy versions.
    result = np.empty((len(data), 2), dtype=object)
    for row, (image, class_id) in enumerate(data):
        result[row, 0] = image
        result[row, 1] = class_id
    return result

In [None]:
# Number of images to load per class.
data_size = 500
# Build the path with os.path.join so the notebook also runs on non-Windows
# systems (a literal '.\Data' is only a valid relative path on Windows).
data_set = get_data(os.path.join(os.curdir, 'Data'), data_size)

In [None]:
# Separate the (image, class id) rows of data_set into two parallel lists.
X = [image for image, _ in data_set]
y = [class_id for _, class_id in data_set]


In [None]:
# Sanity check: show one randomly chosen image together with its class name.
# Index into X (X_train does not exist until the split cell below has run);
# randrange excludes the upper bound, so the index is always valid.
num = random.randrange(len(X))
plt.figure(figsize=(5, 5))
plt.imshow(X[num], cmap='gray')
plt.title(labels[y[num]])
print(num)

In [None]:
# Splitting the data: 67 % train / 33 % test.
# A fixed random_state makes the split (and every number reported below)
# reproducible; stratify=y keeps the class proportions equal in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42, stratify=y
)

In [None]:
# Sanity check: show one randomly chosen training image with its class name.
# randrange excludes the upper bound (randint would include len(X_train) and
# could raise IndexError).
num = random.randrange(len(X_train))
plt.figure(figsize=(5, 5))
plt.imshow(X_train[num], cmap='gray')
plt.title(labels[y_train[num]])
print(num)

In [None]:
# Normalize the data: divide the pixel values by 255.
# float32 halves the memory needed compared with the default float64;
# scikit-learn's tree ensembles convert their input to float32 anyway.
# NOTE: this cell overwrites X_train / X_test, so re-running it without
# re-running the split cell would scale the data a second time.
X_train = np.asarray(X_train, dtype=np.float32) / 255
X_test = np.asarray(X_test, dtype=np.float32) / 255

# The targets stay plain integer class ids (no one-hot encoding is applied).
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

print(np.shape(X_train))
print(np.shape(y_train))
print(np.shape(X_test))
print(np.shape(y_test))

In [None]:
# scikit-learn estimators take a 2-D (n_samples, n_features) matrix,
# so every image is flattened into one row of img_size * img_size pixels.
pixels_per_image = img_size ** 2
x_train2 = np.reshape(X_train, (len(X_train), pixels_per_image))
x_test2 = np.reshape(X_test, (len(X_test), pixels_per_image))

In [None]:
# Shapes of the flattened train / test matrices.
print(x_train2.shape)
print(x_test2.shape)

In [None]:
# RandomForestClassifier is already imported in the notebook's import cell,
# so it is not imported again here.
rnd_clf = RandomForestClassifier(
    n_estimators=500,    # number of trees in the forest
    max_leaf_nodes=30,   # upper bound on the number of leaves per tree
    n_jobs=-1,           # use all available CPU cores
    random_state=42,     # fixed seed for reproducible training
)

In [None]:
# Fit the random forest on the flattened training images.
rnd_clf.fit(X=x_train2, y=y_train)

## Accuracy

In [None]:
# Accuracy of the fitted forest on both splits.
y_pred_train = rnd_clf.predict(x_train2)
y_pred_test = rnd_clf.predict(x_test2)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print('training accuracy:', train_accuracy)
print('test accuracy:', test_accuracy)

## Classification report

In [None]:
y_pred = rnd_clf.predict(x_test2)

# classification_report expects (y_true, y_pred) in that order; with the
# arguments swapped, precision and recall are exchanged in the report.
# Passing labels/target_names prints the class names instead of 0..4 and
# keeps every class in the report even if it never occurs.
print(classification_report(
    y_test,
    y_pred,
    labels=list(range(len(labels))),
    target_names=labels,
))

## Confusion matrix

In [None]:
# Confusion matrix of the test predictions
# (rows: true class, columns: predicted class).
y_pred = rnd_clf.predict(x_test2)

# Fix the class order explicitly so the matrix is always
# len(labels) x len(labels) and lines up with the tick labels,
# even when a class is missing from y_test / y_pred.
class_ids = list(range(len(labels)))
cm1 = metrics.confusion_matrix(y_test, y_pred, labels=class_ids)

fig, ax = plt.subplots(figsize=(9, 9))
sns.heatmap(
    cm1,
    annot=True,
    fmt="d",
    linewidths=.5,
    square=True,
    cmap='Blues_r',
    xticklabels=labels,
    yticklabels=labels,
    ax=ax,
)
ax.set_ylabel('true shape', size=17)
ax.set_xlabel('predicted shape', size=17);


## Showing all errors

In [None]:
# Positions (within the test split) of every misclassified sample.
errors = (y_pred != y_test)
error_pos = np.array(np.where(errors))
errl = error_pos[0].tolist()
print('total number of errors:', len(errl))

# Two images per row. The grid size uses its own names so the global label
# list `y` is not overwritten (the original `x, y = ...` clobbered it).
n_cols = 2
n_rows = max(1, (len(errl) + n_cols - 1) // n_cols)
plt.figure(figsize=(5 * n_cols, min(5 * n_rows, 80)))
for i, m in enumerate(errl):
    plt.subplot(n_rows, n_cols, i + 1)
    # errl holds indices into the test split, so the image has to be taken
    # from X_test; X is the unsplit data and would show an unrelated image.
    plt.imshow(X_test[m], cmap='gray')
    plt.title('target: {}\npredicted: {}'.format(labels[y_test[m]], labels[y_pred[m]]))
    plt.axis('off')
# Lay out once after all subplots exist instead of once per image.
plt.tight_layout()
plt.show()

## Showing some (random) errors

In [None]:
# Positions (within the test split) of every misclassified sample.
errors = (y_pred != y_test)
error_pos = np.array(np.where(errors))
errl = error_pos[0].tolist()
print('total number of errors:', len(errl))

# Draw a random sample of at most 25 errors. errl itself is left untouched
# because later cells use the full list. min() avoids an IndexError when the
# model made fewer than 25 mistakes.
n_show = min(25, len(errl))
shown_errors = random.sample(errl, n_show)

# Grid size uses its own names so the global label list `y` is not overwritten.
n_cols = 3
n_rows = max(1, (n_show + n_cols - 1) // n_cols)
plt.figure(figsize=(15, 15))
for i, m in enumerate(shown_errors):
    plt.subplot(n_rows, n_cols, i + 1)
    # Indices refer to the test split, so the image comes from X_test (not X).
    plt.imshow(X_test[m], cmap='gray')
    plt.title('true shape: {}\npredicted: {}'.format(labels[y_test[m]], labels[y_pred[m]]))
    plt.axis('off')
plt.tight_layout()
plt.show()

## Show the errors that were predicted as cow

In [None]:
# Show the misclassified test images that the forest predicted as 'cow'.
cow_id = labels.index('cow')
cow_errors = [m for m in errl if y_pred[m] == cow_id]

# Size the grid from the number of matches (a fixed 3x2 grid fails as soon as
# there are more than 6) and avoid reusing the names x / y, which would
# overwrite the global label list `y`.
n_cols = 2
n_rows = max(1, (len(cow_errors) + n_cols - 1) // n_cols)
plt.figure(figsize=(10, min(5 * n_rows, 80)))
for n, m in enumerate(cow_errors):
    plt.subplot(n_rows, n_cols, n + 1)
    # errl indexes the test split, so the image comes from X_test (not X).
    plt.imshow(X_test[m], cmap='gray')
    plt.title('true Shape: {}\npredicted: {}'.format(labels[y_test[m]], labels[y_pred[m]]))
    plt.axis('off')
plt.tight_layout()
plt.show()