# Choosing model according to accuracy

## Import libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import files
from skimage.io import imsave, imread

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler
import joblib

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

# from catboost import CatBoostClassifier #!pip install catboost

## Functions

In [None]:
import numpy as np

def Round_Array(arr, num_digits):
    """Round every element of an array-like to a fixed number of decimals.

    Args:
        arr: Array-like of numbers (nested list or NumPy array) of any
            dimensionality.
        num_digits: Number of decimal places to keep.

    Returns:
        A new ``numpy.ndarray`` with the same shape as ``arr`` whose
        elements are rounded to ``num_digits`` decimals. The input is not
        modified.
    """
    # Vectorized rounding: no Python-level loops, and works for 1-D, 2-D
    # or higher-dimensional input alike.
    return np.round(np.asarray(arr), num_digits)

In [None]:
def Replace_1_To_255(arr):
    """Replace every element equal to 1 with 255, in place.

    Args:
        arr: A NumPy array (any dimensionality) or a mutable sequence of
            mutable rows (e.g. a list of lists).

    Returns:
        The same object that was passed in, after modification.
    """
    if isinstance(arr, np.ndarray):
        # Boolean-mask assignment updates all matching cells in one
        # vectorized step while still mutating the caller's array.
        arr[arr == 1] = 255
        return arr

    # Fallback for nested Python sequences, which have no mask indexing.
    for row in arr:
        for col, value in enumerate(row):
            if value == 1:
                row[col] = 255
    return arr

## Importing the dataset

In [None]:
# Load the prepared training table from disk
training_file_name = 'Duck backward elimination with support vector classification (10 featurs selected) - prepared data' + '.csv'
data = pd.read_csv(training_file_name)

# The first column read from the file holds row indexes; discard it
data = data.drop(columns=data.columns[0])

# Labels are the 'segmented' column
y = data['segmented']

# Features are every remaining column (the segmented image layer is removed)
X = data.drop(columns='segmented')

## Splitting into train and test sets by class

In [None]:
# Partition the rows by the value of their 'segmented' label
data_class_1 = data[data['segmented'] == 1]
data_class_0 = data[data['segmented'] == 0]

In [None]:
# Number of rows taken from EACH class for the train and test sets
train_size = 2000
test_size = 900

# Both classes are split with identical sizes and the same seed
split_kwargs = dict(test_size=test_size, train_size=train_size, random_state=0)
class_1_train, class_1_test = train_test_split(data_class_1, **split_kwargs)
class_0_train, class_0_test = train_test_split(data_class_0, **split_kwargs)

# Stack the two classes' training rows, then shuffle them reproducibly
train = pd.concat([class_1_train, class_0_train], axis=0).sample(frac=1, random_state=0)

# Stack the two classes' test rows (left unshuffled)
test = pd.concat([class_1_test, class_0_test], axis=0)

In [None]:
# Training set: label column vs. all remaining feature columns
y_train = train['segmented']
X_train = train.drop(columns='segmented')

# Test set: same separation
y_test = test['segmented']
X_test = test.drop(columns='segmented')

## Train the model

In [None]:
# Name used later when building output file names
model_name = 'XGBClassifier'

# Build the classifier with default settings and fit it on the training set
classifier = XGBClassifier()
classifier.fit(X_train, y_train)

## Accuracy assessment

In [None]:
# Load the table of a separate image that is used only for evaluation
training_file_name = 'Geometry (10 featurs selected) - prepared data (only for testing)' + '.csv'
test_data = pd.read_csv(training_file_name)

# The first column read from the file holds row indexes; discard it
test_data = test_data.drop(columns=test_data.columns[0])

# Labels are the 'segmented' column; features are everything else
y_new_image = test_data['segmented']
X_new_image = test_data.drop(columns='segmented')

# Standardize the features with a scaler fitted on this image alone.
# NOTE(review): no scaling of X_train / X_test is visible in this file, so
# the model may be seeing differently-scaled inputs here -- confirm that the
# training CSV was already standardized during preparation.
X_new_image = StandardScaler().fit(X_new_image).transform(X_new_image)

In [None]:
# Run the trained classifier on, in order: the new image, the train set
# and the test set
predicte_new, y_pred_train, y_pred_test = (
    classifier.predict(features)
    for features in (X_new_image, X_train, X_test)
)

In [None]:
# Print confusion matrix and accuracy

# Number of decimal places kept in every reported percentage
num_of_digets = 2


def _report_accuracy(title, y_true, y_pred, num_digits):
    """Print and return the row-normalized confusion matrix and accuracy.

    Args:
        title: Heading printed above the results (e.g. ``'Test set:'``).
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the classifier.
        num_digits: Number of decimal places to keep.

    Returns:
        A tuple ``(cm_percent, accuracy)`` where ``cm_percent`` is the
        confusion matrix with each row expressed as a percentage of that
        row's total, and ``accuracy`` is the overall accuracy in percent.
    """
    cm = confusion_matrix(y_true, y_pred)

    # keepdims=True gives the row totals shape (n, 1), so the division
    # normalizes each ROW by its own total. A plain shape-(n,) vector would
    # broadcast across columns and divide column j by row j's total.
    row_sums = cm.sum(axis=1, keepdims=True)
    cm_percent = Round_Array((cm / row_sums) * 100, num_digits)

    accuracy = round(accuracy_score(y_true, y_pred) * 100, num_digits)

    print(title)
    print(cm_percent)
    print('Accuracy = ' + str(accuracy))
    print('--------------------------')
    print('\n')

    return cm_percent, accuracy


# New image
cm_new_image, accuracy_new_image = _report_accuracy(
    'New Image:', y_new_image, predicte_new, num_of_digets)

# Test set
cm_test, accuracy_test = _report_accuracy(
    'Test set:', y_test, y_pred_test, num_of_digets)

# Train set
cm_train, accuracy_train = _report_accuracy(
    'Train set:', y_train, y_pred_train, num_of_digets)

In [None]:
# Gather the results into one table: one column per dataset, with the
# confusion matrix in the first row and the overall accuracy in the second
accuracy_table = pd.DataFrame(
    {
        'new image': [cm_new_image, accuracy_new_image],
        'test set': [cm_test, accuracy_test],
        'train set': [cm_train, accuracy_train],
    },
    index=['confusion matrix', 'total accuracy'],
)

In [None]:
# Write the accuracy table to a CSV named after the model, then hand the
# file to the browser for download
file_name = f'Accuracy {model_name}.csv'
accuracy_table.to_csv(file_name)
files.download(file_name)

## Show the images classification

In [None]:
# Train image

# Shape of the prediction grid: 6 less than the 'original' size per axis.
# NOTE(review): presumably the 6 pixels are a border lost during feature
# extraction -- confirm against the data-preparation step.
original_rows = 108
original_columns = 111
new_rows = original_rows - 6
new_columns = original_columns - 6

# Classify every row of X and fold the flat predictions into a 2-D grid
image = classifier.predict(X)
image = image.reshape(new_rows, new_columns)
plt.imshow(image)

# Save as tiff: class 1 is recoded to 255 (the helper edits `image` in
# place and returns it) before the cast to 8-bit
image_name = f'train image {model_name}.tif'
recoded = Replace_1_To_255(image)
imsave(image_name, np.uint8(recoded))
files.download(image_name)

In [None]:
# New image

# Shape of the prediction grid: 6 less than the 'original' size per axis.
# NOTE(review): presumably the 6 pixels are a border lost during feature
# extraction -- confirm against the data-preparation step.
original_rows = 100
original_columns = 125
new_rows = original_rows - 6
new_columns = original_columns - 6

# Classify every row of the new image and fold the flat predictions into a
# 2-D grid
image = classifier.predict(X_new_image)
image = image.reshape(new_rows, new_columns)
plt.imshow(image)

# Save as tiff: class 1 is recoded to 255 (the helper edits `image` in
# place and returns it) before the cast to 8-bit
image_name = f'new image {model_name}.tif'
recoded = Replace_1_To_255(image)
imsave(image_name, np.uint8(recoded))
files.download(image_name)

## Save the model

In [None]:
# Serialize the fitted classifier to '<model name>.pkl' and download it
model_name_to_save = f'{model_name}.pkl'
joblib.dump(classifier, model_name_to_save)
files.download(model_name_to_save)