In [1]:
import os
import pickle

from skimage.io import imread
from skimage.transform import resize
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [2]:
import gdown
import shutil

# Google Drive source of the dataset archive:
# https://drive.google.com/file/d/11PI6wGDjuJ3LiPSm3izbVcmVTX7DDVLn/view?usp=drive_link
data_url = 'https://drive.google.com/uc?id=11PI6wGDjuJ3LiPSm3izbVcmVTX7DDVLn'

# Destination directory for the archive and its extracted contents
destination_path = '/content'
zip_file_path = destination_path + '/Data.zip'

# Download the ZIP file unless a copy is already present (e.g. uploaded by hand),
# so a fresh runtime can run this cell without any manual step.
# NOTE(review): assumes the Drive file is shared publicly - confirm.
if not os.path.exists(zip_file_path):
    gdown.download(url=data_url, output=zip_file_path, quiet=False)

# Extract the downloaded ZIP file next to it
shutil.unpack_archive(zip_file_path, destination_path)

print("Data has been downloaded and extracted to:", destination_path)

Data has been downloaded and extracted to: /content


In [5]:
# prepare data
# Each sub-directory of input_dir named in `categories` holds the images of one
# class; the position of the name in the list is used as the integer label.
input_dir = '/content/Data'
categories = ['empty', 'not_empty']

data = []
labels = []
for category_idx, category in enumerate(categories):
    category_dir = os.path.join(input_dir, category)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the seeded split below) identical between runs.
    for file in sorted(os.listdir(category_dir)):
        img_path = os.path.join(category_dir, file)
        img = imread(img_path)
        # Shrink to 25x25 and flatten into one feature vector per image.
        # NOTE(review): assumes every image has the same number of channels,
        # otherwise the vectors differ in length - confirm against the dataset.
        img = resize(img, (25, 25))
        data.append(img.flatten())
        labels.append(category_idx)

data = np.asarray(data)
labels = np.asarray(labels)

# train / test split
# Stratified 80/20 split; random_state makes the split (and the accuracies
# reported further down) reproducible, matching the seed used for the models.
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=42
)


# Model training

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

# Random Forest: fit a seeded 100-tree forest on the training split and
# report its accuracy on the held-out test split.
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(x_train, y_train)  # fit() returns the fitted estimator itself

rf_predictions = rf_classifier.predict(x_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)

print(f"Random Forest Accuracy: {rf_accuracy}")




Random Forest Accuracy: 1.0


In [7]:
# XGBoost: fit a seeded gradient-boosted model (100 rounds, learning rate 0.1)
# on the training split and report its accuracy on the held-out test split.
xgb_classifier = XGBClassifier(
    learning_rate=0.1,
    n_estimators=100,
    random_state=42,
).fit(x_train, y_train)  # fit() returns the fitted estimator itself

xgb_predictions = xgb_classifier.predict(x_test)
xgb_accuracy = accuracy_score(y_test, xgb_predictions)

print(f"XGBoost Accuracy: {xgb_accuracy}")


XGBoost Accuracy: 1.0


In [8]:
# Hyper-parameter grid explored for the SVC: every gamma/C combination.
parameters = [
    {
        'gamma': [0.01, 0.001, 0.0001],
        'C': [1, 10, 100, 1000],
    }
]

# Cross-validated grid search over the SVC, fitted on the training split.
classifier = SVC()
grid_search = GridSearchCV(estimator=classifier, param_grid=parameters).fit(x_train, y_train)

# Keep the estimator refitted with the best parameter combination found.
best_estimator = grid_search.best_estimator_

In [9]:
# Evaluate the tuned SVC on the held-out test split.
y_prediction = best_estimator.predict(x_test)

# accuracy_score expects (y_true, y_pred); keep that order so this call matches
# the Random Forest / XGBoost cells and stays correct if the metric is swapped
# for an asymmetric one (precision, recall, ...).
score = accuracy_score(y_test, y_prediction)

print('{}% of samples were correctly classified'.format(str(score * 100)))

# The tuned SVC is not persisted here; the final cell pickles the XGBoost
# model to ./model.p instead.

100.0% of samples were correctly classified


# Save the model as a pickle file

In [10]:
# Persist the fitted XGBoost model. The context manager guarantees the file is
# flushed and closed even if pickling raises, instead of leaking the handle.
with open('./model.p', 'wb') as model_file:
    pickle.dump(xgb_classifier, model_file)