In [None]:
# Importing necessary libraries
import sys
#!{sys.executable} -m pip install pandas
#!{sys.executable} -m pip install opencv-python

#!{sys.executable} -m pip install matplotlib

#!{sys.executable} -m pip install scikit-learn

#!{sys.executable} -m pip install lightgbm
import pandas as pd
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score


from tensorflow import keras
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (Rescaling, RandomFlip, RandomRotation, RandomZoom,
                                    Dense, Flatten, Dropout, Conv2D, MaxPooling2D)
from tensorflow.keras.utils import image_dataset_from_directory, plot_model
from tensorflow.keras.preprocessing.image import array_to_img
from tensorflow.keras.applications import VGG16

In [None]:
# Define some basic parameters

batch_size = 32
img_height = 150
img_width = 150
# Define the path to the datasets directories

train_path = './seg_train/'
test_path = './seg_test/'
pred_path = './seg_pred/'
# Define data loading function

def load_data(path, labels):
    dataset = image_dataset_from_directory(
        directory=path,
        labels=labels,
        seed=123,
        image_size=(img_height, img_width),
        batch_size=batch_size
    )
    return dataset

In [None]:
# # Load the dataset
# df = pd.read_csv('/content/drive/MyDrive/intel-image-classification/seg_train.csv')

# # Splitting into train and test set
# X = df.drop('label', axis=1)
# y = df['label']
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the path to the dataset
data_dir = './seg_train'

# One sub-folder per class. The folder names are sorted so that every class
# gets the same integer label on every run and machine (os.listdir returns
# entries in arbitrary order), and stray non-directory entries are ignored.
class_names = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)

# Load the images and labels
X = []
y = []
for label, folder_name in enumerate(class_names):
    folder_path = os.path.join(data_dir, folder_name)
    # Sorted file order keeps the seeded train/test split reproducible.
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)
        img = cv2.imread(file_path)
        if img is None:
            # cv2.imread returns None (it does not raise) when a file cannot
            # be decoded; skip it instead of crashing inside cvtColor.
            print("Skipping unreadable file:", file_path)
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (64, 64))  # Resizing images to a common size
        X.append(img)
        y.append(label)

# Convert the images and labels to numpy arrays
X = np.array(X)
y = np.array(y)

# Splitting into train and test set (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing the images: scale pixel values from [0, 255] to [0, 1]
X_train = X_train.astype('float32') / 255.
X_test = X_test.astype('float32') / 255.

In [None]:
# Random Forest baseline: 100 trees fitted on the flattened pixel vectors
rfc = RandomForestClassifier(random_state=42, n_estimators=100)
rfc.fit(X_train.reshape(len(X_train), -1), y_train)

# Accuracy on the held-out split
rfc_predictions = rfc.predict(X_test.reshape(len(X_test), -1))
rfc_acc = accuracy_score(y_test, rfc_predictions)
print("Random Forest accuracy:", rfc_acc)

In [None]:
# RBF-kernel SVM fitted on the flattened pixel vectors
svc = SVC(random_state=42, kernel='rbf')
svc.fit(X_train.reshape(len(X_train), -1), y_train)

# Accuracy on the held-out split
svc_predictions = svc.predict(X_test.reshape(len(X_test), -1))
svc_acc = accuracy_score(y_test, svc_predictions)
print("SVM accuracy:", svc_acc)

In [None]:
# 5-nearest-neighbours classifier fitted on the flattened pixel vectors
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train.reshape(len(X_train), -1), y_train)

# Accuracy on the held-out split
knn_predictions = knn.predict(X_test.reshape(len(X_test), -1))
knn_acc = accuracy_score(y_test, knn_predictions)
print("KNN accuracy:", knn_acc)

In [None]:
# LightGBM classifier (default hyper-parameters, fixed seed) on the flattened pixel vectors
lgbm = LGBMClassifier(random_state=42)
lgbm.fit(X_train.reshape(len(X_train), -1), y_train)

# Accuracy on the held-out split
lgbm_predictions = lgbm.predict(X_test.reshape(len(X_test), -1))
lgbm_acc = accuracy_score(y_test, lgbm_predictions)
print("LightGBM accuracy:", lgbm_acc)

In [None]:
# Ensemble of the models: majority ("hard") vote over the four classifiers.
# VotingClassifier lives in sklearn.ensemble and is imported in the first cell.
# NOTE: VotingClassifier.fit trains fresh clones of the listed estimators, so
# this cell repeats all four trainings; the already-fitted rfc/svc/knn/lgbm
# objects themselves are not reused or modified.
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

ensemble = VotingClassifier(
    estimators=[('rfc', rfc), ('svc', svc), ('knn', knn), ('lgbm', lgbm)],
    voting='hard',
)
ensemble.fit(X_train_flat, y_train)
ensemble_acc = accuracy_score(y_test, ensemble.predict(X_test_flat))
print("Ensemble accuracy:", ensemble_acc)

In [None]:
# Persist every fitted model to the working directory with joblib
import joblib

# The four individual classifiers
for fitted_model, target_file in (
    (rfc, "rfc_model.pkl"),
    (svc, "svc_model.pkl"),
    (knn, "knn_model.pkl"),
    (lgbm, "lgbm_model.pkl"),
):
    joblib.dump(fitted_model, target_file)

# The ensemble is written last, as a bare expression, so the cell still
# displays the value returned by joblib.dump for it.
joblib.dump(ensemble, "ensemble_model.pkl")

In [None]:
# Bar chart comparing the hold-out accuracy of every model
models = ['Random Forest', 'SVM', 'KNN', 'LightGBM', 'Ensemble']
accuracy = [rfc_acc, svc_acc, knn_acc, lgbm_acc, ensemble_acc]

fig, ax = plt.subplots()
ax.bar(models, accuracy)
ax.set_xlabel("Models")
ax.set_ylabel("Accuracy")
ax.set_title("Accuracy for each model")
plt.show()

In [None]:
# Feature importances of the whole Random Forest, one value per flattened
# pixel/channel feature. The forest-level attribute is used rather than
# estimators_[0], which would show a single tree only.
plt.plot(rfc.feature_importances_)
plt.xlabel("Flattened pixel feature index")
plt.ylabel("Importance")
plt.title("Feature importance - Random Forest")
plt.show()

In [None]:
# Number of support vectors the fitted SVM keeps for each class.
# Passing the raw svc.support_vectors_ matrix to plt.plot would draw one line
# per feature column (64 * 64 * 3 = 12288 lines), which is unreadable and
# extremely slow, so the per-class counts are plotted instead.
plt.bar(svc.classes_, svc.n_support_)
plt.xlabel("Class label")
plt.ylabel("Number of support vectors")
plt.title("Support vectors - SVM")
plt.show()

In [None]:
# Neighbour connectivity of the first training image: 1 where a training
# sample is among its nearest neighbours, 0 elsewhere.
# The classifier was fitted on flattened images, so the query must be
# flattened as well, and only the first sample is queried because only that
# row of the graph is plotted.
first_sample = X_train[:1].reshape(1, -1)
neighbor_row = knn.kneighbors_graph(first_sample).toarray().ravel()
plt.plot(neighbor_row)
plt.xlabel("Training sample index")
plt.ylabel("Connected to first sample (0/1)")
plt.title("K-Neighbors Graph - KNN")
plt.show()

In [None]:
# Per-feature importance reported by the fitted LightGBM classifier
lgbm_importances = lgbm.feature_importances_
fig, ax = plt.subplots()
ax.plot(lgbm_importances)
ax.set_title("Feature importance - LightGBM")
plt.show()