<a href="https://colab.research.google.com/github/ZemljakM/RUAP-projekt/blob/main/EDAandModelTrainingAnimals.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install scikit-plot, which provides the `scikitplot` module imported in the next cell.
! pip install scikit-plot

In [None]:
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm
import seaborn as sns
from tensorflow.keras.preprocessing.image import load_img
from keras.models import Model
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, Input, BatchNormalization
from tensorflow.keras.layers import Dropout, Flatten, Dense
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from PIL import Image
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
import os
import zipfile
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import scikitplot as skplt
from google.colab import files

In [None]:
# Upload kaggle.json through the Colab file picker and move it to the
# directory where the Kaggle CLI looks for credentials.
uploaded = files.upload()

source_path = '/content/kaggle.json'
destination_path = '/root/.kaggle/kaggle.json'
kaggle_dir = '/root/.kaggle/'
# exist_ok=True is a no-op when the directory is already there, so no
# separate existence check is needed.
os.makedirs(kaggle_dir, exist_ok=True)
os.rename(source_path, destination_path)
# Restrict the key to the owner; the Kaggle CLI warns when the file is
# readable by other users.
os.chmod(destination_path, 0o600)

In [None]:
# Download the alessiocorrado99/animals10 dataset archive with the Kaggle CLI.
! kaggle datasets download -d alessiocorrado99/animals10

In [None]:
# Unpack the downloaded dataset archive into /content.
with zipfile.ZipFile('/content/animals10.zip', mode='r') as archive:
    archive.extractall(path='/content')

In [None]:
def plot_animals(df):
    """Draw a bar chart of the number of samples per animal category.

    Args:
        df: DataFrame with a 'category' column; each value is translated to
            a display name through the module-level ``animal_mapping`` dict.

    The DataFrame passed in is not modified.
    """
    # Keep the readable names in a separate Series instead of writing a
    # 'category_names' column into the caller's frame as a side effect.
    category_names = df['category'].map(animal_mapping)

    plt.figure(figsize=(15, 5))
    sns.countplot(x=category_names, edgecolor='black')
    plt.title('Animal Distribution')
    plt.xlabel('Animal Category')
    plt.ylabel('Count')
    plt.xticks(rotation=45, ha='right')
    plt.show()

In [None]:
def extract_features(images, height, width):
    """Load image files as fixed-size, single-channel float32 arrays.

    Each file is opened in grayscale, resampled with Lanczos filtering to
    ``height`` x ``width`` pixels and divided by 255.

    Args:
        images: Iterable of image file paths.
        height: Target image height in pixels.
        width: Target image width in pixels.

    Returns:
        float32 NumPy array of shape (number of images, height, width, 1).
    """
    features = []
    for image in tqdm(images):
        var_img = load_img(image, color_mode='grayscale')
        # PIL's resize() takes (width, height). Passing (height, width) only
        # works for square targets; otherwise the pixel grid comes back
        # transposed in size and the reshape below scrambles it.
        var_img = var_img.resize((width, height), Image.Resampling.LANCZOS)
        # Cast per image so the whole dataset is never held in float64.
        features.append((np.array(var_img) / 255.0).astype(np.float32))
    features = np.array(features, dtype=np.float32)
    # Add the trailing channel axis expected by the convolutional models.
    return features.reshape(len(features), height, width, 1)

In [None]:
def display_images(images, labels, num_images=5, start=1000):
    """Show a row of consecutive samples, each titled with its class index.

    Args:
        images: Indexable collection of image arrays accepted by plt.imshow.
        labels: Indexable collection of per-sample label vectors; the title
            of each image is the argmax of its vector.
        num_images: Number of consecutive samples to draw.
        start: Index of the first sample to draw. Defaults to 1000, the
            offset that was previously hard-coded.
    """
    for column, index in enumerate(range(start, start + num_images), start=1):
        plt.subplot(1, num_images, column)
        plt.imshow(images[index], cmap='gray')
        plt.title(np.argmax(labels[index]))
        plt.axis('off')
    plt.show()

In [None]:
def create_model(input_shape, num_classes):
    """Build the uncompiled convolutional classifier.

    The network has five stages, each made of two 3x3 Conv2D layers that are
    individually followed by BatchNormalization. The first four stages end
    with 2x2 max pooling and 25% dropout. The last two stages and both hidden
    Dense layers use an L2(0.01) kernel penalty. The head is
    Flatten -> Dense(256) -> Dropout(0.3) -> Dense(256) -> softmax.

    Args:
        input_shape: Shape of one input sample, passed to keras ``Input``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A keras ``Model`` whose output layer is named "animal_out".
    """
    def conv_bn(tensor, filters, regularized):
        # One convolution followed by batch normalisation.
        penalty = l2(0.01) if regularized else None
        tensor = Conv2D(filters, kernel_size=(3, 3), activation='relu',
                        padding='same', kernel_regularizer=penalty)(tensor)
        return BatchNormalization()(tensor)

    # (filters of the two convolutions, use L2 penalty, pool + dropout after)
    stages = (
        ((32, 64), False, True),
        ((128, 128), False, True),
        ((256, 256), False, True),
        ((512, 512), True, True),
        ((512, 512), True, False),
    )

    inputs = Input(input_shape)
    x = inputs
    for filter_pair, regularized, downsample in stages:
        for filters in filter_pair:
            x = conv_bn(x, filters, regularized)
        if downsample:
            x = MaxPooling2D(pool_size=(2, 2))(x)
            x = Dropout(0.25)(x)

    x = Flatten()(x)
    x = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(x)
    x = Dropout(0.3)(x)
    x = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(x)
    output = Dense(num_classes, activation='softmax', name="animal_out")(x)

    return Model(inputs=[inputs], outputs=[output])

In [None]:
# Collect every image path under raw-img together with an integer class id
# (the position of its folder in the directory listing).
# NOTE(review): os.listdir() returns entries in arbitrary order, while
# animal_mapping presumably assumes one specific folder order - confirm.
dataset_root = '/content/raw-img/'
foldernames = os.listdir(dataset_root)
categories = []
# Deliberately not called `files`: that name is the google.colab module
# imported at the top of the notebook and is needed for upload/download.
image_paths = []
for k, folder in enumerate(foldernames):
    for image_name in os.listdir(os.path.join(dataset_root, folder)):
        image_paths.append(os.path.join(dataset_root, folder, image_name))
        categories.append(k)

df = pd.DataFrame({
    'filename': image_paths,
    'category': categories
})

In [None]:
# Display names for the ten classes, keyed by integer category id
# (0 for the first name, 9 for the last).
animal_mapping = dict(enumerate([
    'Krava',
    'Leptir',
    'Pas',
    'Macka',
    'Konj',
    'Vjeverica',
    'Slon',
    'Ovca',
    'Pauk',
    'Kokos',
]))

In [None]:
# Plot how many images each animal category contains.
plot_animals(df)

In [None]:
# Size every image is resized to, and the number of target classes.
image_height, image_width = 128, 128
num_classes = 10

In [None]:
# X: preprocessed grayscale images, one per row of df.
X = extract_features(df["filename"],image_height,image_width)

# y: one-hot labels. A single vectorised call encodes the whole column,
# instead of calling to_categorical once per row through apply().
y = to_categorical(np.array(df["category"]), num_classes=num_classes)

# Shape of one sample as expected by create_model (single grayscale channel).
input_shape = (image_height,image_width,1)

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Preview a few training images, titled with their class index.
display_images(X_train, y_train)

In [None]:
# Adam optimizer (learning rate 0.005) used when compiling the custom CNN.
opt = Adam(learning_rate=0.005)

In [None]:
# Our model #

model = create_model(input_shape, num_classes)

# metrics must be a list: a bare string is rejected by newer Keras releases,
# and the other compile() calls in this notebook already pass a list.
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])

# Stop after 10 epochs without val_loss improvement and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Halve the learning rate after 5 epochs without improvement of the monitored metric.
lr_scheduler = ReduceLROnPlateau(factor=0.5, patience=5)

# 20% of the training data is set aside by Keras for validation.
history = model.fit(x=X_train, y=y_train, batch_size=64, epochs=100, validation_split=0.2, callbacks=[lr_scheduler, early_stopping])

In [None]:
# Save the current `model` to animalmodel.h5.
model.save('animalmodel.h5')


In [None]:
def display_acc_graph(history):
    """Plot training and validation accuracy per epoch.

    Args:
        history: Object whose ``history`` dict holds the "accuracy" and
            "val_accuracy" series (as returned by ``model.fit``).
    """
    acc = history.history["accuracy"]
    val_acc = history.history["val_accuracy"]
    epochs = range(len(acc))
    plt.plot(epochs, acc, 'b', label="Training Accuracy")
    plt.plot(epochs, val_acc, 'r', label="Validation Accuracy")
    plt.title("Accuracy Graph")
    plt.legend()
    # Render this chart now. Opening a new figure here instead left an empty
    # figure behind whenever the function was used on its own.
    plt.show()

def display_loss_graph(history):
    """Plot training and validation loss per epoch and show the figure.

    Args:
        history: Object whose ``history`` dict holds the "loss" and
            "val_loss" series (as returned by ``model.fit``).
    """
    recorded = history.history
    train_curve, val_curve = recorded["loss"], recorded["val_loss"]
    # Both curves share the x axis derived from the training series.
    x_axis = range(len(train_curve))
    curves = (
        (train_curve, 'b', "Training Loss"),
        (val_curve, 'r', "Validation Loss"),
    )
    for series, fmt, legend in curves:
        plt.plot(x_axis, series, fmt, label=legend)
    plt.title("Loss Graph")
    plt.legend()
    plt.show()

In [None]:
# Plot the accuracy and loss curves recorded in `history`.
display_acc_graph(history)
display_loss_graph(history)

In [None]:
# Reload the saved CNN and evaluate it on the held-out test set.
animalmodel = load_model('animalmodel.h5')
animalmodel.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Collapse softmax outputs and one-hot targets to class indices. The targets
# go into a new name so y_test stays one-hot and this cell can be re-run
# (argmax(axis=1) on an already collapsed 1-D array raises).
y_pred_animal = animalmodel.predict(X_test).argmax(axis=1)
y_true_animal = y_test.argmax(axis=1)
print(classification_report(y_true_animal, y_pred_animal))

conf_matrix_age = confusion_matrix(y_true_animal, y_pred_animal)
print(conf_matrix_age)

skplt.metrics.plot_confusion_matrix(y_true_animal, y_pred_animal, figsize=(8, 6), cmap='Blues')
plt.xlabel('Predicted Animal')
plt.ylabel('True Animal')
plt.title('Confusion Matrix for Animal')
plt.show()


In [None]:
# KNN #
import pickle
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# One flat pixel vector per image. reshape() needs the sample COUNT
# (shape[0]) as its first dimension, not the first sample itself. The flat
# copies get their own names so X_train/X_test stay 4-D for the image models.
X_train_flattened = X_train.reshape(X_train.shape[0], -1)
X_test_flattened = X_test.reshape(X_test.shape[0], -1)

# Integer class ids instead of one-hot rows, as in the random-forest cell.
y_train_labels = np.argmax(y_train, axis=1)
y_test_labels = np.argmax(y_test, axis=1)

knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train_flattened, y_train_labels)

y_pred = knn_model.predict(X_test_flattened)

accuracy = accuracy_score(y_test_labels, y_pred)
print(f'Accuracy: {accuracy:.2f}')

print(classification_report(y_test_labels, y_pred))

In [None]:
# Confusion matrix for the predictions currently held in y_pred.
# confusion_matrix() does not accept one-hot (multilabel-indicator) input,
# so any 2-D label array is collapsed to class indices first; arrays that
# are already 1-D are used as they are.
y_true_classes = np.argmax(y_test, axis=1) if np.ndim(y_test) > 1 else y_test
y_pred_classes = np.argmax(y_pred, axis=1) if np.ndim(y_pred) > 1 else y_pred

conf_matrix_age = confusion_matrix(y_true_classes, y_pred_classes)
print(conf_matrix_age)

skplt.metrics.plot_confusion_matrix(y_true_classes, y_pred_classes, figsize=(8, 6), cmap='Blues')
plt.xlabel('Predicted Animal')
plt.ylabel('True Animal')
plt.title('Confusion Matrix for Animals')
plt.show()

In [None]:
# Serialise the fitted KNN classifier to knn_animal_model.pkl.
with open('knn_animal_model.pkl', 'wb') as model_file:
    pickle.dump(knn_model, model_file)

# To load the classifier back later:
# with open('knn_animal_model.pkl', 'rb') as model_file:
#     knn_model = pickle.load(model_file)

In [None]:
from google.colab import files
# Download the pickle written by the previous cell. It is saved as
# knn_animal_model.pkl; the old name knn_model.pkl does not exist.
# Assumes the notebook's working directory is /content.
files.download("/content/knn_animal_model.pkl")

In [None]:
# MobileNetV2 #

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.optimizers import Adam

# Re-create the split (same seed as the other model cells) so this cell does
# not depend on whatever an earlier cell left in X_train/y_train: the network
# needs 4-D image tensors and one-hot targets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ImageNet-pretrained backbone without its classification head.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(image_height, image_width, 3))

# Freeze the backbone; only the layers added below are trained.
for layer in base_model.layers:
    layer.trainable = False

model = models.Sequential([
    # 1x1 convolution expands the single grayscale channel to the three
    # channels the backbone expects.
    layers.Conv2D(3, (1, 1), input_shape=(image_height, image_width, 1)),
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(num_classes, activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

lr_scheduler = ReduceLROnPlateau(factor=0.5, patience=5)

history = model.fit(X_train, y_train, epochs=100, batch_size=64, validation_split=0.2, callbacks=[lr_scheduler, early_stopping])

In [None]:
# Save the current `model` to mobileanimalmodel.h5.
model.save('mobileanimalmodel.h5')

In [None]:
# Download the saved model file through the Colab `files` helper.
files.download("/content/mobileanimalmodel.h5")

In [None]:
# Plot the accuracy and loss curves recorded in `history`.
display_acc_graph(history)
display_loss_graph(history)

In [None]:
# Reload the saved MobileNetV2-based model and evaluate it on the test set.
animalmodel = load_model('mobileanimalmodel.h5')
animalmodel.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Collapse softmax outputs and one-hot targets to class indices. The targets
# go into a new name so y_test stays one-hot and this cell can be re-run
# (argmax(axis=1) on an already collapsed 1-D array raises).
y_pred_animal = animalmodel.predict(X_test).argmax(axis=1)
y_true_animal = y_test.argmax(axis=1)

print(classification_report(y_true_animal, y_pred_animal))

conf_matrix_age = confusion_matrix(y_true_animal, y_pred_animal)
print(conf_matrix_age)

skplt.metrics.plot_confusion_matrix(y_true_animal, y_pred_animal, figsize=(8, 6), cmap='Blues')
plt.xlabel('Predicted Animal')
plt.ylabel('True Animal')
plt.title('Confusion Matrix for Animals')
plt.show()

In [None]:
# RF #

import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Fresh 80/20 split using the same seed as the other model cells.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

print(y_train.shape)

# The forest takes one flat pixel vector per image.
X_train_flattened = X_train.reshape(len(X_train), -1)
X_test_flattened = X_test.reshape(len(X_test), -1)

# It also takes integer class ids rather than one-hot rows.
y_train_labels = y_train.argmax(axis=1)
y_test_labels = y_test.argmax(axis=1)

rf_model = RandomForestClassifier(random_state=42, criterion='gini', n_estimators=200)
rf_model.fit(X_train_flattened, y_train_labels)
y_pred = rf_model.predict(X_test_flattened)

accuracy = accuracy_score(y_test_labels, y_pred)
print(f'Accuracy: {accuracy:.2f}')

print(classification_report(y_test_labels, y_pred))

# Heatmap of true vs. predicted class counts.
conf_matrix = confusion_matrix(y_test_labels, y_pred)
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, cmap='Blues', fmt='d', annot=True)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()