## DATA ORGANISATION

The images of cats and dogs are currently mixed in the `train` and `test1` directories. Before training the model, you need to separate these images into standard subdirectories for cats and dogs within each directory (`train` and `test1`). This organization is crucial for the proper labeling and training of the model.


In [None]:
# CREATE SEPARATE FOLDERS
# !mkdir -p train/train/cats train/train/dogs test1/test1/cats test1/test1/dogs

In [None]:
# MOVE IMAGES TO RESPECTIVE FOLDERS
# Dataset can be downloaded from link provided in README.md

import os
import shutil

# Folders that hold the extracted training and test images.
train_dir = "train/train"
test_dir = "test1/test1"

def move_files(src_dir, dest_cat_dir, dest_dog_dir):
    """Sort the cat and dog image files found directly in ``src_dir`` into class folders.

    Files whose name starts with ``cat`` are moved to ``dest_cat_dir`` and files
    whose name starts with ``dog`` are moved to ``dest_dog_dir``. Anything else,
    including sub-directories, is left where it is.

    Args:
        src_dir: Directory containing the mixed image files.
        dest_cat_dir: Directory that receives the ``cat*`` files (created if missing).
        dest_dog_dir: Directory that receives the ``dog*`` files (created if missing).

    Raises:
        OSError: If ``src_dir`` cannot be listed (e.g. it does not exist) or a
            file cannot be moved.
    """
    # List first so that a missing src_dir still raises instead of being
    # created as a side effect of making the destination folders below.
    entries = os.listdir(src_dir)

    destinations = {'cat': dest_cat_dir, 'dog': dest_dog_dir}
    for dest_dir in destinations.values():
        os.makedirs(dest_dir, exist_ok=True)

    for filename in entries:
        src_path = os.path.join(src_dir, filename)
        # The class folders normally live inside src_dir and are themselves
        # named "cats"/"dogs"; without this check they would match the prefix
        # test and shutil.move would try to move a folder into itself.
        if not os.path.isfile(src_path):
            continue
        for prefix, dest_dir in destinations.items():
            if filename.startswith(prefix):
                shutil.move(src_path, os.path.join(dest_dir, filename))
                break
            
# move_files takes the source folder plus BOTH destination folders
# (cats first, then dogs); the cats folder was missing from this call.
move_files(train_dir, os.path.join(train_dir, 'cats'), os.path.join(train_dir, 'dogs'))

In [None]:
# PREPROCESSING THE IMAGES

import numpy as np
from PIL import Image
data = []
labels = []

# Define class labels: sub-folder name -> integer label
classes = {'cats': 0, 'dogs': 1}

cwd = os.getcwd()

# Preprocessing the images: every readable file becomes a 32x32 RGB array
for class_name, label in classes.items():
    path = os.path.join(cwd, 'train/train', class_name)
    images = os.listdir(path)

    for image_file in images:
        try:
            # The context manager closes the file handle once the pixels are read.
            with Image.open(os.path.join(path, image_file)) as img:
                # Force 3 channels: grayscale/RGBA/CMYK files would otherwise
                # yield arrays of a different shape and the final np.array(data)
                # could not stack them into one (N, 32, 32, 3) array.
                image = np.array(img.convert('RGB').resize((32, 32)))
        except Exception as e:
            # Best effort: report unreadable files and keep going.
            print(f"Error processing image {image_file}: {e}")
        else:
            # Append image and label together so the two lists stay aligned.
            data.append(image)
            labels.append(label)

# Convert lists to numpy arrays for better handling
data = np.array(data)
labels = np.array(labels)

In [None]:
# # USING TENSORFLOW FOR DATA ORGANISATION
# import tensorflow as tf
# tf.config.list_physical_devices('GPU')

# data = []
# labels = []

# cwd = os.getcwd()

# classes = {'cats': 0, 'dogs': 1}

# for class_name, label in classes.items():
#     path = os.path.join(cwd, 'train/train', class_name)
#     images = os.listdir(path)
    
#     for image_file in images:
#         try:
#             # Load the image using TensorFlow
#             image_path = os.path.join(path, image_file)
#             image = tf.io.read_file(image_path)
#             image = tf.image.decode_jpeg(image, channels=3)
#             image = tf.image.resize(image, [32, 32])  # Resize image to 32x32
#             image = tf.cast(image, tf.float32) / 255.0  # Normalize to [0, 1]
            
#             data.append(image)
#             labels.append(label)
#         except Exception as e:
#             print(f"Error processing image {image_file}: {e}")


In [None]:
# Number of preprocessed images collected so far.
len(data)

In [None]:
# Number of labels collected; should equal len(data) because both are appended together.
len(labels)

In [None]:
# Make sure both collections are NumPy arrays before saving. This is a plain copy
# when they are already arrays, but it is needed if they were built as Python lists.
data = np.array(data)
labels = np.array(labels)

In [None]:
# SAVE THE LABELS AND DATA FOR FUTURE USES

# np.save does not create missing parent folders, so make sure the target
# folder exists; exist_ok keeps this cell safe to re-run.
os.makedirs('trained_data', exist_ok=True)

np.save('trained_data/data', data)
np.save('trained_data/labels', labels)


In [None]:
# Reload the cached arrays written by np.save (which appends the ".npy" suffix),
# so the image preprocessing does not have to be repeated.
data = np.load('trained_data/data.npy')
labels = np.load('trained_data/labels.npy')

In [None]:
# SPLIT THE DATA

from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation.
# random_state makes the split (and therefore the reported metrics) reproducible
# between runs; stratify keeps the cat/dog ratio the same in both subsets.
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=42, stratify=labels
)

In [None]:
# Sanity-check the shapes of the arrays produced by the split.
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

In [None]:

# LABELS TO ONE-HOT ENCODING

from tensorflow.keras.utils import to_categorical
# Turn the integer labels (0 = cats, 1 = dogs) into two-column one-hot rows,
# the target format expected by the 2-unit softmax output layer.
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)

In [None]:
# BUILDING A BASIC CNN MODEL

import tensorflow as tf

layers = tf.keras.layers

model = tf.keras.Sequential([
    # Stage 1: 32 filters; the input shape is taken from the training images
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=X_train.shape[1:]),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    # Stage 2: 128 filters
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    # Stage 3: 256 filters, no pooling
    layers.Conv2D(256, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.25),

    # Classifier head: flatten, one hidden layer, then one score per class
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(2, activation="softmax"),
])

In [None]:
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
# The labels are one-hot vectors and the output layer is a 2-unit softmax, so the
# matching loss is categorical cross-entropy. Binary cross-entropy would instead
# score the two outputs as if they were independent yes/no predictions.
model.compile(optimizer="adam",
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:

# Train for 30 epochs in mini-batches of 64; the held-out split is evaluated
# after every epoch and its metrics are recorded in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=30,
    batch_size=64,
)

In [None]:
import matplotlib.pyplot as plt

def _draw_curves(train_key, val_key, train_label, val_label, title, y_label):
    """Plot one training/validation pair of curves from `history` and show it."""
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(title)
    plt.xlabel('epochs')
    plt.ylabel(y_label)
    plt.legend()
    plt.show()


# Accuracy per epoch, drawn on explicitly created figure 0
plt.figure(0)
_draw_curves('accuracy', 'val_accuracy', 'training accuracy', 'val accuracy',
             'Accuracy', 'accuracy')

# Loss per epoch
_draw_curves('loss', 'val_loss', 'training loss', 'val loss', 'Loss', 'loss')


In [None]:
# EVALUATING OUR MODEL USING TEST IMAGES
from tensorflow.keras.preprocessing.image import load_img, img_to_array

test_folder = 'test1/test1'
image_size = (32,32)

test_images = []

# sorted() gives a stable, lexicographic order ('10.jpg' comes before '2.jpg').
for img_name in sorted(os.listdir(test_folder)):
    img_path = os.path.join(test_folder, img_name)

    # Skip anything that is not a file (e.g. class sub-folders created in the
    # test directory); load_img cannot open a directory.
    if not os.path.isfile(img_path):
        continue

    img = load_img(img_path, target_size=image_size)
    img_array = img_to_array(img)
    # Keep the raw 0-255 pixel scale: the training arrays are stored and fed to
    # the model without rescaling, so dividing by 255 only here would show the
    # model inputs on a different scale than the one it was trained on.
    test_images.append(img_array)


test_images = np.array(test_images)

In [None]:
# One row of class scores per test image (the model's final layer is a 2-unit softmax).
preds = model.predict(test_images)

In [None]:
# Index of the highest-scoring class per image (0 = cats, 1 = dogs, as labelled during preprocessing).
pred_classes = np.argmax(preds, axis=1)

In [None]:
# ACCURACY CHECK

from sklearn.metrics import accuracy_score
# classes = {'cats': 0, 'dogs': 1}
# Lookup table indexed by predicted class index. The entries are the numeric
# labels themselves, so a lookup returns 0 (cats) or 1 (dogs), not a name.
classes = [0, 1]


def test_img(img):
    """Classify a single image with the trained ``model``.

    Args:
        img: Path (or file object) of the image, as accepted by ``Image.open``.

    Returns:
        A tuple ``(image, y_pred)``: the 32x32 PIL image that was fed to the
        model, and a one-element array holding the predicted class index.
    """
    # The context manager closes the file handle once the pixels are read.
    with Image.open(img) as source:
        # Force 3 channels so grayscale/RGBA/CMYK files still match the
        # (32, 32, 3) input the model was built for.
        image = source.convert('RGB').resize((32, 32))
    # The model expects a batch, so wrap the single image in a leading axis.
    X_val = np.array([np.array(image)])
    y_pred = np.argmax(model.predict(X_val), axis=-1)
    return image, y_pred

In [None]:
# Quick check of the lookup table: prints the entry stored for class index 1.
print(classes[1])

In [None]:

plot, prediction = test_img('test1/test1/22.jpg')
# test_img returns a one-element array of class indices; take that element
# directly instead of joining digit strings, which would glue several
# predictions into one bogus number.
a = int(prediction[0])
print("Predicted photo is: ", classes[a])
plt.imshow(plot)
plt.show()
