In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested from the HTTP response per read() call in the download loop below.
CHUNK_SIZE = 40960
# Comma-separated list of "<directory>:<URL-encoded download URL>" entries; the download
# loop splits on ',' and ':' and URL-decodes the second part.
# NOTE: the URL is a signed link carrying X-Goog-Date=20240922T111302Z and
# X-Goog-Expires=259200 (seconds), so it stops working after that window; the loop
# below reports an HTTPError as "likely expired".
DATA_SOURCE_MAPPING = 'tom-and-jerry-image-classification:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F2136537%2F3682957%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240922%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240922T111302Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D5f747b23f1aac2030748c2c9bfc7bcdcb03677ea459ffc0d150bab9b6b53dfabe07ac7ebe1978619841049051aba0d6428ea89bf301453545e0d21eb8f8484d1dfae9377eda76e1177bc59c5c9757379d6a2d4ae08022482ea0dc963911adde7c46d6b4f11ebe728cb626a8f214e231623a9bfa18baceae15a69ba996feda8e7d8f88444e65a05f3a7b70e9cd68dfbb5d058c35adc264a3b62e5a7447ef3a973e68efa264b51b771367439312f97f571d056e5ecf5db5fdd0b032105ae4a948145ebcb949d047eb53849cacb3434185c0156e2de2d230e8d13b1aa2e5bc2477603fae7ea887b3aace8ecb52b381a6188e70d27b64a342c52a8908965627a6597'

# Local directories that stand in for Kaggle's input and working locations.
KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
# Not referenced by any of the visible cells.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every "<directory>:<URL-encoded URL>" entry of DATA_SOURCE_MAPPING into a
# temporary file and unpack it under KAGGLE_INPUT_PATH/<directory>.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only, so a stray ':' inside the encoded URL cannot
    # break the two-way unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # Content-Length is optional; without it the progress bar stays
            # empty instead of crashing on int(None).
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            size_text = content_length if content_length else 'unknown'
            print(f'Downloading {directory}, {size_text} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_length)) if total_length > 0 else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # Push buffered bytes to the file and rewind before the archive
            # readers consume the same handle.
            tfile.flush()
            tfile.seek(0)
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name: the original `as tarfile`
                # replaced the tarfile module, so the next tar source failed
                # with an AttributeError.
                with tarfile.open(fileobj=tfile) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        # HTTPError is a subclass of OSError, so it must be handled first.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading tom-and-jerry-image-classification, 456031556 bytes compressed
Downloaded and uncompressed: tom-and-jerry-image-classification
Data source import complete.


### Initialize Libraries and project

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pickle

### Loading the dataset

In [None]:
# Ground-truth CSV shipped with the dataset. `df` is not referenced again in the
# visible cells; the labels used for training come from the folder names below.
df = pd.read_csv('/kaggle/input/tom-and-jerry-image-classification/ground_truth.csv')

# Define image size and dataset folder paths
# IMG_SIZE is the size tuple handed to cv2.resize for every loaded image.
IMG_SIZE = (128, 128)
dataset_folder = '/kaggle/input/tom-and-jerry-image-classification/tom_and_jerry/tom_and_jerry/'

# Dictionary to map subfolders to labels
# Keys are sub-folder names of dataset_folder, values the integer class ids.
# NOTE(review): the CNN cell describes the classes as "Tom, Jerry, both, neither";
# presumably tom_jerry_1 = both and tom_jerry_0 = neither - confirm against the dataset.
label_mapping = {
    'tom': 0,
    'jerry': 1,
    'tom_jerry_1': 2,
    'tom_jerry_0': 3
}

# Function to load images from each subfolder and assign corresponding labels
def load_images_from_folders(dataset_folder, label_mapping, img_size=None):
    """Load every readable image below ``dataset_folder`` with its class label.

    Args:
        dataset_folder: Directory containing one sub-folder per key of
            ``label_mapping``.
        label_mapping: Dict mapping sub-folder name -> integer label.
        img_size: Optional size tuple passed to ``cv2.resize``. Defaults to the
            notebook-level ``IMG_SIZE`` so existing two-argument calls behave
            as before.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. ``images`` holds the resized
        pixel data divided by 255 as float32; ``labels`` holds one label per
        loaded image, in the same order.

    Files that ``cv2.imread`` cannot decode are reported on stdout and skipped.
    """
    target_size = IMG_SIZE if img_size is None else img_size
    images = []
    labels = []

    for folder, label in label_mapping.items():
        folder_path = os.path.join(dataset_folder, folder)
        # os.listdir returns entries in arbitrary order. Sorting fixes the
        # sample order, so a seeded train_test_split yields the same split on
        # every machine.
        for filename in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, filename)
            img = cv2.imread(file_path)
            if img is None:
                print(f"Image not found or could not be loaded: {file_path}")
                continue
            img = cv2.resize(img, target_size)
            # float32 halves the memory of the default float64 result of
            # `img / 255.0`.
            images.append(img.astype(np.float32) / 255.0)
            labels.append(label)

    return np.array(images, dtype=np.float32), np.array(labels)

# Load images and corresponding labels
# `images` and `labels` are row-aligned and feed every model cell below
# (flattened for Random Forest / SVM, used as-is for the CNN).
images, labels = load_images_from_folders(dataset_folder, label_mapping)

### Updated Random Forest and SVM

In [None]:
# Import necessary libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import pickle

# Function to flatten images for Random Forest and SVM
def flatten_images(images):
    """Collapse each sample of ``images`` into a single feature row.

    The first axis (samples) is kept; all remaining axes are merged, giving an
    array of shape ``(num_samples, num_features)``.
    """
    num_samples = images.shape[0]
    flattened = images.reshape(num_samples, -1)
    return flattened

# Flatten images
# Random Forest and SVM take one feature row per sample, so every image is
# reshaped to a single row.
X_flattened = flatten_images(images)

# Split the data into training and testing sets for RF and SVM
# 80/20 split; random_state=42 fixes the shuffle for a given sample order.
# The *_flat names are reused by the SVM cell below.
X_train_flat, X_test_flat, y_train_flat, y_test_flat = train_test_split(X_flattened, labels, test_size=0.2, random_state=42)

### Random Forest Model ###

# Train a Random Forest Classifier
# Fitted on the integer labels from label_mapping (4 classes).
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_flat, y_train_flat)

# Make predictions using Random Forest
rf_pred = rf_model.predict(X_test_flat)

# Evaluate the Random Forest model
# Per-class precision/recall/F1 plus overall accuracy on the held-out split.
print("\nRandom Forest Classifier Report:")
print(classification_report(y_test_flat, rf_pred))
print(f"Random Forest Accuracy: {accuracy_score(y_test_flat, rf_pred)}")

# Save the Random Forest model to a pickle file
# Written to the current working directory (no sub-folder, unlike the SVM cell).
rf_model_filename = 'rf_model_tom_and_jerry.pkl'
with open(rf_model_filename, 'wb') as rf_file:
    pickle.dump(rf_model, rf_file)
print(f"Random Forest model saved to {rf_model_filename}")




Random Forest Classifier Report:
              precision    recall  f1-score   support

           0       0.84      0.90      0.87       410
           1       0.77      0.79      0.78       231
           2       0.88      0.76      0.82       156
           3       0.79      0.75      0.77       299

    accuracy                           0.81      1096
   macro avg       0.82      0.80      0.81      1096
weighted avg       0.82      0.81      0.81      1096

Random Forest Accuracy: 0.8147810218978102
Random Forest model saved to rf_model_tom_and_jerry.pkl


In [None]:
### SVM Model ###

# Train an SVM classifier (use a linear kernel)
# Uses the flattened train/test split created in the Random Forest cell.
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train_flat, y_train_flat)

# Make predictions using SVM
svm_pred = svm_model.predict(X_test_flat)

# Evaluate the SVM model
print("\nSVM Classifier Report:")
print(classification_report(y_test_flat, svm_pred))
print(f"SVM Accuracy: {accuracy_score(y_test_flat, svm_pred)}")

# Save the SVM model to a pickle file
svm_model_filename = 'models/svm_model_tom_and_jerry.pkl'
# The models/ folder is only created further down in the notebook, so create it
# here as well; otherwise open() raises FileNotFoundError on a fresh run.
os.makedirs(os.path.dirname(svm_model_filename), exist_ok=True)
with open(svm_model_filename, 'wb') as svm_file:
    pickle.dump(svm_model, svm_file)
print(f"SVM model saved to {svm_model_filename}")



#### Data Preparation

In [None]:

# Prepare labels for Random Forest and SVM (binary classification):
# 1 = at least one character in the frame, 0 = no character.
#
# The earlier version unpacked (tom, jerry) pairs from y_train and re-flattened
# X_train / X_test. Those names are first defined in the CNN cell further down,
# and y_train there is a 4-column one-hot array, so the cell failed on a fresh
# top-to-bottom run. The integer labels of the existing flat split carry the
# same information.
# NOTE(review): assumes the 'tom_jerry_0' folder is the "neither character"
# class (the CNN cell lists the classes as Tom, Jerry, both, neither) - confirm.
y_train_rf_svm = [0 if label == label_mapping['tom_jerry_0'] else 1 for label in y_train_flat]
y_test_rf_svm = [0 if label == label_mapping['tom_jerry_0'] else 1 for label in y_test_flat]

# X_train_flat / X_test_flat from the Random Forest cell are already flattened
# and row-aligned with y_train_flat / y_test_flat, so they are reused unchanged
# by the binary SVM and Random Forest cells below.

### SVM model

In [None]:
# Train an SVM Classifier
# Linear-kernel SVM fitted on the binary y_train_rf_svm labels.
# NOTE: this rebinds svm_model and svm_pred, replacing the multi-class SVM
# trained in the earlier "SVM Model" cell.
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train_flat, y_train_rf_svm)

# Predict and evaluate SVM
svm_pred = svm_model.predict(X_test_flat)
print("\nSVM Classifier Report:")
print(classification_report(y_test_rf_svm, svm_pred))
print(f"SVM Accuracy: {accuracy_score(y_test_rf_svm, svm_pred)}")

### CNN Model

In [None]:
from tensorflow.keras.utils import to_categorical
# One-hot encode the labels
# The class count comes from label_mapping, so adding a class there does not
# leave a stale hard-coded 4 behind.
labels_one_hot = to_categorical(labels, num_classes=len(label_mapping))

# Split the data into training and test sets
# Same test_size / random_state as the flat split used for RF and SVM.
X_train, X_test, y_train, y_test = train_test_split(images, labels_one_hot, test_size=0.2, random_state=42)

# Build a CNN model for multi-class classification
cnn_model = Sequential()

# Add convolutional, pooling, and fully connected layers
# The input shape is taken from the loaded data (everything after the sample
# axis), so it cannot drift from the size used when resizing the images.
cnn_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=images.shape[1:]))
cnn_model.add(MaxPooling2D(pool_size=(2, 2)))

cnn_model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
cnn_model.add(MaxPooling2D(pool_size=(2, 2)))

cnn_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
cnn_model.add(MaxPooling2D(pool_size=(2, 2)))

cnn_model.add(Flatten())
cnn_model.add(Dense(128, activation='relu'))
# One softmax unit per class (Tom, Jerry, both, neither)
cnn_model.add(Dense(len(label_mapping), activation='softmax'))

# Compile the model
cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
# NOTE: the held-out test split doubles as validation data here, so the val_*
# metrics and the final test accuracy are computed on the same samples.
history = cnn_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
test_loss, test_accuracy = cnn_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")

# Save the CNN model for Flask deployment
# Make sure the target folder exists; it is otherwise only created in the
# deployment section further down.
os.makedirs('models', exist_ok=True)
cnn_model.save('models/cnn_model_tom_and_jerry.h5')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m159s[0m 1s/step - accuracy: 0.3666 - loss: 1.3060 - val_accuracy: 0.5137 - val_loss: 1.1324
Epoch 2/10
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 1s/step - accuracy: 0.6223 - loss: 0.9031 - val_accuracy: 0.6861 - val_loss: 0.7996
Epoch 3/10
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 1s/step - accuracy: 0.7851 - loss: 0.5570 - val_accuracy: 0.7819 - val_loss: 0.6006
Epoch 4/10
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 1s/step - accuracy: 0.8842 - loss: 0.3476 - val_accuracy: 0.8093 - val_loss: 0.6156
Epoch 5/10
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 1s/step - accuracy: 0.9164 - loss: 0.2392 - val_accuracy: 0.8130 - val_loss: 0.6255
Epoch 6/10
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 1s/step - accuracy: 0.9417 - loss: 0.1656 - val_accuracy: 0.7974 - val_loss: 0.6508
Epoch 7/10
[1m137/137



Test Accuracy: 0.8220803141593933


#### Random forest model

In [None]:
# Train a Random Forest Classifier
# 100-tree forest fitted on the binary y_train_rf_svm labels.
# NOTE: this rebinds rf_model and rf_pred, replacing the multi-class forest
# trained in the earlier "Random Forest Model" cell.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_flat, y_train_rf_svm)

# Predict and evaluate Random Forest
rf_pred = rf_model.predict(X_test_flat)
print("Random Forest Classifier Report:")
print(classification_report(y_test_rf_svm, rf_pred))
print(f"Random Forest Accuracy: {accuracy_score(y_test_rf_svm, rf_pred)}")

#### Deploying

In [None]:
# Directory to save the models
model_dir = 'models/'
# exist_ok=True keeps the cell idempotent on re-runs and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(model_dir, exist_ok=True)

In [None]:
### Saving the Random Forest Model ###
# Pickles whatever estimator rf_model is bound to when this cell runs.
# NOTE(review): in top-to-bottom order that is the forest from the
# "Random forest model" cell (fitted on y_train_rf_svm) - confirm it is the one
# intended for deployment.
rf_model_filename = os.path.join(model_dir, 'rf_model.pkl')
with open(rf_model_filename, 'wb') as rf_file:
    pickle.dump(rf_model, rf_file)
print(f"Random Forest model saved to {rf_model_filename}")


In [None]:
### Saving the SVM Model ###
# Pickles whatever estimator svm_model is bound to when this cell runs.
# NOTE(review): in top-to-bottom order that is the SVM from the "SVM model" cell
# (fitted on y_train_rf_svm) - confirm it is the one intended for deployment.
svm_model_filename = os.path.join(model_dir, 'svm_model.pkl')
with open(svm_model_filename, 'wb') as svm_file:
    pickle.dump(svm_model, svm_file)
print(f"SVM model saved to {svm_model_filename}")

In [None]:
### Saving the CNN Model ###
# TensorFlow models are generally saved using their built-in method rather than with pickle
# The CNN training cell already saved this model as
# 'models/cnn_model_tom_and_jerry.h5'; this writes a second copy under model_dir.
cnn_model_filename = os.path.join(model_dir, 'cnn_model.h5')
cnn_model.save(cnn_model_filename)
print(f"CNN model saved to {cnn_model_filename}")