In [None]:
#!pip install git+https://github.com/adriangb/scikeras.git -q 

In [None]:
#!pip install tensorflow==2.7.0
!pip install scikeras

In [None]:
import os	
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasClassifier

import keras
from keras import layers

# Data preparation

## Loading the dataset

In [None]:
# Root folder of the training set. The loading cell below walks its
# sub-folders and derives each label from the sub-folder name.

DATA_DIR = '/kaggle/input/characters/data/train'

In [None]:
# Load the training set: every image is read as grayscale and resized to 28x28.
images = []
labels = []

# Sort the listing: os.listdir() returns entries in arbitrary order, which would
# make the sample order (and every seeded split derived from it) depend on the
# file system instead of being reproducible.
class_folders = sorted(os.listdir(DATA_DIR))
num_samples = len(class_folders)  # number of directory entries, used only for the progress counter

# Loop through each folder in the dataset
for i, folder in enumerate(class_folders):
    if (i + 1) % 5 == 0:
        print(f"{i + 1}/{num_samples}")

    folder_path = os.path.join(DATA_DIR, folder)
    if not os.path.isdir(folder_path):
        continue  # ignore stray files next to the class folders

    # Extract the character label from the folder name (e.g., Sample001 -> '001')
    label = folder[6:]  # Assuming 'SampleXXX' format

    # Loop through each image in the folder
    for image_file in sorted(os.listdir(folder_path)):
        image_path = os.path.join(folder_path, image_file)

        # cv2.imread does not raise on failure: it returns None for files it
        # cannot decode, and cv2.resize would then crash with an opaque error.
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            print(f"Skipping unreadable file: {image_path}")
            continue

        # Append the resized image and its label to the lists
        images.append(cv2.resize(img, (28, 28)))
        labels.append(label)

# Convert lists to numpy arrays
images = np.array(images)
labels = np.array(labels)

In [None]:
# Give every image an explicit single-channel axis: (N, 28, 28) -> (N, 28, 28, 1)
images = np.reshape(images, (-1, 28, 28, 1))

# Divide by 255 so the model input lies in [0, 1]
X = images / 255.0

In [None]:
# Fit the encoder on the string labels, then map them to integer class ids
label_encoder = LabelEncoder().fit(labels)
y = label_encoder.transform(labels)

In [None]:
# Shape of a single sample, followed by the distinct encoded class ids
print(X[0].shape)
np.unique(y)

## Sanity check: display one sample

In [None]:
# Index of the sample to display (0 = first image in the dataset)
image_index = 0

# Drop the channel axis so matplotlib receives a 2-D array
image_to_show = np.squeeze(X[image_index])

# Recover the original (pre-encoding) label for the title
original_label = label_encoder.inverse_transform([y[image_index]])[0]

# Draw on an explicit Axes instead of the implicit pyplot state
fig, ax = plt.subplots()
ax.imshow(image_to_show, cmap='gray')
ax.set_title(f"Label: {original_label}")
ax.axis('off')  # hide ticks and frame
plt.show()

# Parameter Tuning

## Bayesian Optimization
Hyperparameter search using KerasTuner's `BayesianOptimization` tuner.

In [None]:
from keras_tuner import HyperModel, Objective
import tensorflow as tf
from keras_tuner.tuners import BayesianOptimization

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation; the fixed seed keeps the split repeatable
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Create the keras tuner model.
class CNNHyperModel(HyperModel):
    """Search space for a two-convolution CNN on 28x28 single-channel images.

    Tuned hyperparameters: ``filters_2``, ``dense_units``, ``dropout_rate``
    and ``learning_rate``.
    """

    def build(self, hp):
        """Build and compile one candidate model.

        Args:
            hp: KerasTuner hyperparameter container used to sample the
                values for this candidate.

        Returns:
            A compiled ``keras.Sequential`` classifier with 62 softmax outputs.
        """
        model = keras.Sequential()
        # Same input declaration as the other models in this notebook
        model.add(layers.Input(shape=(28, 28, 1)))

        # First convolutional layer (fixed width)
        model.add(layers.Conv2D(
            filters=64,
            kernel_size=(3, 3),
            activation='relu'
        ))
        model.add(layers.MaxPooling2D(pool_size=(2, 2)))

        # Second convolutional layer (tuned width)
        model.add(layers.Conv2D(
            filters=hp.Int('filters_2', min_value=96, max_value=224),
            kernel_size=(3, 3),
            activation='relu'
        ))
        model.add(layers.MaxPooling2D(pool_size=(2, 2)))

        # Flatten and Dense layer
        model.add(layers.Flatten())
        model.add(layers.Dense(
            units=hp.Int('dense_units', min_value=256, max_value=512),
            activation='relu'
        ))

        model.add(layers.Dropout(
            rate=hp.Float('dropout_rate', min_value=0.15, max_value=0.3)
        ))

        # Output layer
        model.add(layers.Dense(62, activation='softmax'))  # Assuming 62 classes for character classification

        # Optimizer carrying the tuned learning rate (sampled on a log scale)
        optimizer_instance = keras.optimizers.Adam(
            learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
        )

        # Pass the optimizer *instance*: the string 'adam' would create a fresh
        # Adam with the default learning rate and silently discard the tuned one.
        model.compile(
            optimizer=optimizer_instance,
            loss='sparse_categorical_crossentropy',  # Use sparse categorical crossentropy for integer labels
            metrics=['accuracy']
        )

        return model

class MyTuner(BayesianOptimization):
    """Bayesian tuner that also samples the training batch size for each trial."""

    def __init__(self, *args, **kwargs):
        # Plain pass-through to the parent constructor
        super().__init__(*args, **kwargs)

    def run_trial(self, trial, *args, **kwargs):
        """Run one trial, injecting a sampled ``batch_size`` into the fit arguments.

        Any ``batch_size`` supplied by the caller is replaced by the value
        sampled from the trial's hyperparameters (integer in [80, 144]).
        """
        sampled_batch_size = trial.hyperparameters.Int('batch_size', 80, 144)
        kwargs['batch_size'] = sampled_batch_size
        return super().run_trial(trial, *args, **kwargs)
      

In [None]:
# Bayesian Optimization using KerasTuner
tuner = MyTuner(
    CNNHyperModel(),
    objective='val_accuracy',  # Optimize for validation accuracy
    max_trials=10,  # Number of trials to run
    executions_per_trial=2,  # Number of executions for each trial
    directory='kt_search4',  # Directory to store the results
    project_name='cnn_bayesian_optimization'  # Project name
)

# Stop a run at the first epoch whose val_loss does not improve (patience=0).
# All other EarlyStopping arguments are left at their defaults; spelling them
# out (notably start_from_epoch) adds nothing and may be rejected by older
# Keras releases.
es = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=0,
    verbose=1,
)

# No batch_size here: MyTuner.run_trial samples it per trial and overwrites
# any value passed to search(), so a fixed number would only be misleading.
tuner.search(
    X_train, y_train,
    epochs=15,
    validation_data=(X_val, y_val),
    callbacks=[es]
)

In [None]:
# Retrieve the top-ranked model from the search and print its architecture
top_models = tuner.get_best_models(num_models=1)
best_model = top_models[0]
best_model.summary()

# Score it on the validation split
val_loss, val_acc = best_model.evaluate(X_val, y_val)
print(f"Validation Accuracy: {val_acc}")

# List the hyperparameter values of the best trial
best_trial = tuner.oracle.get_best_trials(num_trials=1)[0]
best_values = best_trial.hyperparameters.values
print("Optimal Hyperparameters:")
for hp_name, value in best_values.items():
    print(f"{hp_name}: {value}")

## Cross Validation

In [None]:
# Number of cross-validation folds
n_splits = 7

# Shuffle before splitting; the fixed seed makes the fold assignment repeatable
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

In [None]:
# Per-fold results: best validation accuracy and the epoch at which it was reached
val_accuracies = []
optimal_epochs = []

# Loop through each split
for i, (train_index, val_index) in enumerate(kf.split(X)):
    print(f"------------ Fold {i + 1} / {n_splits} ------------")

    # Fold-specific names so the hold-out split (X_train / X_val) created for
    # the tuning section is not silently overwritten by the last fold.
    X_fold_train, X_fold_val = X[train_index], X[val_index]
    y_fold_train, y_fold_val = y[train_index], y[val_index]

    # Define the model (a fresh, untrained network for every fold)
    model = keras.Sequential([
        layers.Input(shape=(28, 28, 1)),

        # First Convolutional Layer
        layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),

        # Second Convolutional Layer
        layers.Conv2D(192, kernel_size=(3, 3), activation='relu'),
        layers.MaxPooling2D(pool_size=(2, 2)),

        # Flatten the output to feed into Dense layers
        layers.Flatten(),

        # Fully Connected Layer with Dropout
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.25),

        # Output Layer with Softmax activation for multi-class classification
        layers.Dense(62, activation='softmax')
    ])

    # Compile the model
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    early_stopping = keras.callbacks.EarlyStopping(monitor='val_accuracy',
                               patience=3,  # Stop if no improvement in 3 epochs
                               restore_best_weights=True)

    # Train the model
    history = model.fit(X_fold_train, y_fold_train,
                        epochs=30,
                        batch_size=96,
                        validation_data=(X_fold_val, y_fold_val),
                        callbacks=[early_stopping],
                        verbose=1)

    fold_val_accuracy = history.history['val_accuracy']

    # The number of epochs *run* includes the `patience` epochs spent waiting
    # after the best one, so it overstates the optimum. Record the epoch with
    # the highest validation accuracy instead (1-based; argmax picks the first
    # maximum, which is also the epoch early stopping treats as best).
    best_epoch = int(np.argmax(fold_val_accuracy)) + 1

    val_accuracies.append(max(fold_val_accuracy))
    optimal_epochs.append(best_epoch)
    print(f"Accuracy: {max(fold_val_accuracy)}")

In [None]:
# Mean of the validation accuracies collected across the folds
average_val_accuracy = np.mean(val_accuracies)
print(f'Average validation accuracy: {average_val_accuracy}')

# Mean of the per-fold epoch counts, rounded down to a whole number of epochs
total_epochs = sum(optimal_epochs)
average_optimal_epochs = total_epochs // len(optimal_epochs)
print(f"Average Optimal Epochs: {average_optimal_epochs}")

# Training

In [None]:
# Layer stack of the final classifier, listed in forward order
final_layers = [
    layers.Input(shape=(28, 28, 1)),

    # Convolution block 1
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Convolution block 2
    layers.Conv2D(192, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Classifier head: flatten -> dense -> dropout -> 62-way softmax
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.25),
    layers.Dense(62, activation='softmax'),
]
final_model = keras.Sequential(final_layers)

# Integer class ids as targets, hence the sparse cross-entropy loss
final_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the complete dataset (no validation data) for the epoch count
# derived from cross-validation
final_model.fit(
    X, y,
    epochs=average_optimal_epochs,
    batch_size=96,
    verbose=1,
)

# Prediction

In [None]:
# Folder with the test images; the loading cell below reads every entry in it as an image file
test_folder_path = '/kaggle/input/characters/data/test'  

In [None]:
# Load the test set with the same preprocessing as the training images
test_images = []
test_labels = []  # file names, written next to each prediction in the submission


# Sorted listing: os.listdir() order is arbitrary, sorting makes the output order stable
test_files = sorted(os.listdir(test_folder_path))
num_samples = len(test_files)

for i, image_file in enumerate(test_files):
    if (i + 1) % 5 == 0:
        print(f"{i + 1}/{num_samples}")

    image_path = os.path.join(test_folder_path, image_file)

    # Read the image in grayscale. cv2.imread returns None (it does not raise)
    # when the file cannot be decoded; cv2.resize would then crash.
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print(f"Skipping unreadable file: {image_path}")
        continue

    # Resize to 28x28 and keep image and file name in step with each other
    test_images.append(cv2.resize(img, (28, 28)))
    test_labels.append(image_file)

# Convert lists to numpy arrays
test_images = np.array(test_images)

# Reshape the images to add a channel dimension (for grayscale images)
test_images = test_images.reshape(-1, 28, 28, 1)

# Normalize pixel values to the range [0, 1]
test_images = test_images / 255.0


In [None]:
# One row of softmax outputs per test image
test_pred = final_model.predict(test_images)

In [None]:
# Expected: (number of test images, 62)
test_pred.shape

In [None]:
# Most probable class index for every test image
predicted_indices = np.argmax(test_pred, axis=1)

# Translate encoder indices back to the original folder labels through the
# fitted encoder. `index + 1` is only right when the folder numbers are exactly
# 1..62 with no gaps and sort the same way as strings and as numbers.
predicted_labels = label_encoder.inverse_transform(predicted_indices)

with open('submission.txt', 'w') as f:
    for predicted_label, label in zip(predicted_labels, test_labels):
        # Folder labels are numeric strings such as '001' (assumes the
        # 'SampleXXX' folder naming); int() writes them as 1, 2, ... as before.
        predicted_class = int(predicted_label)

        f.write(f"{predicted_class};{label}\n")

print("Predictions saved to 'submission.txt'")