In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import cv2
import random

In [None]:
# Dataset root: one sub-folder per split, each with its own labels.csv
base_dir = 'pls'

train_dir, val_dir, test_dir = (
    os.path.join(base_dir, split_name) for split_name in ('train', 'val', 'test')
)

train_labels_csv, val_labels_csv, test_labels_csv = (
    os.path.join(split_dir, 'labels.csv')
    for split_dir in (train_dir, val_dir, test_dir)
)

# --- Model & Training Parameters ---
IMG_HEIGHT = IMG_WIDTH = 32  # square input images
IMG_CHANNELS = 1             # single-channel input
INPUT_SHAPE = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)
BATCH_SIZE = 64
EPOCHS = 20

In [None]:
# Read the label table of every split; a missing labels.csv aborts the run.
try:
    train_df = pd.read_csv(train_labels_csv)
    val_df = pd.read_csv(val_labels_csv)
    test_df = pd.read_csv(test_labels_csv)
except FileNotFoundError as e:
    print(f"Error loading CSV: {e}")
    print("Please ensure the root directory and the 'labels.csv' files exist in the correct locations.")
    # Nothing downstream can work without the label tables
    raise SystemExit("CSV file not found.")
else:
    print("Train DataFrame Head:")
    print(train_df.head())
    print(f"\nTotal Train Samples: {len(train_df)}")
    print(f"Total Validation Samples: {len(val_df)}")
    print(f"Total Test Samples: {len(test_df)}")

# Total number of null cells per split (summed over all columns)
null_counts = {
    split_name: frame.isna().sum().sum()
    for split_name, frame in (("train", train_df), ("val", val_df), ("test", test_df))
}
print("\nMissing values in train_df:", null_counts["train"])
print("Missing values in val_df:", null_counts["val"])
print("Missing values in test_df:", null_counts["test"])

# Every split must expose the image path column and the label column
required_columns = ['filename', 'words']
for csv_title, frame in (("Train", train_df), ("Validation", val_df), ("Test", test_df)):
    if any(col not in frame.columns for col in required_columns):
        raise ValueError(f"{csv_title} CSV must contain columns: {required_columns}")

label_column_name = 'words'

print(f"\nUsing label column: '{label_column_name}'")

Train DataFrame Head:
                     filename words
0  character_10_yna/13839.png     ञ
1  character_10_yna/79348.png     ञ
2  character_10_yna/23260.png     ञ
3  character_10_yna/57417.png     ञ
4  character_10_yna/19463.png     ञ

Total Train Samples: 73600
Total Validation Samples: 9200
Total Test Samples: 9200

Missing values in train_df: 0
Missing values in val_df: 0
Missing values in test_df: 0

Using label column: 'words'

In [None]:
# Collect the label column of all three splits so the class vocabulary
# covers every character that appears anywhere in the data.
all_labels = pd.concat(
    [frame[label_column_name] for frame in (train_df, val_df, test_df)],
    ignore_index=True,
)
unique_labels = sorted(all_labels.unique())
num_classes = len(unique_labels)

print(f"\nTotal unique classes (Devanagari characters): {num_classes}")

# Forward and reverse lookups between a character and its class index
label_to_int = dict(zip(unique_labels, range(num_classes)))
int_to_label = dict(enumerate(unique_labels))
print(int_to_label)

Total unique classes (Devanagari characters): 46
{0: 'क', 1: 'क्ष', 2: 'ख', 3: 'ग', 4: 'घ', 5: 'ङ', 6: 'च', 7: 'छ', 8: 'ज', 9: 'ज्ञ', 10: 'झ', 11: 'ञ', 12: 'ट', 13: 'ठ', 14: 'ड', 15: 'ढ', 16: 'ण', 17: 'त', 18: 'त्र', 19: 'थ', 20: 'द', 21: 'ध', 22: 'न', 23: 'प', 24: 'फ', 25: 'ब', 26: 'भ', 27: 'म', 28: 'य', 29: 'र', 30: 'ल', 31: 'व', 32: 'श', 33: 'ष', 34: 'स', 35: 'ह', 36: '०', 37: '१', 38: '२', 39: '३', 40: '४', 41: '५', 42: '६', 43: '७', 44: '८', 45: '९'}


In [None]:
# Show the sorted class vocabulary; this list is what the data generators receive as `classes`.
print(unique_labels)

['क', 'क्ष', 'ख', 'ग', 'घ', 'ङ', 'च', 'छ', 'ज', 'ज्ञ', 'झ', 'ञ', 'ट', 'ठ', 'ड', 'ढ', 'ण', 'त', 'त्र', 'थ', 'द', 'ध', 'न', 'प', 'फ', 'ब', 'भ', 'म', 'य', 'र', 'ल', 'व', 'श', 'ष', 'स', 'ह', '०', '१', '२', '३', '४', '५', '६', '७', '८', '९']


In [None]:
# Small CNN: two conv/max-pool stages, then a dropout-regularised dense
# classifier that ends in a softmax over the character classes.
model = keras.Sequential(
    [
        keras.Input(shape=INPUT_SHAPE),

        # Stage 1: 32 filters, 3x3 kernels, 2x2 pooling
        layers.Conv2D(filters=32, kernel_size=3, activation="relu"),
        layers.MaxPooling2D(pool_size=2),

        # Stage 2: 64 filters, 3x3 kernels, 2x2 pooling
        layers.Conv2D(filters=64, kernel_size=3, activation="relu"),
        layers.MaxPooling2D(pool_size=2),

        # Classifier head
        layers.Flatten(),
        layers.Dropout(rate=0.5),
        layers.Dense(units=256, activation="relu"),
        layers.Dropout(rate=0.3),
        layers.Dense(units=num_classes, activation="softmax"),  # one probability per class
    ]
)

model.summary()

In [None]:
# Aryan: please generate the model summary output here and push it.

In [None]:
def _make_generator(datagen, dataframe, directory, shuffle, seed=None):
    """Build a DataFrame-backed image iterator with the notebook's shared settings.

    Args:
        datagen: ImageDataGenerator supplying rescaling and any augmentation.
        dataframe: Label table of one split ('filename' plus the label column).
        directory: Folder that the 'filename' entries are relative to.
        shuffle: Whether the iterator shuffles the sample order.
        seed: Optional random seed forwarded to the iterator.

    Returns:
        The iterator produced by ``datagen.flow_from_dataframe``.
    """
    generator = datagen.flow_from_dataframe(
        dataframe=dataframe,
        directory=directory,   # Directory where images referenced in df are located
        x_col='filename',      # Column in df that contains the filenames
        y_col=label_column_name,  # Column in df that has the target labels (characters)
        target_size=(IMG_HEIGHT, IMG_WIDTH),
        color_mode='grayscale' if IMG_CHANNELS == 1 else 'rgb',
        classes=unique_labels,    # Same class order for every split
        class_mode='categorical', # Returns 2D one-hot encoded labels
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        seed=seed,
    )
    # Rows the iterator could not validate are dropped without an error, so
    # report any mismatch instead of silently working on fewer samples.
    if generator.n != len(dataframe):
        print(f"Warning: only {generator.n} of {len(dataframe)} rows were accepted by the generator.")
    return generator


# Training data is augmented with small geometric perturbations
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,      # degrees
    width_shift_range=0.1,  # fraction of total width
    height_shift_range=0.1, # fraction of total height
    shear_range=0.1,        # shear intensity (angle in counter-clockwise direction)
    zoom_range=0.1,         # random zoom range [1-zoom_range, 1+zoom_range]
    fill_mode='nearest'     # strategy used for filling points outside the input boundaries
)

# Validation and test images are only rescaled, never augmented
val_test_datagen = ImageDataGenerator(rescale=1./255)

# --- Create Generators ---
print("\nCreating Training Generator...")
train_generator = _make_generator(
    train_datagen, train_df, train_dir,
    shuffle=True,
    seed=42,  # For reproducibility
)

print("\nCreating Validation Generator...")
validation_generator = _make_generator(val_test_datagen, val_df, val_dir, shuffle=False)

print("\nCreating Test Generator...")
test_generator = _make_generator(val_test_datagen, test_df, test_dir, shuffle=False)

# Pull one batch up front so path or decoding problems surface before training
try:
    x_batch, y_batch = next(train_generator)
    print(f"\nSample batch shape: images={x_batch.shape}, labels={y_batch.shape}")
    # Check image range
    print(f"Image data range: min={np.min(x_batch)}, max={np.max(x_batch)}")
except Exception as e:
    print(f"\nError fetching batch from train_generator: {e}")
    # Use the configured CSV path so the hint stays correct if base_dir changes
    print(f"Please check image paths in '{train_labels_csv}' and ensure images exist and are readable.")
    print("Example path expected by generator:", os.path.join(train_dir, train_df['filename'].iloc[0]))
    raise SystemExit("Error creating data generator.") from e


Creating Training Generator...
Found 73600 validated image filenames belonging to 46 classes.

Creating Validation Generator...
Found 9200 validated image filenames belonging to 46 classes.

Creating Test Generator...
Found 9200 validated image filenames belonging to 46 classes.

Sample batch shape: images=(64, 32, 32, 1), labels=(64, 46)
Image data range: min=0.0, max=1.0


In [None]:
# Categorical cross-entropy pairs with the one-hot labels produced by the
# generators (class_mode='categorical').
loss_function = "categorical_crossentropy"
optimizer = keras.optimizers.Adam(learning_rate=0.001)

model.compile(
    optimizer=optimizer,
    loss=loss_function,
    metrics=["accuracy"],
)

In [None]:
# Optional Callbacks
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_accuracy',   # Monitor validation accuracy
    patience=5,               # Number of epochs with no improvement after which training will be stopped
    verbose=1,
    restore_best_weights=True # Restore model weights from the epoch with the best value of the monitored quantity.
)

# Reduce learning rate if validation loss plateaus
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2, # Factor by which the learning rate will be reduced. new_lr = lr * factor
    patience=3,
    min_lr=0.00001,
    verbose=1
)

print("\nStarting Training...")

# Calculate steps per epoch
steps_per_epoch = len(train_generator) 
validation_steps = len(validation_generator)

history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=validation_generator,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    callbacks=[early_stopping, reduce_lr]
)

print("\nTraining Finished.")

In [None]:
print("\nEvaluating on Test Set...")

# One step per batch so that every test sample is scored
test_steps = len(test_generator)
test_metrics = model.evaluate(test_generator, steps=test_steps, verbose=1)

# evaluate() returns the loss followed by the compiled metrics (accuracy)
loss, accuracy = test_metrics

print(f"\nTest Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")

In [None]:
# Aryan: please run the test-accuracy evaluation here as well and show the result.


Evaluating on Test Set...
144/144 ━━━━━━━━━━━━━━━━━━━━ 7s 51ms/step - accuracy: 0.9857 - loss: 0.0490

Test Loss: 0.0420
Test Accuracy: 0.9879 (98.79%)

Remove this one; it looks fake.

In [None]:
# Persist the trained model to 'first.keras' (path is relative to the working directory).
model.save('first.keras')

In [None]:
# Reload the saved model later (e.g. in a fresh session) from 'first.keras'
model = keras.models.load_model('first.keras')