In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [2]:
import datetime

In [3]:
def load_data(file_path, num_samples, data_dir, image_size, image_subdir='TeluguSeg/'):
    """Load grayscale images and their labels listed in an annotation file.

    Every non-blank line of ``file_path`` must hold an image path followed by
    whitespace and a label. The label is the last whitespace-separated token,
    so the image path itself may contain spaces.

    Args:
        file_path: UTF-8 text file with one ``<image path> <label>`` entry per line.
        num_samples: Only the first ``num_samples`` lines of the file are
            considered (blank lines inside that window are skipped, not
            replaced). ``None`` means every line.
        data_dir: Root directory of the dataset.
        image_size: Forwarded to ``load_img`` as ``target_size``.
        image_subdir: Directory below ``data_dir`` that the listed image paths
            are relative to. Defaults to ``'TeluguSeg/'``.

    Returns:
        A ``(images, labels)`` tuple. ``images`` is a NumPy array holding one
        pixel array (divided by 255) per entry, in file order; ``labels`` is
        the list of label strings in the same order. If no entries are found,
        ``images`` is an empty array and ``labels`` an empty list.

    Raises:
        ValueError: If a non-blank line does not contain both a path and a
            label. NumPy may also raise ``ValueError`` if the loaded images do
            not all share one shape.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()[:num_samples]

    # Parse the listing first so malformed lines fail fast, before any image
    # decoding work has been done.
    entries = []
    for line_no, line in enumerate(lines, start=1):
        stripped = line.strip()
        if not stripped:
            continue  # tolerate blank / trailing empty lines
        parts = stripped.rsplit(None, 1)
        if len(parts) != 2:
            raise ValueError(
                f"{file_path}:{line_no}: expected '<image path> <label>', got {stripped!r}"
            )
        entries.append((parts[0], parts[1]))

    labels = [label for _, label in entries]

    # Fill one preallocated array instead of building a Python list and then
    # copying it with np.array(): that avoids keeping two copies of the whole
    # image set in memory at the same time.
    images = None
    for idx, (img_path, _) in enumerate(entries):
        img = load_img(
            os.path.join(data_dir, image_subdir, img_path),
            target_size=image_size,
            color_mode='grayscale',
        )
        img_array = img_to_array(img) / 255.0  # Normalize pixel values
        if images is None:
            # Shape and dtype are taken from the first decoded image.
            images = np.empty((len(entries),) + img_array.shape, dtype=img_array.dtype)
        images[idx] = img_array

    if images is None:
        return np.array([]), labels
    return images, labels

In [4]:
# Load and preprocess data
# The dataset root can be overridden with the TELUGU_DATA_DIR environment
# variable so the notebook is not tied to one machine; the original local
# path is kept as the fallback.
data_dir = os.environ.get(
    "TELUGU_DATA_DIR",
    "C:/Users/saikiran.golla/Project/IIIT-HW-Telugu_v1.tar/IIIT-HW-Telugu_v1/TeluguSeg.tar/",
)
image_size = (128, 128)  # Adjust as needed; passed to load_img as target_size
# Upper bounds on how many lines are read from train.txt / val.txt / test.txt.
num_training_samples = 60000
num_validation_samples = 12000
num_test_samples = 10000

In [5]:
import psutil

In [6]:
# Snapshot of system RAM taken before the dataset is loaded into memory.
free = psutil.virtual_memory()
print("Free memory:", free.free)
print("Used memory:", free.used)
# `available` is psutil's estimate of what can be handed to processes without
# swapping; it is the figure to compare against the size of the image arrays.
print("Available memory:", free.available)

Free memory: 547110912
Used memory: 7712247808


In [None]:
# Read the train / validation / test splits (in that order) and report how
# long the whole load took.
t_start = datetime.datetime.now()
print(t_start)
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    load_data(os.path.join(data_dir, list_name), sample_limit, data_dir, image_size)
    for list_name, sample_limit in (
        ('train.txt', num_training_samples),
        ('val.txt', num_validation_samples),
        ('test.txt', num_test_samples),
    )
)
t_end = datetime.datetime.now()
print("time taken :", t_end - t_start)

2023-08-30 19:59:05.278778


In [8]:
# Label encoding
# The encoder is fitted on the labels of all three splits together, so every
# label seen in validation or test has an integer id.
label_encoder = LabelEncoder().fit(y_train + y_val + y_test)
y_train_encoded, y_val_encoded, y_test_encoded = (
    label_encoder.transform(split_labels)
    for split_labels in (y_train, y_val, y_test)
)



In [9]:
# Sanity check: number of encoded test labels.
len(y_test_encoded)

10000

In [10]:
# Number of distinct labels across train, validation and test combined;
# this sizes the softmax output layer of the model.
num_classes = len(label_encoder.classes_)

In [11]:
# Show how many output classes the classifier has to distinguish.
print(num_classes)

12811


In [12]:
# Build your OCR model
# A single conv + pool stage followed by a dense classifier over all labels.
model = tf.keras.Sequential([
    # Derive the input shape from image_size so the network stays in sync with
    # the loader when image_size is adjusted; the trailing 1 is the single
    # grayscale channel.
    tf.keras.layers.Input(shape=(*image_size, 1)),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')  # Use num_classes here
])

In [13]:
# Compile the model
# The sparse loss is used because the targets are integer class ids from the
# LabelEncoder rather than one-hot vectors.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [14]:
# Train the model
# NOTE(review): the output recorded under this cell shows "Epoch N/10", which
# does not match epochs=25 below - the cell was presumably edited after its
# last run; confirm the intended value.
history = model.fit(
    X_train,
    y_train_encoded,
    validation_data=(X_val, y_val_encoded),
    epochs=25,
    batch_size=32,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
# Evaluate model on test data
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_loss, test_accuracy = model.evaluate(
    X_test,
    y_test_encoded,
    verbose=2,
)
print(f"Test Accuracy: {test_accuracy:.4f}")

313/313 - 21s - loss: 10.3486 - accuracy: 2.0000e-04 - 21s/epoch - 66ms/step
Test Accuracy: 0.0002


In [None]:
# Sample image (Windows path form) used for the single-image prediction check:
# C:\Users\saikiran.golla\Project\IIIT-HW-Telugu_v1.tar\IIIT-HW-Telugu_v1\TeluguSeg.tar\TeluguSeg\TeluguSeg\train\7\296\20.jpg

In [16]:
# Run the model on a single image.
# NOTE: this sample lives under the train/ directory, so it is a smoke test of
# the inference path, not a measure of how well the model generalizes.
# The path is built from data_dir instead of repeating the absolute location.
new_image_path = os.path.join(data_dir, "TeluguSeg/TeluguSeg/train/7/296/20.jpg")
new_image = load_img(new_image_path, target_size=image_size, color_mode='grayscale')
new_image_array = img_to_array(new_image) / 255.0
# Add the leading batch axis expected by model.predict.
new_image_array = np.expand_dims(new_image_array, axis=0)
# Take the arg-max over the class axis of the first (only) row, rather than
# over the flattened prediction array, so the index is always a class id.
predicted_class_index = model.predict(new_image_array).argmax(axis=-1)[0]
predicted_class = label_encoder.inverse_transform([predicted_class_index])[0]
print(f"Predicted Class: {predicted_class}")

Predicted Class: ౯౭


In [16]:
# Save the trained model
model.save('telugu_ocr_model_v8.h5')
# Also persist the label vocabulary: the model only outputs integer class ids,
# and they can be mapped back to labels later only with the exact classes_
# ordering of the encoder used for training.
np.save('telugu_ocr_model_v8_classes.npy', label_encoder.classes_)


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.preprocessing import LabelEncoder

# Define your data directories and parameters
# NOTE(review): '/path/to/dataset' looks like a placeholder - replace it with
# the real dataset root before running this cell.
data_dir = '/path/to/dataset'
image_size = (128, 128)  # passed to load_img as target_size
# Upper bounds on how many lines are read from train.txt / val.txt / test.txt.
num_training_samples = 2000
num_validation_samples = 500
num_test_samples = 500

# Load data
def load_data(file_path, num_samples):
    """Load grayscale images and their labels listed in an annotation file.

    Image paths are resolved against the module-level ``data_dir`` and images
    are resized to the module-level ``image_size``. Defining this function
    rebinds the name ``load_data``, replacing any earlier definition in the
    notebook that took additional arguments.

    Every non-blank line of ``file_path`` must hold an image path followed by
    whitespace and a label. The label is the last whitespace-separated token,
    so the image path itself may contain spaces.

    Args:
        file_path: UTF-8 text file with one ``<image path> <label>`` entry per line.
        num_samples: Only the first ``num_samples`` lines of the file are
            considered (blank lines inside that window are skipped, not
            replaced). ``None`` means every line.

    Returns:
        A ``(images, labels)`` tuple. ``images`` is a NumPy array holding one
        pixel array (divided by 255) per entry, in file order; ``labels`` is
        the list of label strings in the same order. If no entries are found,
        ``images`` is an empty array and ``labels`` an empty list.

    Raises:
        ValueError: If a non-blank line does not contain both a path and a
            label. NumPy may also raise ``ValueError`` if the loaded images do
            not all share one shape.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()[:num_samples]

    # Parse the listing first so malformed lines fail fast, before any image
    # decoding work has been done.
    entries = []
    for line_no, line in enumerate(lines, start=1):
        stripped = line.strip()
        if not stripped:
            continue  # tolerate blank / trailing empty lines
        parts = stripped.rsplit(None, 1)
        if len(parts) != 2:
            raise ValueError(
                f"{file_path}:{line_no}: expected '<image path> <label>', got {stripped!r}"
            )
        entries.append((parts[0], parts[1]))

    labels = [label for _, label in entries]

    # Fill one preallocated array instead of building a Python list and then
    # copying it with np.array(): that avoids keeping two copies of the whole
    # image set in memory at the same time.
    images = None
    for idx, (img_path, _) in enumerate(entries):
        img = load_img(os.path.join(data_dir, img_path), target_size=image_size, color_mode='grayscale')
        img_array = img_to_array(img) / 255.0  # Normalize pixel values
        if images is None:
            # Shape and dtype are taken from the first decoded image.
            images = np.empty((len(entries),) + img_array.shape, dtype=img_array.dtype)
        images[idx] = img_array

    if images is None:
        return np.array([]), labels
    return images, labels

# Read the train / validation / test listings, in that order.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    load_data(os.path.join(data_dir, list_name), sample_limit)
    for list_name, sample_limit in (
        ('train.txt', num_training_samples),
        ('val.txt', num_validation_samples),
        ('test.txt', num_test_samples),
    )
)

# Label encoding
# The encoder is fitted on the labels of all three splits together, so every
# label seen in validation or test has an integer id.
label_encoder = LabelEncoder().fit(y_train + y_val + y_test)
y_train_encoded, y_val_encoded, y_test_encoded = (
    label_encoder.transform(split_labels)
    for split_labels in (y_train, y_val, y_test)
)

# Load and compile your model
model = tf.keras.models.load_model('path_to_your_saved_model')

# The label encoder in this cell was fitted from scratch, so its class ids are
# only meaningful for the loaded model if both cover the same label set. A size
# mismatch would otherwise train silently against the wrong classes.
# (Assumes a single-output model whose output_shape is a plain tuple.)
expected_classes = model.output_shape[-1]
if expected_classes != len(label_encoder.classes_):
    raise ValueError(
        f"Loaded model predicts {expected_classes} classes but the label encoder "
        f"has {len(label_encoder.classes_)}; reuse the label set the model was trained with."
    )

# NOTE(review): recompiling presumably resets any optimizer state restored by
# load_model - confirm this is intended when resuming training.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
# Validation metrics are computed on the val split after every epoch.
history = model.fit(
    X_train,
    y_train_encoded,
    validation_data=(X_val, y_val_encoded),
    epochs=10,
    batch_size=32,
)

# Evaluate model on test data
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_loss, test_accuracy = model.evaluate(
    X_test,
    y_test_encoded,
    verbose=2,
)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Test the model on a new image
# The image goes through the same preprocessing as the training data
# (grayscale, resized to image_size, scaled by 1/255).
new_image_path = '/path/to/new/image.jpg'
new_image = load_img(
    new_image_path,
    color_mode='grayscale',
    target_size=image_size,
)
new_image_array = img_to_array(new_image) / 255.0
# Prepend the batch axis expected by model.predict.
new_image_array = new_image_array[np.newaxis, ...]
predicted_class_index = np.argmax(model.predict(new_image_array))
# Map the integer class id back to its original label string.
predicted_class = label_encoder.inverse_transform([predicted_class_index])[0]
print(f"Predicted Class: {predicted_class}")
