In [39]:
import os

# Directories of the three dataset splits (relative to the working directory)
train_path, test_path, validation_path = 'train', 'test', 'validation'

# Count the image files stored in the class sub-folders of a dataset split
def count_images(data_path, folders=None, extensions=('.png',)):
    """Count image files inside the class sub-folders of ``data_path``.

    Args:
        data_path: Directory that contains one sub-folder per class code.
        folders: Iterable of sub-folder names to scan; each item is passed
            through ``str()``. Defaults to the codes 33 through 126 inclusive.
        extensions: File-name suffix, or tuple of suffixes, that marks an
            entry as an image. Matching is case-sensitive. Defaults to
            ``('.png',)``.

    Returns:
        int: Total number of matching entries over all scanned sub-folders.
        Sub-folders that are missing (or are not directories) contribute 0.
    """
    if folders is None:
        folders = range(33, 127)  # 33 through 126 inclusive
    suffixes = (extensions,) if isinstance(extensions, str) else tuple(extensions)

    total_images = 0
    for folder in folders:
        folder_path = os.path.join(data_path, str(folder))
        # isdir rather than exists: a regular file that happens to be named
        # like a class code would make os.listdir raise NotADirectoryError.
        if not os.path.isdir(folder_path):
            continue
        total_images += sum(
            1 for entry in os.listdir(folder_path) if entry.endswith(suffixes)
        )
    return total_images

# Count the images of every split, then report the three totals together
train_image_count = count_images(train_path)
test_image_count = count_images(test_path)
validation_image_count = count_images(validation_path)

# One print call with a newline separator emits the same three lines
print(
    f"Train papkasida {train_image_count} ta rasm bor.",
    f"Test papkasida {test_image_count} ta rasm bor.",
    f"Validation papkasida {validation_image_count} ta rasm bor.",
    sep="\n",
)


Train papkasida 72609 ta rasm bor.
Test papkasida 20719 ta rasm bor.
Validation papkasida 10463 ta rasm bor.


In [58]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Read the images of one dataset split and build the image/label arrays
def load_data(data_dir, csv_file, target_size=(64, 64)):
    """Load every image found in the class sub-folders of ``data_dir``.

    The scanned sub-folders are the codes 33 through 126 except 92, followed
    by ``'999'``. An image's label is the position of its folder in that list.

    Args:
        data_dir: Directory that contains the class sub-folders.
        csv_file: Unused. Retained so existing call sites keep working; the
            file is no longer parsed because its contents were never
            consulted.
        target_size: ``(height, width)`` every image is resized to while
            loading. Defaults to ``(64, 64)``.

    Returns:
        tuple: ``(images, labels)`` as NumPy arrays built from the per-image
        arrays and integer labels. Both are empty when no image is found.
        NOTE(review): images are presumably ``(N, height, width, 1)`` as
        produced by ``img_to_array`` for grayscale input - confirm.
    """
    valid_folders = [str(i) for i in range(33, 127) if i != 92] + ['999']
    label_mapping = {folder: idx for idx, folder in enumerate(valid_folders)}

    images = []
    labels = []
    for folder in valid_folders:
        folder_path = os.path.join(data_dir, folder)
        # isdir rather than exists: a regular file named like a class code
        # would make os.listdir raise.
        if not os.path.isdir(folder_path):
            continue
        # os.listdir returns entries in arbitrary order; sorting makes the
        # resulting arrays reproducible between runs and machines.
        for image_file in sorted(os.listdir(folder_path)):
            image_path = os.path.join(folder_path, image_file)
            # Skip nested directories (e.g. .ipynb_checkpoints), which
            # load_img cannot open.
            if not os.path.isfile(image_path):
                continue
            img = tf.keras.preprocessing.image.load_img(
                image_path,
                color_mode='grayscale',
                target_size=target_size
            )
            images.append(tf.keras.preprocessing.image.img_to_array(img))
            labels.append(label_mapping[folder])

    return np.array(images), np.array(labels)

# Build the convolutional classifier
def create_model(num_classes):
    """Assemble the (uncompiled) CNN that classifies 64x64 single-channel images.

    Args:
        num_classes: Number of units in the final softmax layer.

    Returns:
        A ``Sequential`` model: three 3x3 convolutions (32, 64, 64 filters)
        with 2x2 max-pooling after the first two, followed by a 64-unit
        dense layer and the softmax output layer.
    """
    # Convolutional stack; the first layer fixes the expected input shape.
    feature_layers = [
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
    ]
    # Dense head that turns the flattened feature maps into class probabilities.
    head_layers = [
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ]
    return models.Sequential(feature_layers + head_layers)

# Entry point: load the data, train the classifier and save it to disk
def main():
    """Train the character classifier and save it as an HDF5 file.

    Loads the training and validation splits, scales pixel values by 1/255,
    trains the model for 10 epochs and writes it to
    ``handwritten_text_recognition_model.h5``.

    Returns:
        The object returned by ``model.fit`` (the training history), so the
        caller can inspect the per-epoch metrics.

    Raises:
        ValueError: If either split contains no images.
    """
    # Load the data
    train_images, train_labels = load_data('train/data', 'ascii_file_counts.csv')
    val_images, val_labels = load_data('validation/data', 'ascii_file_counts.csv')

    # load_data skips folders that do not exist, so a wrong path yields empty
    # arrays; fail here with a clear message instead of deep inside fit().
    if len(train_images) == 0:
        raise ValueError("No training images found under 'train/data'.")
    if len(val_images) == 0:
        raise ValueError("No validation images found under 'validation/data'.")

    # Normalize the pixel values
    train_images = train_images / 255.0
    val_images = val_images / 255.0

    # Create the model
    num_classes = 94  # 93 classes (ASCII 33-126 without 92) + 1 for folder 999
    model = create_model(num_classes)

    # Compile the model
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Train the model
    history = model.fit(train_images, train_labels, epochs=10,
                        validation_data=(val_images, val_labels))

    # Save the model
    model.save('handwritten_text_recognition_model.h5')

    return history

if __name__ == "__main__":
    main()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


In [60]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def load_and_preprocess_image(image_path, target_size=(64, 64)):
    """Read one image and turn it into a single-item batch.

    Args:
        image_path: Path of the image file to read.
        target_size: Size the image is resized to while loading.

    Returns:
        The grayscale image as an array divided by 255.0, with a new leading
        axis of length 1 (the batch dimension).
    """
    grayscale = load_img(image_path, color_mode='grayscale', target_size=target_size)

    # Convert to an array and scale the pixel values in a single step
    pixels = img_to_array(grayscale) / 255.0

    # Prepend the batch axis
    return pixels[np.newaxis, ...]

def predict_image(model, image_path):
    """Return the class index the model scores highest for one image.

    Args:
        model: Object exposing ``predict``; it receives the preprocessed
            single-image batch.
        image_path: Path of the image to classify.

    Returns:
        The ``np.argmax`` of the model's prediction output.
    """
    batch = load_and_preprocess_image(image_path)
    scores = model.predict(batch)
    return np.argmax(scores)

def map_class_to_character(predicted_class):
    """Translate a class index into the character it stands for.

    Indices follow the order of the codes 33 through 126 with 92 left out,
    followed by the special code 999.

    Args:
        predicted_class: Class index to translate.

    Returns:
        str: ``"Unknown"`` for the index of code 999, otherwise the character
        with the corresponding code.

    Raises:
        KeyError: If ``predicted_class`` is not one of the known indices.
    """
    # Codes in class order: 33..91, 93..126, then the catch-all 999
    ascii_codes = list(range(33, 92)) + list(range(93, 127)) + [999]
    index_to_code = dict(enumerate(ascii_codes))

    code = index_to_code[predicted_class]
    return "Unknown" if code == 999 else chr(code)

# End-to-end prediction: saved model + image path -> character
def predict_image_pipeline(model_path, image_path):
    """Load a saved model and return the character it predicts for an image.

    Args:
        model_path: Path of the saved model, passed to ``load_model``.
        image_path: Path of the image to classify.

    Returns:
        The value produced by ``map_class_to_character`` for the predicted
        class index.
    """
    classifier = load_model(model_path)
    class_index = predict_image(classifier, image_path)
    return map_class_to_character(class_index)

# Example: classify a single image with the saved model
model_path = 'handwritten_text_recognition_model.h5'
image_path = '/content/test/100/10824.jpg'

# Run the whole pipeline and show the resulting character
predicted_char = predict_image_pipeline(model_path=model_path, image_path=image_path)
print(f"The predicted character is: {predicted_char}")

The predicted character is: d
