In [2]:
# this is for training data conversion 

import numpy as np
from PIL import Image
import os

def read_dat_file(file_path, encodings=('utf-8', 'cp1252', 'latin1')):
    """Read a text .dat bitmap made of '0'/'1' characters into an integer array.

    Every line of the file becomes one row. Characters other than '0' and '1'
    are ignored, and shorter rows are right-padded with 0 so that all rows
    share the length of the longest one.

    Parameters:
    - file_path (str): Path of the .dat file to read.
    - encodings (sequence of str): Encodings to try, in order. The first one
      that decodes the whole file is used.

    Returns:
    - numpy.ndarray: One row of 0/1 integers per line of the file.

    Raises:
    - ValueError: If `encodings` is empty.
    - UnicodeDecodeError: If none of the encodings can decode the file.
    """
    if not encodings:
        raise ValueError("At least one encoding must be given")

    last_error = None
    for encoding in encodings:
        try:
            with open(file_path, 'r', encoding=encoding) as file:
                lines = file.readlines()
            break
        except UnicodeDecodeError as error:
            last_error = error
    else:
        # UnicodeDecodeError needs (encoding, object, start, end, reason);
        # building it from a single message string would raise TypeError.
        raise UnicodeDecodeError(
            last_error.encoding,
            last_error.object,
            last_error.start,
            last_error.end,
            f"Cannot decode file: {file_path}",
        ) from last_error

    # Keep only the bitmap characters of each line
    data = [[int(char) for char in line.strip() if char in '01'] for line in lines]
    max_length = max((len(row) for row in data), default=0)

    # Pad rows to ensure they have the same length
    padded_data = [row + [0] * (max_length - len(row)) for row in data]
    return np.array(padded_data)

def save_grayscale_image(array, output_path):
    """Scale an array to 8-bit grayscale and write it to `output_path`.

    Values are divided by the array maximum and multiplied by 255, so the
    largest value maps to 255. An all-zero array is written as an all-black
    image instead of dividing by zero.

    Parameters:
    - array (array-like): Pixel values to save.
    - output_path (str): Destination file path passed to `Image.save`.

    Raises:
    - ValueError: If `array` contains no elements.
    """
    pixels = np.asarray(array)
    if pixels.size == 0:
        raise ValueError(f"Cannot save an empty image: {output_path}")

    peak = np.max(pixels)
    if peak == 0:
        # Avoid 0/0 -> NaN, which has no defined uint8 value
        normalized_array = np.zeros(pixels.shape, dtype=np.uint8)
    else:
        # Normalize array values to range [0, 255]
        normalized_array = (pixels / peak * 255).astype(np.uint8)

    image = Image.fromarray(normalized_array, 'L')
    image.save(output_path)

def process_and_save_images_from_directory(dat_directory, output_folder):
    """Convert every .dat file in `dat_directory` to a PNG in `output_folder`.

    Each file whose name ends with '.dat' is read with `read_dat_file` and
    written with `save_grayscale_image` under the same base name with a
    '.png' extension. Other directory entries are ignored.

    Parameters:
    - dat_directory (str): Directory containing the .dat files.
    - output_folder (str): Directory for the PNGs; created if missing.
    """
    dat_suffix = '.dat'
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so the processing order does not depend on the filesystem
    for filename in sorted(os.listdir(dat_directory)):
        if not filename.endswith(dat_suffix):
            continue

        dat_file_path = os.path.join(dat_directory, filename)
        image_data = read_dat_file(dat_file_path)

        # Swap only the trailing extension; str.replace would also rewrite
        # any earlier '.dat' inside the name.
        output_filename = filename[:-len(dat_suffix)] + '.png'
        output_path = os.path.join(output_folder, output_filename)
        save_grayscale_image(image_data, output_path)
        print(f"Image saved to: {output_path}")

# Training Set: folder holding the raw .dat files, and folder receiving the PNGs
dat_directory = r'C:\Users\ASUS\OneDrive\Desktop\Capstone\HWR-Gurmukhi__3.1\HWR-Gurmukhi__3.1\Training_Set'
output_folder_directory = r'C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Training_Set'

# Convert every .dat file of the Training Set into a PNG image
print("Processing Training Set:")
process_and_save_images_from_directory(
    dat_directory=dat_directory,
    output_folder=output_folder_directory,
)


Processing Training Set:
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Training_Set\aara (1).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Training_Set\aara (10).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Training_Set\aara (100).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Training_Set\aara (101).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Training_Set\aara (102).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Training_Set\aara (103).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Training_Set\aara (104).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Training_Set\aara (105).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Training_Set\aara (106).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Training_Set\aara (107).pn

In [3]:
# this is for testing data conversion 


import numpy as np
from PIL import Image
import os

def read_dat_file(file_path, encodings=('utf-8', 'cp1252', 'latin1')):
    """Read a text .dat bitmap made of '0'/'1' characters into an integer array.

    Every line of the file becomes one row. Characters other than '0' and '1'
    are ignored, and shorter rows are right-padded with 0 so that all rows
    share the length of the longest one.

    Parameters:
    - file_path (str): Path of the .dat file to read.
    - encodings (sequence of str): Encodings to try, in order. The first one
      that decodes the whole file is used.

    Returns:
    - numpy.ndarray: One row of 0/1 integers per line of the file.

    Raises:
    - ValueError: If `encodings` is empty.
    - UnicodeDecodeError: If none of the encodings can decode the file.
    """
    if not encodings:
        raise ValueError("At least one encoding must be given")

    last_error = None
    for encoding in encodings:
        try:
            with open(file_path, 'r', encoding=encoding) as file:
                lines = file.readlines()
            break
        except UnicodeDecodeError as error:
            last_error = error
    else:
        # UnicodeDecodeError needs (encoding, object, start, end, reason);
        # building it from a single message string would raise TypeError.
        raise UnicodeDecodeError(
            last_error.encoding,
            last_error.object,
            last_error.start,
            last_error.end,
            f"Cannot decode file: {file_path}",
        ) from last_error

    # Keep only the bitmap characters of each line
    data = [[int(char) for char in line.strip() if char in '01'] for line in lines]
    max_length = max((len(row) for row in data), default=0)

    # Pad rows to ensure they have the same length
    padded_data = [row + [0] * (max_length - len(row)) for row in data]
    return np.array(padded_data)

def save_grayscale_image(array, output_path):
    """Scale an array to 8-bit grayscale and write it to `output_path`.

    Values are divided by the array maximum and multiplied by 255, so the
    largest value maps to 255. An all-zero array is written as an all-black
    image instead of dividing by zero.

    Parameters:
    - array (array-like): Pixel values to save.
    - output_path (str): Destination file path passed to `Image.save`.

    Raises:
    - ValueError: If `array` contains no elements.
    """
    pixels = np.asarray(array)
    if pixels.size == 0:
        raise ValueError(f"Cannot save an empty image: {output_path}")

    peak = np.max(pixels)
    if peak == 0:
        # Avoid 0/0 -> NaN, which has no defined uint8 value
        normalized_array = np.zeros(pixels.shape, dtype=np.uint8)
    else:
        # Normalize array values to range [0, 255]
        normalized_array = (pixels / peak * 255).astype(np.uint8)

    image = Image.fromarray(normalized_array, 'L')
    image.save(output_path)

def process_and_save_images_from_directory(dat_directory, output_folder):
    """Convert every .dat file in `dat_directory` to a PNG in `output_folder`.

    Each file whose name ends with '.dat' is read with `read_dat_file` and
    written with `save_grayscale_image` under the same base name with a
    '.png' extension. Other directory entries are ignored.

    Parameters:
    - dat_directory (str): Directory containing the .dat files.
    - output_folder (str): Directory for the PNGs; created if missing.
    """
    dat_suffix = '.dat'
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so the processing order does not depend on the filesystem
    for filename in sorted(os.listdir(dat_directory)):
        if not filename.endswith(dat_suffix):
            continue

        dat_file_path = os.path.join(dat_directory, filename)
        image_data = read_dat_file(dat_file_path)

        # Swap only the trailing extension; str.replace would also rewrite
        # any earlier '.dat' inside the name.
        output_filename = filename[:-len(dat_suffix)] + '.png'
        output_path = os.path.join(output_folder, output_filename)
        save_grayscale_image(image_data, output_path)
        print(f"Image saved to: {output_path}")

# Testing Set: folder holding the raw .dat files, and folder receiving the PNGs
dat_directory = r'C:\Users\ASUS\OneDrive\Desktop\Capstone\HWR-Gurmukhi__3.1\HWR-Gurmukhi__3.1\Testing_Set'
output_folder_directory = r'C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Testing_Set'

# Process and save images from the Testing Set directory.
# The progress message names the split that is actually being converted.
print("Processing Testing Set:")
process_and_save_images_from_directory(dat_directory, output_folder_directory)

Processing Training Set:
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Testing_Set\aara (1).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Testing_Set\aara (10).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Testing_Set\aara (11).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Testing_Set\aara (12).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Testing_Set\aara (13).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Testing_Set\aara (14).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Testing_Set\aara (15).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Testing_Set\aara (16).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Testing_Set\aara (17).png
Image saved to: C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Testing_Set\aara (18).png
Image saved to: 

In [4]:
# splitting the training dataset into training and validation sets

import os
import shutil
import random

# Define the paths
source_folder = r"C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Training_Set"
train_folder = r"C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Training_Set"
val_folder = r"C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Validation_Set"

# Per-class split sizes, and the seed that makes the shuffle reproducible
TRAIN_FILES_PER_CLASS = 112
VAL_FILES_PER_CLASS = 28
SPLIT_SEED = 42

# Create the destination folders if they don't exist
os.makedirs(train_folder, exist_ok=True)
os.makedirs(val_folder, exist_ok=True)

# Sorted listing, so the seeded shuffle always starts from the same order
files = sorted(os.listdir(source_folder))

# Group files by character: "<character> (<number>).png" -> "<character>"
file_groups = {}
for file in files:
    if not file.endswith(".png"):
        continue
    char_name, separator, _ = file.rpartition(" (")
    if not separator:
        # No " (n)" suffix: the character name cannot be derived, so skip it
        print(f"Skipping file without a ' (n)' suffix: {file}")
        continue
    file_groups.setdefault(char_name, []).append(file)

# When the training folder is the source folder, training files already sit
# where they belong and only the validation files have to be moved.
train_in_place = (
    os.path.normcase(os.path.abspath(source_folder))
    == os.path.normcase(os.path.abspath(train_folder))
)

# Private generator: reproducible without touching the global random state
rng = random.Random(SPLIT_SEED)

# Process files for each character group
for char_name, char_files in file_groups.items():
    # Shuffle the list of files for each character
    rng.shuffle(char_files)

    # Split the files into training and validation sets
    split_end = TRAIN_FILES_PER_CLASS + VAL_FILES_PER_CLASS
    train_files = char_files[:TRAIN_FILES_PER_CLASS]
    val_files = char_files[TRAIN_FILES_PER_CLASS:split_end]

    # Move the files to the appropriate destination folders
    if not train_in_place:
        for file in train_files:
            source_file = os.path.join(source_folder, file)
            dest_file = os.path.join(train_folder, file)
            shutil.move(source_file, dest_file)

    for file in val_files:
        source_file = os.path.join(source_folder, file)
        dest_file = os.path.join(val_folder, file)
        shutil.move(source_file, dest_file)

print("Files have been successfully distributed into train and validation folders with shuffling.")


Files have been successfully distributed into train and validation folders with shuffling.


In [11]:
import os
import shutil
import re

def organize_images_by_class(source_dir, target_dir):
    """
    Organizes images into class-specific directories based on filenames.

    A file is moved only when its whole name has the form
    "<letters> (<digits>).png"; the letters are used as the class name and
    the file is moved to <target_dir>/<class name>/. Every other entry is
    left in place and reported.

    Parameters:
    - source_dir (str): Directory containing the images.
    - target_dir (str): Directory where class-specific directories will be created.
    """
    # Ensure the target directory exists
    os.makedirs(target_dir, exist_ok=True)

    # Regex pattern to extract class names
    pattern = re.compile(r'([a-zA-Z]+) \(\d+\)\.png')

    # Iterate over all files in the source directory
    for filename in sorted(os.listdir(source_dir)):
        # fullmatch anchors both ends, so names with trailing text after
        # ".png" (e.g. "aara (2).png.bak") are not treated as images.
        match = pattern.fullmatch(filename)
        if match is None:
            print(f"Filename did not match pattern: {filename}")
            continue

        class_name = match.group(1)  # Extract class name
        class_dir = os.path.join(target_dir, class_name)

        # Create class directory if it doesn't exist
        os.makedirs(class_dir, exist_ok=True)

        # Move the file to the class directory
        source_file = os.path.join(source_dir, filename)
        target_file = os.path.join(class_dir, filename)
        shutil.move(source_file, target_file)
        print(f"Moved {filename} to {class_name}")

# Flat folders that currently hold the images of each split
train_source_dir = r'C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Training_Set'
val_source_dir = r'C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Validation_Set'
test_source_dir = r'C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Testing_Set'

# Folders that will receive one sub-directory per class
train_target_dir = r'C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Organised_Training_Set'
val_target_dir = r'C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Organised_Validation_Set'
test_target_dir = r'C:\Users\ASUS\OneDrive\Desktop\Capstone\image dataset\Organised_Testing_Set'

# Organize each split in turn: training, then validation, then testing
for split_source_dir, split_target_dir in (
    (train_source_dir, train_target_dir),
    (val_source_dir, val_target_dir),
    (test_source_dir, test_target_dir),
):
    organize_images_by_class(split_source_dir, split_target_dir)

print("All files have been successfully organized by class.")


Moved aara (10).png to aara
Moved aara (100).png to aara
Moved aara (101).png to aara
Moved aara (103).png to aara
Moved aara (104).png to aara
Moved aara (105).png to aara
Moved aara (106).png to aara
Moved aara (108).png to aara
Moved aara (109).png to aara
Moved aara (11).png to aara
Moved aara (110).png to aara
Moved aara (111).png to aara
Moved aara (112).png to aara
Moved aara (113).png to aara
Moved aara (114).png to aara
Moved aara (115).png to aara
Moved aara (116).png to aara
Moved aara (118).png to aara
Moved aara (119).png to aara
Moved aara (12).png to aara
Moved aara (120).png to aara
Moved aara (121).png to aara
Moved aara (122).png to aara
Moved aara (123).png to aara
Moved aara (124).png to aara
Moved aara (125).png to aara
Moved aara (127).png to aara
Moved aara (128).png to aara
Moved aara (129).png to aara
Moved aara (13).png to aara
Moved aara (131).png to aara
Moved aara (132).png to aara
Moved aara (134).png to aara
Moved aara (136).png to aara
Moved aara (137).p