In [1]:
import numpy as np
import pandas as pd

# Reference glyphs: one bitmap per uppercase letter, stored as a flat list of
# 25 binary pixels. Each list reads as a 5x5 image laid out row by row, so the
# values below are grouped five at a time (one group per image row).
data = {
    "Letter": list("ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
    "Pixels": [
        [0,0,1,0,0, 0,1,0,1,0, 1,0,0,0,1, 1,1,1,1,1, 1,0,0,0,1],  # A
        [1,1,1,1,0, 1,0,0,0,1, 1,1,1,1,0, 1,0,0,0,1, 1,1,1,1,0],  # B
        [1,1,1,1,1, 1,0,0,0,0, 1,0,0,0,0, 1,0,0,0,0, 1,1,1,1,1],  # C
        [1,1,1,1,0, 1,0,0,0,1, 1,0,0,0,1, 1,0,0,0,1, 1,1,1,1,0],  # D
        [1,1,1,1,1, 1,0,0,0,0, 1,1,1,1,1, 1,0,0,0,0, 1,1,1,1,1],  # E
        [1,1,1,1,1, 1,0,0,0,0, 1,1,1,1,0, 1,0,0,0,0, 1,0,0,0,0],  # F
        [1,1,1,1,1, 1,0,0,0,0, 1,0,0,1,1, 1,0,0,0,1, 1,1,1,1,1],  # G
        [1,0,0,0,1, 1,0,0,0,1, 1,1,1,1,1, 1,0,0,0,1, 1,0,0,0,1],  # H
        [1,1,1,1,1, 0,0,1,0,0, 0,0,1,0,0, 0,0,1,0,0, 1,1,1,1,1],  # I
        [0,0,0,0,1, 0,0,0,0,1, 0,0,0,0,1, 1,0,0,0,1, 0,1,1,1,0],  # J
        [1,0,0,1,0, 1,0,1,0,0, 1,1,0,0,0, 1,0,1,0,0, 1,0,0,1,0],  # K
        [1,0,0,0,0, 1,0,0,0,0, 1,0,0,0,0, 1,0,0,0,0, 1,1,1,1,1],  # L
        [1,0,0,0,1, 1,1,0,1,1, 1,0,1,0,1, 1,0,0,0,1, 1,0,0,0,1],  # M
        [1,0,0,0,1, 1,1,0,0,1, 1,0,1,0,1, 1,0,0,1,1, 1,0,0,0,1],  # N
        [0,1,1,1,0, 1,0,0,0,1, 1,0,0,0,1, 1,0,0,0,1, 0,1,1,1,0],  # O
        [1,1,1,1,0, 1,0,0,0,1, 1,1,1,1,0, 1,0,0,0,0, 1,0,0,0,0],  # P
        [1,1,1,1,1, 1,0,0,0,1, 1,0,0,0,1, 1,0,0,1,0, 1,1,1,0,1],  # Q
        [1,1,1,1,1, 1,0,0,0,1, 1,1,1,0,0, 1,0,0,1,0, 1,0,0,0,1],  # R
        [1,1,1,1,1, 1,0,0,0,0, 1,1,1,1,1, 0,0,0,0,1, 1,1,1,1,1],  # S
        [1,1,1,1,1, 0,0,1,0,0, 0,0,1,0,0, 0,0,1,0,0, 0,0,1,0,0],  # T
        [1,0,0,0,1, 1,0,0,0,1, 1,0,0,0,1, 1,0,0,0,1, 1,1,1,1,1],  # U
        [1,0,0,0,1, 1,0,0,0,1, 0,1,0,1,0, 0,0,1,0,0, 0,0,1,0,0],  # V
        [1,0,0,0,1, 1,0,0,0,1, 1,0,1,0,1, 1,1,0,1,1, 1,0,0,0,1],  # W
        [1,0,0,0,1, 0,1,0,1,0, 0,0,1,0,0, 0,1,0,1,0, 1,0,0,0,1],  # X
        [1,0,0,0,1, 0,1,0,1,0, 0,0,1,0,0, 0,0,1,0,0, 0,0,1,0,0],  # Y
        [1,1,1,1,1, 0,0,0,1,0, 0,0,1,0,0, 0,1,0,0,0, 1,1,1,1,1],  # Z
    ],
}

# One row per letter: "Letter" holds the character, "Pixels" the 25-value list.
df = pd.DataFrame(data)

def add_noise(grid, noise_level=0.1, rng=None):
    """Return a copy of ``grid`` with a fraction of its binary pixels flipped.

    Parameters
    ----------
    grid : list or numpy.ndarray
        Flat sequence of 0/1 pixel values. It is copied, never modified.
    noise_level : float, default 0.1
        Fraction of pixels to flip, in the closed interval [0, 1]. The number
        of flipped pixels is ``noise_level * len(grid)`` rounded down.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global ``np.random`` stream
        is used, so ``np.random.seed(...)`` still controls the result.

    Returns
    -------
    Same container type as ``grid`` (the result of ``grid.copy()``) with the
    selected pixels inverted.

    Raises
    ------
    ValueError
        If ``noise_level`` is outside [0, 1].
    """
    if not 0 <= noise_level <= 1:
        raise ValueError(f"noise_level must be between 0 and 1, got {noise_level!r}")

    noisy_grid = grid.copy()

    # The tiny epsilon guards against float products that land just below an
    # integer (e.g. 0.29 * 100 == 28.999999999999996), which plain int() would
    # truncate to one pixel fewer than requested.
    num_noisy_pixels = int(noise_level * len(grid) + 1e-9)

    # Both the np.random module and Generator/RandomState objects expose
    # choice(a, size, replace=...), so one call covers every source.
    sampler = np.random if rng is None else rng
    indices = sampler.choice(len(grid), num_noisy_pixels, replace=False)

    for idx in indices:
        noisy_grid[idx] = 1 - noisy_grid[idx]  # Flip 0 to 1 or 1 to 0
    return noisy_grid

# Augmentation settings, kept in one place so they are easy to tune.
RANDOM_SEED = 42          # fixes the noise so a full re-run rebuilds the same dataset
VARIANTS_PER_LETTER = 10  # noisy copies generated for each letter
NOISE_LEVEL = 0.1         # fraction of pixels flipped in each copy

# add_noise() draws from NumPy's global random stream by default, so seeding
# it here makes the augmented data reproducible.
np.random.seed(RANDOM_SEED)

# Apply noise to generate augmented data: one record per noisy variant, laid
# out as [letter, pixel_1, ..., pixel_n]. Only noisy copies are stored; the
# clean glyph itself is not added.
augmented_data = []
for letter, base_grid in zip(df["Letter"], df["Pixels"]):
    for _ in range(VARIANTS_PER_LETTER):
        noisy_grid = add_noise(base_grid, noise_level=NOISE_LEVEL)
        augmented_data.append([letter, *noisy_grid])

# Convert augmented data to a DataFrame. The pixel column count is taken from
# the glyphs themselves rather than hard-coded.
pixel_columns = [f"Pixel{i+1}" for i in range(len(df["Pixels"].iloc[0]))]
augmented_df = pd.DataFrame(augmented_data, columns=["Letter"] + pixel_columns)


In [2]:
# Negative examples: sparse random bit patterns labelled "Non-Letter".
NON_LETTER_SEED = 0        # fixed seed so this cell yields the same patterns on every run
NUM_NON_LETTERS = 200      # number of random noise patterns to generate
NON_LETTER_PIXELS = 25     # pixels per pattern, matching the 25-pixel letter grids

# A dedicated seeded generator keeps this cell reproducible even when it is
# re-run on its own. All patterns are drawn in a single call: each pixel is 1
# with probability 0.2 and 0 with probability 0.8 (mostly zeros).
non_letter_rng = np.random.default_rng(NON_LETTER_SEED)
random_grids = non_letter_rng.choice(
    [0, 1], size=(NUM_NON_LETTERS, NON_LETTER_PIXELS), p=[0.8, 0.2]
)

# .tolist() turns the draw into plain Python ints, one inner list per pattern.
non_letter_data = [["Non-Letter", *grid] for grid in random_grids.tolist()]

non_letter_df = pd.DataFrame(
    non_letter_data,
    columns=["Letter"] + [f"Pixel{i+1}" for i in range(NON_LETTER_PIXELS)],
)


In [3]:
# Stack the noisy letter samples on top of the random non-letter patterns,
# renumbering the index from 0.
final_df = pd.concat(
    [augmented_df, non_letter_df],
    ignore_index=True,
)

# Binary target: 1 for any alphabet letter, 0 for the "Non-Letter" class.
is_letter = final_df['Letter'] != "Non-Letter"
final_df['Label'] = is_letter.astype('int64')

# Persist the combined dataset (without the index column) for later use.
final_df.to_csv("augmented_alphabet_dataset.csv", index=False)
