In [10]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import torchvision
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision import transforms 
import torch.nn as nn
import torch.optim as optim
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [11]:


print(torch.__version__)
print(torchvision.__version__)


1.13.0+cu117
0.14.0+cu117


In [12]:
import os
folder_path = 'C:/Users/siddu/SU22_project/cursive_prediction/datafileszipped/datafiles'
os.chdir(folder_path)
files_in_folder = os.listdir()

In [13]:
file_dic = {}
for file in files_in_folder:
    letter = file[-5]
    file_dic[file] = letter

In [14]:
df = pd.DataFrame(list(file_dic.items()), columns=['file_name', 'letter'])

In [15]:
df

Unnamed: 0,file_name,letter
0,0000_a.png,a
1,0001_b.png,b
2,0002_n.png,n
3,0003_o.png,o
4,0004_e.png,e
...,...,...
1935,1935_i.png,i
1936,1936_x.png,x
1937,1937_o.png,o
1938,1938_l.png,l


In [16]:

IMG_SIZE = 64
BATCH_SIZE = 32

In [17]:
from PIL import Image

def png_to_tensor(path):
    # Replace 'path_to_image.png' with the actual path to your PNG file
    image_path = folder_path + '/' + path

    # Open the image
    image = Image.open(image_path)
    bw_image = image.convert('L')
    data_transforms = [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(), # Scales data into [0,1] 
        transforms.Lambda(lambda t: (t * 2) - 1) # Scale between [-1, 1] 
    ]
    data_transform = transforms.Compose(data_transforms)
    img_tensor = data_transform(bw_image)


    return img_tensor

In [18]:
# FUNCTION TO ADD NOISE 
def add_gaussian_noise(tensor, mean=0.0, std=0.1):
    noise = torch.randn(tensor.size()) * std + mean
    noisy_tensor = tensor + noise
    return torch.clamp(noisy_tensor, min=-1.0, max=1.0)

In [19]:
# IMAGE TO NOISY TENSOR
def png_to_noisy_tensor(path, noise_mean=0.0, noise_std=0.1):
    # Replace 'path_to_image.png' with the actual path to your PNG file
    image_path = folder_path + '/' + path

    # Open the image
    image = Image.open(image_path)
    bw_image = image.convert('L')
    data_transforms = [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(), # Scales data into [0,1] 
        transforms.Lambda(lambda t: (t * 2) - 1) # Scale between [-1, 1] 
    ]
    data_transform = transforms.Compose(data_transforms)
    img_tensor = data_transform(bw_image)

    # Add Gaussian noise to the tensor
    noisy_img_tensor = add_gaussian_noise(img_tensor, mean=noise_mean, std=noise_std)

    return noisy_img_tensor


In [20]:
def show_tensor_image(tensor):
    reverse_transforms = transforms.Compose([
        transforms.Lambda(lambda t: (t + 1) / 2),
        transforms.Lambda(lambda t: t.permute(1, 2, 0)), # CHW to HWC
        transforms.Lambda(lambda t: t * 255.),
        transforms.Lambda(lambda t: t.numpy().astype(np.uint8)),
        transforms.ToPILImage(),
    ])
    return reverse_transforms(tensor)

In [21]:
df2 = df.copy()

In [22]:
df['image'] = df['file_name'].apply(png_to_tensor)
df.drop(columns = ['file_name'], inplace = True)
df = df[['image', 'letter']]

In [23]:
df

Unnamed: 0,image,letter
0,"[[[tensor(0.5608), tensor(0.5843), tensor(0.66...",a
1,"[[[tensor(0.5451), tensor(0.5529), tensor(0.56...",b
2,"[[[tensor(0.6784), tensor(0.7020), tensor(0.67...",n
3,"[[[tensor(0.6706), tensor(0.6549), tensor(0.63...",o
4,"[[[tensor(0.6863), tensor(0.6784), tensor(0.67...",e
...,...,...
1935,"[[[tensor(0.6706), tensor(0.6627), tensor(0.64...",i
1936,"[[[tensor(0.6627), tensor(0.6627), tensor(0.67...",x
1937,"[[[tensor(0.6000), tensor(0.6078), tensor(0.63...",o
1938,"[[[tensor(0.6235), tensor(0.6235), tensor(0.63...",l


In [24]:
for i in range(10):
    noise_df = df2.copy()
    noise_df['image'] = noise_df['file_name'].apply(png_to_noisy_tensor)
    noise_df.drop(columns = ['file_name'], inplace = True)
    noise_df = noise_df[['image', 'letter']]
    df = pd.concat([df, noise_df])

In [25]:
df

Unnamed: 0,image,letter
0,"[[[tensor(0.5608), tensor(0.5843), tensor(0.66...",a
1,"[[[tensor(0.5451), tensor(0.5529), tensor(0.56...",b
2,"[[[tensor(0.6784), tensor(0.7020), tensor(0.67...",n
3,"[[[tensor(0.6706), tensor(0.6549), tensor(0.63...",o
4,"[[[tensor(0.6863), tensor(0.6784), tensor(0.67...",e
...,...,...
1935,"[[[tensor(0.5413), tensor(0.6765), tensor(0.64...",i
1936,"[[[tensor(0.5499), tensor(0.6307), tensor(0.71...",x
1937,"[[[tensor(0.6085), tensor(0.3713), tensor(0.47...",o
1938,"[[[tensor(0.5232), tensor(0.5870), tensor(0.70...",l


In [26]:
import torch
from torch.utils.data import Dataset, DataLoader


# df to tuples
data_tuples = list(zip(df['image'].tolist(), df['letter'].tolist()))

# Split indices
dataset_size = len(data_tuples)
indices = list(range(dataset_size))
split = int(0.2 * dataset_size)

torch.manual_seed(42)

# Shuffle indices
indices_shuffled = torch.randperm(len(indices))

# Split 
train_indices, test_indices = indices_shuffled[split:], indices_shuffled[:split]

# custom Dataset class
class CustomDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __getitem__(self, index):
        image_tensor, letter = self.data[index]
        return image_tensor, ord(letter) - ord('a')

    def __len__(self):
        return len(self.data)

# custom Datasets
train_dataset = CustomDataset(data=[data_tuples[i] for i in train_indices])
test_dataset = CustomDataset(data=[data_tuples[i] for i in test_indices])

#  DataLoaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)


In [27]:
for X_batch, y_batch in test_loader:
    print("X batch:", X_batch)
    print("y batch:", y_batch)
    break  

X batch: tensor([[[[0.9633, 0.7800, 0.7885,  ..., 0.9929, 1.0000, 0.8275],
          [0.9061, 1.0000, 0.8815,  ..., 1.0000, 1.0000, 1.0000],
          [0.8827, 0.9692, 0.9823,  ..., 1.0000, 0.9047, 1.0000],
          ...,
          [1.0000, 0.9631, 0.7805,  ..., 0.9368, 0.8677, 0.9429],
          [0.7482, 1.0000, 0.9075,  ..., 1.0000, 0.9974, 0.8434],
          [0.9015, 0.8534, 0.9147,  ..., 0.8314, 1.0000, 0.9050]]],


        [[[1.0000, 1.0000, 1.0000,  ..., 1.0000, 0.9411, 0.9629],
          [0.7876, 1.0000, 1.0000,  ..., 0.9686, 1.0000, 0.9964],
          [0.9364, 0.8613, 1.0000,  ..., 1.0000, 1.0000, 0.8520],
          ...,
          [0.8087, 0.9351, 0.9440,  ..., 0.9332, 1.0000, 0.9627],
          [0.9825, 0.9668, 0.9852,  ..., 0.8212, 0.8582, 0.7907],
          [0.8098, 0.8525, 1.0000,  ..., 0.7390, 0.8902, 0.8726]]],


        [[[0.5030, 0.5592, 0.6912,  ..., 0.9561, 0.5438, 0.7189],
          [0.5664, 0.6230, 0.4967,  ..., 0.7679, 0.6589, 0.4940],
          [0.4906, 0.8707, 0.

In [28]:
len(train_loader)

534

In [29]:
# AlexNet architecture
class AlexNet(nn.Module):
    def __init__(self, num_classes=26):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [30]:
for i, (inputs, labels) in enumerate(train_loader):
    print(inputs)
    print(labels)
    break

tensor([[[[0.6157, 0.6235, 0.6235,  ..., 0.6314, 0.6314, 0.6235],
          [0.6314, 0.6392, 0.6314,  ..., 0.6314, 0.6235, 0.6157],
          [0.6314, 0.6392, 0.6235,  ..., 0.6314, 0.6235, 0.6235],
          ...,
          [0.6157, 0.6235, 0.6314,  ..., 0.6392, 0.6235, 0.6078],
          [0.6314, 0.6314, 0.6314,  ..., 0.6471, 0.6314, 0.6235],
          [0.6235, 0.6314, 0.6392,  ..., 0.6392, 0.6392, 0.6392]]],


        [[[0.6977, 0.6257, 0.7284,  ..., 0.6942, 0.5811, 0.7750],
          [0.6833, 0.7742, 0.5949,  ..., 0.5740, 0.5714, 0.8256],
          [0.6571, 0.5698, 0.6884,  ..., 0.5186, 0.4938, 0.2538],
          ...,
          [0.7125, 0.5852, 0.3265,  ..., 0.6074, 0.5756, 0.6338],
          [0.4711, 0.6000, 0.7612,  ..., 0.5138, 0.5671, 0.7145],
          [0.7407, 0.6313, 0.4907,  ..., 0.7099, 0.6813, 0.3908]]],


        [[[0.7017, 0.6115, 0.7068,  ..., 0.7467, 0.5937, 0.4835],
          [0.6620, 0.6337, 0.5716,  ..., 0.5986, 0.6465, 0.5818],
          [0.6309, 0.5537, 0.6048,  ..

In [31]:
# Instantiate the model
alexnet = AlexNet().to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(alexnet.parameters(), lr=0.01, momentum=0.9)

num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        
        labels = torch.tensor(labels, dtype=torch.long).to(device)

        optimizer.zero_grad()
        outputs = alexnet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

print("Training Finished")


  labels = torch.tensor(labels, dtype=torch.long).to(device)


Epoch 1, Loss: 2.749991363130705
Epoch 2, Loss: 1.4550473125239882
Epoch 3, Loss: 0.5521483190413494
Epoch 4, Loss: 0.2850508477559753
Epoch 5, Loss: 0.17696214255341416
Epoch 6, Loss: 0.0983422288114893
Epoch 7, Loss: 0.061378138346840846
Epoch 8, Loss: 0.03474480204366979
Epoch 9, Loss: 0.011918613272372269
Epoch 10, Loss: 0.029423709331345985
Training Finished


In [32]:

test_loss = 0.0

alexnet.eval()

with torch.no_grad():
    for i, (inputs, labels) in enumerate(test_loader):
        inputs = inputs.to(device)
        labels = torch.tensor(labels, dtype=torch.long).to(device)

        outputs = alexnet(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()

    print(f"Test Loss: {test_loss/len(test_loader)}")

print("Testing Finished")


  labels = torch.tensor(labels, dtype=torch.long).to(device)


Test Loss: 0.0002695374190325446
Testing Finished


In [27]:
x = df['image'][0]

In [49]:
def pred(x):
    alexnet.eval()
    with torch.no_grad():
        input_image = x
        output = alexnet(input_image)
        
    class_labels = np.unique(df['letter'])

    predicted_class_index = output.argmax().item()

    predicted_class_label = class_labels[predicted_class_index]

    print(f"Predicted Class: {predicted_class_label}")

In [None]:
pred()