In [33]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import torchvision
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torchvision import transforms 
import torch.nn as nn
import torch.optim as optim
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [11]:


# Report the torch / torchvision versions in use, one per line.
print(torch.__version__, torchvision.__version__, sep='\n')


1.13.0+cu117
0.14.0+cu117


In [12]:
import os
# Directory that holds the letter images (file names look like '0000_a.png').
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of the dataset before running.
folder_path = 'C:/Users/siddu/SU22_project/cursive_prediction/datafileszipped/datafiles'
os.chdir(folder_path)
# os.listdir() returns entries in arbitrary order; sort them so the row order
# of the table built from this list (and the seeded train/test split derived
# from it) is the same on every run and platform.
files_in_folder = sorted(os.listdir())

In [13]:
# Map each image file name to the letter it depicts. Files are named like
# '0000_a.png', so the label is the last character before the extension.
file_dic = {}
for file in files_in_folder:
    stem, extension = os.path.splitext(file)
    # Skip anything that is not a PNG (or has nothing before the extension):
    # indexing such names from the end would produce a bogus label or raise
    # IndexError on short names.
    if extension.lower() != '.png' or not stem:
        continue
    letter = stem[-1]
    file_dic[file] = letter

In [14]:
# One row per image file: its name and the letter it shows.
name_label_pairs = [*file_dic.items()]
df = pd.DataFrame(data=name_label_pairs, columns=['file_name', 'letter'])

In [15]:
# Display the file_name / letter table.
df

Unnamed: 0,file_name,letter
0,0000_a.png,a
1,0001_b.png,b
2,0002_n.png,n
3,0003_o.png,o
4,0004_e.png,e
...,...,...
1935,1935_i.png,i
1936,1936_x.png,x
1937,1937_o.png,o
1938,1938_l.png,l


In [16]:

# Number of samples per mini-batch handed to the DataLoaders.
BATCH_SIZE = 4
# Side length, in pixels, that every image is resized to (IMG_SIZE x IMG_SIZE).
IMG_SIZE = 64

In [17]:
from PIL import Image

def png_to_tensor(path):
    """Load one image from the dataset folder as a normalized grayscale tensor.

    Args:
        path: File name of the image, relative to the module-level
            ``folder_path``.

    Returns:
        The tensor produced by ``transforms.ToTensor()`` for the grayscale
        ('L' mode) image after resizing to ``IMG_SIZE`` x ``IMG_SIZE``, with
        values rescaled from [0, 1] to [-1, 1].
    """
    image_path = folder_path + '/' + path

    # Open through a context manager so the file handle is released as soon as
    # the pixels have been read; convert() hands back an independent image
    # that stays usable after the file is closed.
    with Image.open(image_path) as image:
        bw_image = image.convert('L')

    data_transform = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),  # Scales data into [0, 1]
        transforms.Lambda(lambda t: (t * 2) - 1),  # Scale between [-1, 1]
    ])
    return data_transform(bw_image)

In [18]:
# FUNCTION TO ADD NOISE 
def add_gaussian_noise(tensor, mean=0.0, std=0.1):
    """Return a noisy copy of ``tensor`` clamped to the range [-1, 1].

    Args:
        tensor: Input tensor; it is not modified.
        mean: Mean of the Gaussian noise added to every element.
        std: Standard deviation of the Gaussian noise.

    Returns:
        A new tensor equal to ``tensor + noise`` with every value limited to
        [-1.0, 1.0].
    """
    # Allocate the noise on the same device as the input; a bare torch.randn()
    # always uses the default device, so adding it to a tensor that lives on
    # another device (e.g. CUDA) would raise a device-mismatch error.
    noise = torch.randn(tensor.size(), device=tensor.device) * std + mean
    noisy_tensor = tensor + noise
    return torch.clamp(noisy_tensor, min=-1.0, max=1.0)

In [19]:
# IMAGE TO NOISY TENSOR
def png_to_noisy_tensor(path, noise_mean=0.0, noise_std=0.1):
    """Load an image like ``png_to_tensor`` and add clamped Gaussian noise.

    Args:
        path: File name of the image, relative to the module-level
            ``folder_path``.
        noise_mean: Mean of the Gaussian noise.
        noise_std: Standard deviation of the Gaussian noise.

    Returns:
        The normalized image tensor with noise added, limited to [-1, 1] by
        ``add_gaussian_noise``.
    """
    # Reuse the shared loader rather than repeating its open / resize /
    # normalize pipeline, so the clean and noisy variants cannot drift apart.
    img_tensor = png_to_tensor(path)
    return add_gaussian_noise(img_tensor, mean=noise_mean, std=noise_std)


In [20]:
def show_tensor_image(tensor):
    """Convert a [-1, 1]-scaled, channel-first image tensor into a PIL image.

    Args:
        tensor: Image tensor laid out as (channels, height, width) with values
            nominally in [-1, 1].

    Returns:
        The PIL image produced by ``transforms.ToPILImage()`` from the
        tensor rescaled to 0-255 and cast to uint8.
    """
    reverse_transforms = transforms.Compose([
        # Drop autograd history and move to host memory first; .numpy() below
        # raises for tensors that require grad or live on a CUDA device.
        transforms.Lambda(lambda t: t.detach().cpu()),
        transforms.Lambda(lambda t: (t + 1) / 2),
        # Keep values inside [0, 1] so slightly out-of-range inputs saturate
        # instead of wrapping around in the uint8 cast.
        transforms.Lambda(lambda t: t.clamp(0.0, 1.0)),
        transforms.Lambda(lambda t: t.permute(1, 2, 0)),  # CHW to HWC
        transforms.Lambda(lambda t: t * 255.),
        transforms.Lambda(lambda t: t.numpy().astype(np.uint8)),
        transforms.ToPILImage(),
    ])
    return reverse_transforms(tensor)

In [21]:
# Keep an independent copy that still has the file_name column; the noisy
# augmentation step reads file names from it after df has dropped them.
df2 = df.copy(deep=True)

In [22]:
# Load every image as a normalized tensor and keep only the image / letter
# columns. The frame is derived from df2 (the copy that still holds file_name)
# without in-place mutation, so re-running this cell produces the same result
# instead of failing on a file_name column that was already dropped.
df = df2.assign(image=df2['file_name'].apply(png_to_tensor))[['image', 'letter']]

In [23]:
# Display the image / letter table (clean images only at this point).
df

Unnamed: 0,image,letter
0,"[[[tensor(0.5608), tensor(0.5843), tensor(0.66...",a
1,"[[[tensor(0.5451), tensor(0.5529), tensor(0.56...",b
2,"[[[tensor(0.6784), tensor(0.7020), tensor(0.67...",n
3,"[[[tensor(0.6706), tensor(0.6549), tensor(0.63...",o
4,"[[[tensor(0.6863), tensor(0.6784), tensor(0.67...",e
...,...,...
1935,"[[[tensor(0.6706), tensor(0.6627), tensor(0.64...",i
1936,"[[[tensor(0.6627), tensor(0.6627), tensor(0.67...",x
1937,"[[[tensor(0.6000), tensor(0.6078), tensor(0.63...",o
1938,"[[[tensor(0.6235), tensor(0.6235), tensor(0.63...",l


In [24]:
# Augment the dataset: append several independently-noised copies of every
# image to the clean images already in df.
NUM_NOISY_COPIES = 10

noisy_frames = []
for _ in range(NUM_NOISY_COPIES):
    # Each pass reloads the images and draws fresh noise for every one.
    noise_df = df2.assign(
        image=df2['file_name'].apply(png_to_noisy_tensor)
    )[['image', 'letter']]
    noisy_frames.append(noise_df)

# Concatenate once at the end; concatenating inside the loop re-copies the
# ever-growing frame on every iteration.
df = pd.concat([df, *noisy_frames])

In [25]:
# Display the table again after the noisy copies were appended.
df

Unnamed: 0,image,letter
0,"[[[tensor(0.5608), tensor(0.5843), tensor(0.66...",a
1,"[[[tensor(0.5451), tensor(0.5529), tensor(0.56...",b
2,"[[[tensor(0.6784), tensor(0.7020), tensor(0.67...",n
3,"[[[tensor(0.6706), tensor(0.6549), tensor(0.63...",o
4,"[[[tensor(0.6863), tensor(0.6784), tensor(0.67...",e
...,...,...
1935,"[[[tensor(0.6481), tensor(0.5903), tensor(0.67...",i
1936,"[[[tensor(0.5757), tensor(0.6452), tensor(0.57...",x
1937,"[[[tensor(0.7354), tensor(0.6577), tensor(0.64...",o
1938,"[[[tensor(0.5820), tensor(0.6283), tensor(0.60...",l


In [26]:
import torch
from torch.utils.data import Dataset, DataLoader


# Flatten the DataFrame into a list of (image_tensor, letter) pairs.
images = df['image'].tolist()
letters = df['letter'].tolist()
data_tuples = list(zip(images, letters))

# Size of the dataset and of the 20% slice held out for testing.
dataset_size = len(data_tuples)
indices = list(range(dataset_size))
split = int(0.2 * dataset_size)

# Fixed seed so the random permutation below is reproducible.
torch.manual_seed(42)
indices_shuffled = torch.randperm(len(indices))

# NOTE(review): df holds clean images plus noisy copies of the same images, so
# a purely random split can place variants of one image in both subsets --
# confirm this is intended before trusting test metrics.
# The first `split` shuffled positions form the test set, the rest train.
test_indices = indices_shuffled[:split]
train_indices = indices_shuffled[split:]

# custom Dataset class
class CustomDataset(Dataset):
    """Dataset over ``(image_tensor, letter)`` pairs that yields integer labels.

    Items are returned as ``(image_tensor, label)`` where ``label`` is the
    zero-based alphabet position of the lowercase letter
    ('a' -> 0, ..., 'z' -> 25).
    """

    def __init__(self, data):
        """Store the samples.

        Args:
            data: Indexable collection of ``(image_tensor, letter)`` pairs.
        """
        self.data = data

    def __getitem__(self, index):
        """Return ``(image_tensor, label)`` for the sample at ``index``.

        Raises:
            ValueError: If the stored letter is not in 'a'..'z'.
        """
        image_tensor, letter = self.data[index]
        label = ord(letter) - ord('a')
        # Fail loudly on anything outside a-z: an out-of-range class index
        # would otherwise only surface later, inside the loss computation, as
        # a much less readable error.
        if not 0 <= label <= ord('z') - ord('a'):
            raise ValueError(
                f"label {letter!r} at index {index} is not a lowercase letter a-z"
            )
        return image_tensor, label

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.data)

# custom Datasets
# Gather the samples selected for each split, then wrap them in datasets.
train_samples = [data_tuples[idx] for idx in train_indices]
test_samples = [data_tuples[idx] for idx in test_indices]
train_dataset = CustomDataset(data=train_samples)
test_dataset = CustomDataset(data=test_samples)

# Training batches are reshuffled by the loader; test batches keep their order.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)


In [27]:
# Print only the first test batch (nothing is printed for an empty loader).
first_test_batch = next(iter(test_loader), None)
if first_test_batch is not None:
    X_batch, y_batch = first_test_batch
    print("X batch:", X_batch)
    print("y batch:", y_batch)

X batch: tensor([[[[0.7791, 1.0000, 0.7877,  ..., 0.9536, 0.8303, 0.8612],
          [0.7047, 0.9952, 0.9044,  ..., 0.8123, 0.9538, 1.0000],
          [1.0000, 0.9997, 0.9426,  ..., 0.9597, 0.9447, 1.0000],
          ...,
          [1.0000, 0.9876, 0.9213,  ..., 0.8801, 0.8522, 0.9436],
          [0.8210, 0.7875, 0.8854,  ..., 0.8975, 0.9423, 0.8963],
          [0.9300, 0.9845, 0.8256,  ..., 0.7891, 1.0000, 0.9033]]],


        [[[0.8417, 0.8776, 0.9022,  ..., 0.8834, 0.9329, 1.0000],
          [1.0000, 1.0000, 0.8884,  ..., 1.0000, 1.0000, 0.9112],
          [1.0000, 1.0000, 1.0000,  ..., 0.8625, 0.9354, 0.9621],
          ...,
          [0.9201, 0.7189, 0.7450,  ..., 0.8225, 0.9999, 0.9856],
          [0.9712, 0.9894, 1.0000,  ..., 0.8661, 0.9027, 0.9999],
          [1.0000, 0.9384, 0.9204,  ..., 0.8725, 0.8693, 0.7989]]],


        [[[0.6787, 0.6066, 0.6461,  ..., 0.7067, 0.6820, 0.7064],
          [0.6100, 0.6682, 0.4225,  ..., 1.0000, 0.6869, 0.8561],
          [0.6254, 0.4300, 0.

In [28]:
# Number of mini-batches the training loader yields per epoch.
len(train_loader)

4268

In [29]:

# Define the AlexNet architecture
class AlexNet(nn.Module):
    """AlexNet-style CNN that takes single-channel images.

    ``features`` is a stack of five convolutions (each followed by ReLU) with
    three max-pooling layers, ``avgpool`` adaptively pools to 6x6, and
    ``classifier`` is a three-layer fully connected head with dropout that
    outputs ``num_classes`` scores per sample.
    """

    def __init__(self, num_classes=26):
        super().__init__()

        # Convolutional feature extractor (1 input channel -> 256 channels).
        conv_stack = [
            nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Fixed 6x6 spatial size so the first linear layer always sees
        # 256 * 6 * 6 inputs.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # Fully connected classification head.
        head = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        pooled = self.avgpool(self.features(x))
        flat = torch.flatten(pooled, 1)  # keep the batch dimension
        return self.classifier(flat)

In [30]:
# Print only the first training batch (nothing is printed for an empty loader).
first_train_batch = next(iter(train_loader), None)
if first_train_batch is not None:
    inputs, labels = first_train_batch
    print(inputs)
    print(labels)

tensor([[[[0.6157, 0.6235, 0.6235,  ..., 0.6314, 0.6314, 0.6235],
          [0.6314, 0.6392, 0.6314,  ..., 0.6314, 0.6235, 0.6157],
          [0.6314, 0.6392, 0.6235,  ..., 0.6314, 0.6235, 0.6235],
          ...,
          [0.6157, 0.6235, 0.6314,  ..., 0.6392, 0.6235, 0.6078],
          [0.6314, 0.6314, 0.6314,  ..., 0.6471, 0.6314, 0.6235],
          [0.6235, 0.6314, 0.6392,  ..., 0.6392, 0.6392, 0.6392]]],


        [[[0.6451, 0.5996, 0.7059,  ..., 0.7618, 0.6380, 0.5506],
          [0.7886, 0.7488, 0.8570,  ..., 0.5597, 0.5718, 0.5816],
          [0.7106, 0.6241, 0.5835,  ..., 0.5948, 0.5308, 0.5706],
          ...,
          [0.6845, 0.6438, 0.5147,  ..., 0.6296, 0.5529, 0.5058],
          [0.4930, 0.8210, 0.5579,  ..., 0.6089, 0.6032, 0.6147],
          [0.6179, 0.5367, 0.5228,  ..., 0.8974, 0.4569, 0.6635]]],


        [[[0.6695, 0.6917, 0.5166,  ..., 0.5325, 0.7743, 0.4430],
          [0.7896, 0.6186, 0.5029,  ..., 0.6457, 0.6243, 0.7152],
          [0.5100, 0.4344, 0.6162,  ..

In [42]:
# Instantiate the model
alexnet = AlexNet().to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(alexnet.parameters(), lr=0.01, momentum=0.9)

num_epochs = 10
# Explicitly select training mode so the dropout layers are active.
alexnet.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        # The DataLoader already collates the labels into a tensor; move and
        # cast it directly instead of re-wrapping it with torch.tensor(),
        # which makes an extra copy and emits a UserWarning.
        labels = labels.to(device=device, dtype=torch.long)

        optimizer.zero_grad()
        outputs = alexnet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean per-batch loss over the epoch.
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

print("Training Finished")


RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.