# Acquire dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import emnist

import torch 
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

In [None]:
def get_data():
    """Load the EMNIST 'letters' training and test splits.

    Returns:
        A 4-tuple ``(train_images, train_labels, test_images, test_labels)``
        where both label arrays have been shifted down by one.
    """
    train_images, train_labels = emnist.extract_training_samples('letters')
    test_images, test_labels = emnist.extract_test_samples('letters')

    # Shift labels down by one; presumably the raw labels start at 1 and the
    # rest of the notebook wants them to index 26 columns from 0 -- confirm.
    shifted_train = train_labels - 1
    shifted_test = test_labels - 1
    return train_images, shifted_train, test_images, shifted_test

In [None]:
# Load train/test images and their labels (already shifted down by one inside get_data()).
X, m_Y, X_test, m_Y_test = get_data()

In [None]:
# Display the dimensions of the training image array.
X.shape

In [None]:
# Turn the image arrays into float32 tensors. torch.tensor always copies its
# input, so np.asarray (no extra copy) is enough on the NumPy side.
X = torch.tensor(np.asarray(X), dtype=torch.float32)
X_test = torch.tensor(np.asarray(X_test), dtype=torch.float32)

In [None]:
# Number of training samples (length of the first axis of X).
m = X.shape[0]

In [None]:
# Display the training-set size.
m

In [None]:
# One-hot encode the integer class labels into 26 columns (one per class).
# F.one_hot does this in a single vectorised call instead of a Python loop
# over every sample. The resulting dtypes match the original tensors:
# float rows for the training targets, long rows for the test targets.
Y = F.one_hot(
    torch.as_tensor(np.asarray(m_Y), dtype=torch.long), num_classes=26
).to(torch.float)
Y_test = F.one_hot(
    torch.as_tensor(np.asarray(m_Y_test), dtype=torch.long), num_classes=26
)

In [None]:
# Pair images with their one-hot targets and batch them.
train_dataset = TensorDataset(X, Y)
# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

test_dataset = TensorDataset(X_test, Y_test)
# Accuracy does not depend on sample order, so the test loader is not
# shuffled; this keeps evaluation batches reproducible between runs.
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
def show_img(i):
    """Draw the image ``i`` on the current matplotlib axes via ``plt.imshow``."""
    plt.imshow(i)

In [None]:
# Preview the first training image.
show_img(X[0])

In [None]:
# Settings: device selection and training hyper-parameters.

device = 'cuda' if torch.cuda.is_available() else 'cpu'  # use the GPU when one is available
learning_rate = 0.01  # step size handed to the SGD optimizer
batch_size = 10  # NOTE(review): not referenced by the visible code; the DataLoaders hard-code batch_size=64
epochs = 10  # number of passes over the training set

In [None]:
class Net(nn.Module):
    """Small convolutional classifier producing 26 class scores.

    Three 5x5 convolutions (6, 16 and 120 output channels) with 2x2 average
    pooling after the first two, followed by two fully connected layers
    (120 -> 84 -> 26). The final layer's output is returned without an
    activation.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2)
        self.pool = nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)
        self.fc1 = nn.Linear(in_features=120, out_features=84)
        self.fc2 = nn.Linear(in_features=84, out_features=26)

    def forward(self, x):
        """Map a batch of single-channel images to a batch of 26 scores.

        ``fc1`` takes 120 features, so the flattened ``conv3`` output must be
        120 values per sample (this holds for 28x28 inputs).
        """
        # Two conv -> ReLU -> average-pool stages.
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Final convolution, then collapse everything but the batch axis.
        x = F.relu(self.conv3(x)).flatten(start_dim=1)
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

# Instantiate the network and move its parameters to the selected device.
model = Net().to(device)

In [None]:
# Smoke test: push one random single-channel 28x28 input through the model.
demo_tensor = torch.rand(1,1,28,28, device=device)
model(demo_tensor)

In [None]:
# Cross-entropy loss; the training loop feeds it the model's raw outputs and the one-hot float targets.
loss_fn = nn.CrossEntropyLoss()

In [None]:
# Plain SGD over all model parameters, using the configured learning rate.
optimizer = optim.SGD(model.parameters(), lr = learning_rate)

In [None]:
# Per-batch training losses; filled by the training loop and plotted afterwards.
losses = []

In [None]:
# Train the model with mini-batch SGD for `epochs` passes over the training
# set, recording every batch loss in `losses` and printing the mean loss of
# each completed group of 200 batches.
model.train()  # make sure the module is in training mode before optimising
for epoch in range(epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    running_loss = 0.0
    for batch, (x, y) in enumerate(train_dataloader):
        # The conv layers expect a channel axis: insert it at position 1.
        x = torch.unsqueeze(x, 1).to(device)
        y = y.to(device)

        pred = model(x)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once; every .item() call copies from the device.
        batch_loss = loss.item()
        running_loss += batch_loss
        losses.append(batch_loss)
        if batch % 200 == 199:
            print(f'[{epoch + 1}, {batch + 1:5d}] loss: {running_loss / 200:.3f}')
            running_loss = 0.0
print('training finished')

In [None]:
# Plot the recorded per-batch training losses.
plt.plot(losses)

In [None]:
# Serialise the whole model object (not just its state_dict) to "noice.pth".
# NOTE(review): loading a fully pickled module presumably needs the Net class
# to be importable at load time -- confirm before sharing this file.
torch.save(model, "noice.pth")

In [None]:
# Top-1 accuracy on the test set: a prediction counts as correct when the
# arg-max of the model output matches the arg-max of the one-hot target.
m_test = X_test.shape[0]
count = 0

with torch.no_grad():  # no gradients needed while evaluating
    for x, y in test_dataloader:
        # Insert the channel axis expected by the conv layers.
        x = x.unsqueeze(1).to(device)
        y = y.to(device)
        count += (model(x).argmax(dim=1) == y.argmax(dim=1)).sum()
#accuracy
print(f'accuracy of model = {count/m_test * 100:.2f}%')

In [None]:
# Preview the second test image.
show_img(X_test[1])

In [None]:
# Scratch check: the character one code point after 'a' (i.e. 'b').
a = 'a'
chr(ord(a) + 1)

In [None]:
# Sanity-check the output shape on a random input. Allocate it on the
# configured `device` (not a hard-coded 'cuda') so the cell also runs on
# CPU-only machines and always matches the device the model lives on.
rand_tensor = torch.rand(1, 1, 28, 28, device=device)
model(rand_tensor).shape

In [None]:
# Classify a single test image and translate the predicted class index
# into its lowercase letter.
arr = X_test[4583]
show_img(arr)
# Add one leading axis here; the second is added at call time so the model
# receives both a batch and a channel axis.
test_tensor = arr.to(device).unsqueeze(0)
letters = [chr(code) for code in range(ord('a'), ord('a') + 26)]

letters[model(test_tensor.unsqueeze(0)).argmax().item()]