In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [91]:
import torch
import  torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

In [92]:
# Hyper params
batch_size = 8
epochs = 20
lr = 0.0005

In [93]:
class Model(nn.Module):
  def __init__(self, p = 0.2):
    super().__init__()
    self.conv1 = nn.Conv2d(3, 32, (3,3) , (1,1), (1,1)) # input channel - output channel - mask size , stride - padding
    self.conv2 = nn.Conv2d(32, 64, (3,3) , (1,1), (1,1))
    self.conv3 = nn.Conv2d(64, 128, (3,3) , (1,1), (1,1))

    self.fc1 = nn.Linear(128*8*8, 128)
    self.fc2 = nn.Linear(128, 10)
    self.dropout = nn.Dropout(p) 

  def forward(self, x):
    x = F.relu(self.conv1(x))
    x = F.max_pool2d(x, kernel_size=(2,2))

    x = F.relu(self.conv2(x))
    x = F.max_pool2d(x, kernel_size=(2,2))

    x = F.relu(self.conv3(x))
    x = F.max_pool2d(x, kernel_size=(2,2))
    x = torch.flatten(x, start_dim=1)
    self.dropout(x)
    x = self.fc1(x)
    self.dropout(x)
    x = self.fc2(x)
    x = torch.softmax(x, dim=1)

    return x


In [94]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Model().to(device)

In [95]:
def train_val_dataset(dataset, val_split=0.2):
    train_idx, val_idx = train_test_split(list(range(len(dataset))), test_size=val_split)
    datasets = {}
    datasets['train'] = Subset(dataset, train_idx)
    datasets['val'] = Subset(dataset, val_idx)
    return datasets


transform = transforms.Compose([
     transforms.Resize((70,70)),
     transforms.RandomRotation(20),
     transforms.ToTensor(),
     torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

dataset = torchvision.datasets.ImageFolder("/content/drive/MyDrive/MNIST_persian", transform = transform)
print(len(dataset))
datasets = train_val_dataset(dataset)
print(len(datasets['train']))
print(len(datasets['val']))

trainDataLoader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)


1200
960
240


In [96]:
# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
lossFunction = torch.nn.CrossEntropyLoss()

In [97]:
def accuracyCacculator(preds, lables):
  _, preds_max = torch.max(preds,1)
  acc = torch.sum(preds_max == lables , dtype=torch.float64)/len(preds)
  return acc

In [98]:
model.train()

for epoch in range(epochs):
  trainLoss = 0.0
  trainAccuracy = 0.0
  for images, labels in tqdm(trainDataLoader):
    images = images.to(device)
    labels = labels.to(device)
    optimizer.zero_grad()

    predictions = model(images)
    loss = lossFunction(predictions,labels)
    loss.backward()
    optimizer.step()

    trainLoss += loss
    trainAccuracy += accuracyCacculator(predictions, labels)

  totalLoss = trainLoss/len(trainDataLoader)
  totalAccuracy = trainAccuracy/len(trainDataLoader)
  print(f"Epoch: {epoch +1}, Loss: {totalLoss}, Accuracy: {totalAccuracy}")

100%|██████████| 150/150 [00:04<00:00, 36.15it/s]


Epoch: 1, Loss: 2.2809181213378906, Accuracy: 0.12916666666666668


100%|██████████| 150/150 [00:03<00:00, 37.56it/s]


Epoch: 2, Loss: 1.9230170249938965, Accuracy: 0.5383333333333333


100%|██████████| 150/150 [00:04<00:00, 37.36it/s]


Epoch: 3, Loss: 1.8110705614089966, Accuracy: 0.655


100%|██████████| 150/150 [00:03<00:00, 37.70it/s]


Epoch: 4, Loss: 1.7431919574737549, Accuracy: 0.7200000000000001


100%|██████████| 150/150 [00:03<00:00, 37.86it/s]


Epoch: 5, Loss: 1.6865767240524292, Accuracy: 0.7741666666666667


100%|██████████| 150/150 [00:03<00:00, 38.25it/s]


Epoch: 6, Loss: 1.6382646560668945, Accuracy: 0.8258333333333334


100%|██████████| 150/150 [00:03<00:00, 37.63it/s]


Epoch: 7, Loss: 1.5938748121261597, Accuracy: 0.8725


100%|██████████| 150/150 [00:03<00:00, 38.37it/s]


Epoch: 8, Loss: 1.5718448162078857, Accuracy: 0.8933333333333334


100%|██████████| 150/150 [00:04<00:00, 37.44it/s]


Epoch: 9, Loss: 1.5593425035476685, Accuracy: 0.9083333333333334


100%|██████████| 150/150 [00:03<00:00, 37.71it/s]


Epoch: 10, Loss: 1.5396050214767456, Accuracy: 0.9225000000000001


100%|██████████| 150/150 [00:03<00:00, 37.69it/s]


Epoch: 11, Loss: 1.542871356010437, Accuracy: 0.9241666666666667


100%|██████████| 150/150 [00:03<00:00, 37.79it/s]


Epoch: 12, Loss: 1.5280725955963135, Accuracy: 0.9366666666666668


100%|██████████| 150/150 [00:03<00:00, 37.57it/s]


Epoch: 13, Loss: 1.5214009284973145, Accuracy: 0.9425000000000001


100%|██████████| 150/150 [00:04<00:00, 36.04it/s]


Epoch: 14, Loss: 1.5154271125793457, Accuracy: 0.9516666666666668


100%|██████████| 150/150 [00:03<00:00, 37.90it/s]


Epoch: 15, Loss: 1.5145987272262573, Accuracy: 0.9491666666666667


100%|██████████| 150/150 [00:04<00:00, 37.36it/s]


Epoch: 16, Loss: 1.5107536315917969, Accuracy: 0.9500000000000001


100%|██████████| 150/150 [00:03<00:00, 37.92it/s]


Epoch: 17, Loss: 1.510054349899292, Accuracy: 0.9550000000000001


100%|██████████| 150/150 [00:03<00:00, 38.01it/s]


Epoch: 18, Loss: 1.5068572759628296, Accuracy: 0.9575


100%|██████████| 150/150 [00:03<00:00, 38.89it/s]


Epoch: 19, Loss: 1.521979808807373, Accuracy: 0.9416666666666668


100%|██████████| 150/150 [00:03<00:00, 38.47it/s]

Epoch: 20, Loss: 1.5163789987564087, Accuracy: 0.9458333333333334





In [99]:
#save
torch.save(model.state_dict(),'persian-mnist.pth')

In [106]:
# Inference
import cv2 as cv
import numpy as np

model = Model()
model.load_state_dict(torch.load('persian-mnist.pth'))

inference_transform = transforms.Compose([
     transforms.ToTensor(),
     torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

model.to(device)
model.train(False)
model.eval()

img = cv.imread('three.png')
img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
img = cv.resize(img,(64,64))
tensor = inference_transform(img).unsqueeze(0).to(device)
predictions = model(tensor).cpu().detach().numpy()
print(np.argmax(predictions, axis=1))

[3]
