In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount recursively and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# Any results you write to the current directory are saved as output.

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms import ToTensor, RandomCrop
import torchvision.transforms.functional as TF

In [None]:
class NumbersDataset(Dataset):
    """Labelled training set assembled from the ``train-<k>.npy`` shard files.

    Each row of the loaded array is indexed as ``row[0]`` (image) and
    ``row[1]`` (label). Labels are mapped to consecutive integer indices in
    the order returned by ``np.unique``.

    Args:
        data_dir: Directory that holds ``train-1.npy`` ... ``train-<num_shards>.npy``.
        num_shards: Number of shard files to load, numbered from 1.
        crop_size: Side length of the square crop returned for every image.

    Attributes:
        train: All shards concatenated along the first axis.
        classes: Dict mapping each distinct label to its integer class index.
        crop_size: The crop side length passed to the constructor.
    """

    def __init__(self, data_dir="../input/chinese-char-recognition-smmo19",
                 num_shards=4, crop_size=80):
        # NOTE(review): allow_pickle=True unpickles whatever the file contains;
        # only load files from a trusted source.
        shards = [
            np.load(f"{data_dir}/train-{shard_no}.npy", allow_pickle=True)
            for shard_no in range(1, num_shards + 1)
        ]
        # Concatenate once instead of re-copying the accumulated array per shard.
        self.train = np.concatenate(shards)
        # label -> class index lookup
        self.classes = {
            label: index
            for index, label in enumerate(np.unique(self.train[:, 1]))
        }
        self.crop_size = crop_size

    def __len__(self):
        """Return the number of training samples."""
        return len(self.train)

    def __getitem__(self, idx):
        """Return ``(image_tensor, class_index)`` for sample ``idx``.

        The image is padded when smaller than ``crop_size`` and then cropped
        at a random position to ``crop_size`` x ``crop_size``.
        """
        sample = self.train[idx]
        crop = RandomCrop(size=self.crop_size, pad_if_needed=True)
        image = TF.to_tensor(crop(TF.to_pil_image(sample[0])))
        # Strict lookup: an unknown label raises KeyError instead of yielding None.
        return image, self.classes[sample[1]]

In [None]:
class TestNumbersDataset(Dataset):
    """Unlabelled test images, each centred on a fixed-size square canvas.

    Args:
        data_path: Path of the ``.npy`` file holding the test images.
        crop_size: Side length of the square image returned for every item.
            The default of 80 matches the crop size used by the training
            dataset in this notebook.

    Attributes:
        test: The array loaded from ``data_path``.
        crop_size: The crop side length passed to the constructor.
    """

    def __init__(self, data_path="../input/chinese-char-recognition-smmo19/test.npy",
                 crop_size=80):
        # NOTE(review): allow_pickle=True unpickles whatever the file contains;
        # only load files from a trusted source.
        self.test = np.load(data_path, allow_pickle=True)
        self.crop_size = crop_size

    def __len__(self):
        """Return the number of test images."""
        return len(self.test)

    def __getitem__(self, idx):
        """Return test image ``idx`` as a ``crop_size`` x ``crop_size`` tensor.

        A fixed output size is required so the default DataLoader collation
        can stack a batch, and so the images match the input size the network
        was trained on. The crop is centred (not random) so predictions are
        reproducible; images smaller than ``crop_size`` are zero-padded.
        """
        image = TF.to_pil_image(self.test[idx])
        image = TF.center_crop(image, [self.crop_size, self.crop_size])
        return TF.to_tensor(image)

In [None]:
# Build both datasets first, then wrap each one in a batching loader.
trainset = NumbersDataset()
testset = TestNumbersDataset()

# Training samples are shuffled; test samples are served in dataset order.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=2
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=128, shuffle=False, num_workers=2
)

In [None]:
# Keys view over the training set's label -> class-index dictionary.
classes = trainset.classes.keys()

In [None]:
class Net(nn.Module):
    """Small CNN classifier for single-channel 80x80 images.

    Layout: conv 5x5 (32 maps) -> ReLU -> 2x2 max-pool -> conv 3x3 (128 maps)
    -> ReLU -> 2x2 max-pool -> fully connected -> ReLU -> fully connected.
    Every weight tensor is Xavier-uniform initialised.

    Args:
        num_classes: Size of the output layer.
        hidden_units: Width of the hidden fully connected layer.
    """

    # Flattened feature count after both conv/pool stages for an 80x80 input:
    # 80 -> conv5 -> 76 -> pool -> 38 -> conv3 -> 36 -> pool -> 18.
    _FLAT_FEATURES = 128 * 18 * 18

    def __init__(self, num_classes=1000, hidden_units=1200):
        super().__init__()
        # Each layer is initialised right after it is created so the order in
        # which random numbers are drawn is the same as before.
        self.conv1 = nn.Conv2d(1, 32, 5)
        nn.init.xavier_uniform_(self.conv1.weight)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 128, 3)
        nn.init.xavier_uniform_(self.conv2.weight)
        self.fc1 = nn.Linear(self._FLAT_FEATURES, hidden_units)
        nn.init.xavier_uniform_(self.fc1.weight)
        self.fc3 = nn.Linear(hidden_units, num_classes)
        nn.init.xavier_uniform_(self.fc3.weight)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``.

        No sigmoid/softmax is applied: ``nn.CrossEntropyLoss`` expects
        unnormalised logits and applies log-softmax itself. Because sigmoid is
        monotonic, an arg-max over these logits picks the same class as an
        arg-max over the previously returned sigmoid outputs.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Keep the batch dimension explicit so a wrongly sized input fails in
        # fc1 instead of being silently folded into the batch axis.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return self.fc3(x)
    
# Place the network on the GPU when one is available; otherwise fall back to
# the CPU instead of raising on a runtime without CUDA.
net = Net().to("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
import torch.optim as optim

# Loss: multi-class cross-entropy. Optimiser: Adam over all parameters of
# `net`, using the library's default hyperparameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters())

In [None]:
# Train for a fixed number of epochs and report the mean batch loss per epoch.
# Batches are moved to whichever device the network's parameters live on, so
# the loop does not assume CUDA.
device = next(net.parameters()).device
net.train()

for epoch in range(5):  # loop over the dataset multiple times
    running_loss = 0.0
    num_batches = 0
    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients, then forward + backward + optimize
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Divide the accumulated loss by the batch count to get the epoch mean;
    # max(..., 1) keeps an empty loader from dividing by zero.
    print('[%d, %5d] loss: %.3f' %
          (epoch + 1, num_batches, running_loss / max(num_batches, 1)))

print('Finished Training')