In [None]:
import torch
import torchvision.transforms as T
from torch.utils.data import Dataset, DataLoader

ModuleNotFoundError: No module named 'torchvision'

In [None]:
# Mount Google Drive at /content/drive (Colab only) so files stored there are reachable by path.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import shutil
from PIL import Image


In [None]:
# Working directory on the mounted Drive: the source image, the per-letter class
# folders, the `preds` folder and result.csv are all addressed relative to it.
root = '/content/drive/MyDrive/DL'

In [None]:
# Disable Pillow's pixel-count safety limit so an arbitrarily large image can be opened.
Image.MAX_IMAGE_PIXELS = None
# Load the sheet of letters and convert it to 3-channel RGB.
image = Image.open(os.path.join(root,'letters.png' )).convert('RGB')

In [None]:
import math
width, height = image.size
# Side length (px) of one tile if the sheet is split into 10000 equal square tiles.
# NOTE(review): presumably a 100x100 grid of letters; the cropping code further down
# hard-codes 300 px instead of using this value — confirm they agree.
per_letter = int(math.sqrt(width*height/10000))

In [None]:
# Candidate fonts (DejaVu family bundled with matplotlib) to compare against the source image.
# NOTE(review): the paths pin the python3.10 site-packages location — confirm for the current runtime.
font_list = [
    '/usr/local/lib/python3.10/dist-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans-Bold.ttf',
    '/usr/local/lib/python3.10/dist-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono-Bold.ttf',
    '/usr/local/lib/python3.10/dist-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSansMono.ttf',
    '/usr/local/lib/python3.10/dist-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans.ttf', # my font
    '/usr/local/lib/python3.10/dist-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif-Italic.ttf',
    '/usr/local/lib/python3.10/dist-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSerif.ttf',
 ]

In [None]:
from PIL import Image, ImageDraw, ImageFont
import random
import matplotlib.pyplot as plt

# Render the same glyph with every candidate font so it can be compared by eye
# with a tile cropped from the source image.
character = 'J'
for font_path in font_list:
  fnt = ImageFont.truetype(font_path, 150)
  # FreeTypeFont.getsize() was removed in Pillow 10. The right/bottom edges of
  # getbbox() are the extents measured from the drawing origin, i.e. the same
  # (w, h) that getsize() used to return.
  _, _, w, h = fnt.getbbox(character)

  img = Image.new('L', (300, 300), color='black')
  d = ImageDraw.Draw(img)
  # Offset by half the free space so the glyph sits roughly in the centre of the canvas.
  d.text(((300-w)/2, (300-h)/2), character, font=fnt, fill=255, align="center")
  img.show()


In [None]:
# Crop one 300x300 region (left, upper, right, lower) of the source image and display it,
# for visual comparison with the rendered font samples.
box = (600, 600, 900, 900)
img2 = image.crop(box)
img2.show()

Эмпирическим путём установили, что мой шрифт — '/usr/local/lib/python3.10/dist-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans.ttf'

In [None]:
# Font selected by the visual comparison above; default font for dataset generation.
# NOTE(review): the path pins the python3.10 site-packages location — confirm it exists in the current runtime.
MY_FONT = '/usr/local/lib/python3.10/dist-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans.ttf'

# Генерация данных

In [None]:
from os.path import exists
import string
import os

# Class labels: the 26 uppercase ASCII letters, in alphabetical order.
CHARACTERS = list(string.ascii_uppercase)

class Generate_Dataset:
  """Generates synthetic 300x300 images: one randomly placed letter plus translucent circles.

  Args:
    SIZE: total number of images to generate. It is split evenly between the
      classes (``SIZE // n_classes`` images per class; the remainder is dropped).
    root: directory under which one sub-directory per letter is created.
    font_: path to the TrueType font used to draw the letters.
  """

  CANVAS = 300  # side of the square canvas, in pixels

  def __init__(self, SIZE, root, font_ = MY_FONT):

    self.TINT_COLOR = (0, 0, 0)
    self.font_ = font_
    self.SIZE = SIZE
    self.labels = CHARACTERS
    self.n_classes = len(self.labels)
    self.ROOT = root

  @staticmethod
  def _text_extent(fnt, character):
    """Returns the integer (width, height) of `character`, measured from the drawing origin.

    `FreeTypeFont.getsize()` was removed in Pillow 10, so `getbbox()` is preferred;
    its right/bottom edges are the same extents `getsize()` used to report.
    """
    if hasattr(fnt, 'getbbox'):
      _, _, right, bottom = fnt.getbbox(character)
      # Round up (ceil without importing math) so the glyph never exceeds the reported extent.
      return int(-(-right // 1)), int(-(-bottom // 1))
    return fnt.getsize(character)

  def draw_circle(self):
    """Returns a transparent RGBA overlay containing one random semi-transparent circle."""
    overlay = Image.new('RGBA', (self.CANVAS, self.CANVAS), self.TINT_COLOR+(0,))
    draw = ImageDraw.Draw(overlay)  # drawing context bound to the overlay
    size = random.randint(15, 50)
    start1 = random.randint(0, self.CANVAS-size)
    start2 = random.randint(0, self.CANVAS-size)
    # Random RGB colour with a low alpha (50-70 of 255) so the letter stays visible underneath.
    circle_color = (random.randint(0, 200), random.randint(0, 200), random.randint(0, 200), random.randint(50, 70))
    draw.ellipse(((start1, start2), (start1+size, start2+size)), fill = circle_color)

    return overlay

  def draw_letter(self, character):
    """Returns a white RGBA canvas with `character` drawn at a random size, colour and position."""
    fnt = ImageFont.truetype(self.font_, size = random.randint(100,200))
    w, h = self._text_extent(fnt, character)

    img = Image.new('RGBA', (self.CANVAS, self.CANVAS), color='white')
    d = ImageDraw.Draw(img)
    letter_color = (random.randint(0, 200), random.randint(0, 200), random.randint(0, 200))
    # max(0, ...) keeps randint valid even if a glyph is larger than the canvas.
    start_w = random.randint(0, max(0, self.CANVAS-w))
    start_h = random.randint(0, max(0, self.CANVAS-h))

    d.text((start_w, start_h), character, font=fnt, fill=letter_color, align="center")

    return img

  def get_final_pic(self, character):
    """Returns the letter image with 5 to 30 random circles alpha-composited on top."""
    img = self.draw_letter(character)
    amount = random.randint(5,30)

    for _ in range(amount):
      img = Image.alpha_composite(img, self.draw_circle())

    return img

  def gen_dataset(self):
    """Writes the dataset to disk as `{ROOT}/{letter}/{num}.png`.

    `num` is a running counter shared by all classes, so file names are unique
    across the whole dataset.
    """
    class_len = self.SIZE//self.n_classes
    num = 0

    for label in self.labels:
      path = f'{self.ROOT}/{label}'
      # Also creates ROOT if it is missing and tolerates an already existing directory.
      os.makedirs(path, exist_ok=True)

      for _ in range(class_len):
        self.get_final_pic(label).save(f"{path}/{num}.png")
        num+=1

In [None]:
# %%time
# Generate_Dataset(SIZE = 10000, root = root).gen_dataset()

In [None]:
# One directory per letter. Sorted, because os.listdir() returns entries in arbitrary
# order and the position in this list becomes the numeric class label: sorting makes
# index i correspond to CHARACTERS[i] (A=0, ..., Z=25) on every run.
class_dirs = sorted(f'{root}/{d}' for d in os.listdir(f'{root}') if d in CHARACTERS)

# Загрузка данных

In [None]:
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

class MyDataLoader(Dataset):
  """Image dataset read from one directory per class, with a fixed train/test split.

  The label of a sample is the index of its directory in `class_dirs`. Each
  directory's sorted file list is split with `train_test_split`, using a
  per-class seed (`SPLIT_RANDOM_SEED + class index`) and `TEST_SIZE` as the test
  fraction. Only the requested part of the split is kept.

  Args:
    class_dirs: list of directory paths, one per class.
    train: keep the training part of the split if True, otherwise the test part.
    transform: optional callable applied to the RGB PIL image.
  """

  SPLIT_RANDOM_SEED = 42
  TEST_SIZE = 0.2

  def __init__(self,class_dirs, train = True, transform = None):

    super().__init__()
    self.train = train
    self.transform = transform
    self.to_tensor = T.ToTensor()
    self.all_files = []
    self.all_labels = []
    self.images = []
    self.classes = class_dirs

    for class_idx, class_dir in tqdm(enumerate(self.classes), total=len(self.classes)):
      names = sorted(os.listdir(f'{class_dir}/'))
      train_part, test_part = train_test_split(
          names,
          random_state=self.SPLIT_RANDOM_SEED + class_idx,
          test_size=self.TEST_SIZE,
      )
      selected = train_part if self.train else test_part
      self.all_files.extend(selected)
      self.all_labels.extend([class_idx] * len(selected))

  def __len__(self):
    """Number of samples kept for this split."""
    return len(self.all_files)

  def __getitem__(self, item):
    """Returns `(image, label)`; the image is RGB and passed through `transform` if one is set."""
    label = self.all_labels[item]
    image_path = os.path.join(self.classes[label], self.all_files[item])
    image = Image.open(image_path).convert('RGBA').convert(mode='RGB')

    if self.transform is None:
      return image, label
    return self.transform(image), label


In [None]:
# NOTE(review): the disabled normalisation below lists four channels, while the datasets
# convert every image to 3-channel RGB — it would need three values per list if re-enabled.
# normalize = T.Normalize(mean=[0.485, 0.456, 0.406, 0.406], std=[0.229, 0.224, 0.225, 0.225])

# Training pipeline: resize, elastic distortion as augmentation, then conversion to a tensor.
train_transform = T.Compose([
    T.Resize(256),
    T.ElasticTransform(alpha=25.0),
    T.ToTensor(),

    # normalize,
])

# Evaluation pipeline: same resize and tensor conversion, without augmentation.
test_transform = T.Compose([
    T.Resize(256),
    T.ToTensor(),
    # normalize,
])

In [None]:
# Both datasets see the same class directories; `train` selects which side of the split is kept.
train_dataset = MyDataLoader(class_dirs=class_dirs, train=True, transform=train_transform)
test_dataset = MyDataLoader(class_dirs=class_dirs, train=False, transform=test_transform)

In [None]:
# Sanity check: convert one transformed training sample back to a PIL image and display it.
transform = T.ToPILImage()
im, lab = train_dataset[500]
im = transform(im)
im.show()


In [None]:
# Batches of 32 loaded by 4 worker processes; only the training data is shuffled.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, pin_memory=True, num_workers=4)

# Сетка

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import clear_output
from tqdm.notebook import tqdm


# Global plot appearance: seaborn white grid and a larger default font.
sns.set_style('whitegrid')
plt.rcParams.update({'font.size': 15})


def plot_losses(train_losses, test_losses, train_accuracies, test_accuracies):
    """Clears the cell output and redraws the loss and accuracy curves side by side.

    Each argument is a sequence with one value per finished epoch; epochs are
    numbered from 1 on the x axis.
    """
    clear_output()
    fig, axs = plt.subplots(1, 2, figsize=(13, 4))

    panels = (
        (axs[0], 'loss', train_losses, test_losses),
        (axs[1], 'accuracy', train_accuracies, test_accuracies),
    )
    for ax, ylabel, train_series, test_series in panels:
        for label, series in (('train', train_series), ('test', test_series)):
            epochs = range(1, len(series) + 1)
            ax.plot(epochs, series, label=label)
        ax.set_ylabel(ylabel)
        ax.set_xlabel('epoch')
        ax.legend()

    plt.show()

In [None]:
def training_epoch(model, optimizer, criterion, train_loader, tqdm_desc):
    """Runs one optimisation pass over `train_loader`.

    Batches are moved to the module-level `device`. Returns
    `(mean loss per sample, accuracy)`, both averaged over
    `len(train_loader.dataset)`.
    """
    model.train()
    loss_sum, correct = 0.0, 0.0

    for images, labels in tqdm(train_loader, desc=tqdm_desc):
        # images: batch_size x num_channels x height x width; labels: batch_size
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        logits = model(images)  # batch_size x num_classes
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the final mean is per sample.
        batch_size = images.shape[0]
        loss_sum += loss.item() * batch_size
        predictions = logits.argmax(dim=1)
        correct += (predictions == labels).sum().item()

    n_samples = len(train_loader.dataset)
    return loss_sum / n_samples, correct / n_samples


@torch.no_grad()
def validation_epoch(model, criterion, test_loader, tqdm_desc):
    """Evaluates `model` on `test_loader` with gradients disabled.

    Batches are moved to the module-level `device`. Returns
    `(mean loss per sample, accuracy)`, both averaged over
    `len(test_loader.dataset)`.
    """
    model.eval()
    loss_sum, correct = 0.0, 0.0

    for images, labels in tqdm(test_loader, desc=tqdm_desc):
        # images: batch_size x num_channels x height x width; labels: batch_size
        images, labels = images.to(device), labels.to(device)
        logits = model(images)  # batch_size x num_classes
        batch_loss = criterion(logits, labels).item()

        # Weight the batch loss by the batch size so the final mean is per sample.
        loss_sum += batch_loss * images.shape[0]
        correct += (logits.argmax(dim=1) == labels).sum().item()

    n_samples = len(test_loader.dataset)
    return loss_sum / n_samples, correct / n_samples


def train(model, optimizer, scheduler, criterion, train_loader, test_loader, num_epochs):
    """Trains for `num_epochs` epochs, validating and re-plotting the curves after each one.

    The scheduler, if given, is stepped once per epoch after validation.
    Returns four per-epoch lists:
    `(train_losses, test_losses, train_accuracies, test_accuracies)`.
    """
    train_losses, test_losses = [], []
    train_accuracies, test_accuracies = [], []

    for epoch in range(1, num_epochs + 1):
        train_loss, train_accuracy = training_epoch(
            model, optimizer, criterion, train_loader,
            tqdm_desc=f'Training {epoch}/{num_epochs}'
        )
        test_loss, test_accuracy = validation_epoch(
            model, criterion, test_loader,
            tqdm_desc=f'Validating {epoch}/{num_epochs}'
        )

        if scheduler is not None:
            scheduler.step()

        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)
        plot_losses(train_losses, test_losses, train_accuracies, test_accuracies)

    return train_losses, test_losses, train_accuracies, test_accuracies

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU; the training
# and validation functions read this module-level variable.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

In [None]:
from torchvision.models import resnet18, ResNet18_Weights

In [None]:
# ResNet-18 initialised with the IMAGENET1K_V1 pretrained weights.
model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)

In [None]:
model

In [None]:
# Replace the classification head with one output per letter class. The input width is
# read from the existing layer instead of hard-coding 512, so the cell keeps working
# if the backbone is swapped (and stays correct when the cell is re-run).
model.fc = torch.nn.Linear(model.fc.in_features, len(train_dataset.classes))
# model.conv1 = torch.nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

In [None]:
num_epochs = 10
model = model.to(device)

# SGD with momentum, cross-entropy loss, and a cosine learning-rate schedule
# whose period (T_max) is the number of epochs.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
criterion = torch.nn.CrossEntropyLoss()
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, num_epochs)

In [None]:
!nvidia-smi

In [None]:
# NOTE(review): num_epochs is assigned again here; keep it equal to the value used when the
# scheduler was created, otherwise the cosine schedule no longer spans the whole run.
num_epochs = 10
train_losses, test_losses, train_accuracies, test_accuracies = train(
    model, optimizer, scheduler, criterion, train_loader, test_loader, num_epochs
)

In [None]:
import pickle
# Persist the trained model. The context manager guarantees the file is flushed and
# closed (the bare open() call previously left the handle open).
# NOTE: this pickles the whole model object, so loading it requires the same class
# definitions to be importable; only unpickle files you trust.
with open('model.pkl', 'wb') as model_file:
  pickle.dump(model, model_file)

# Разрежем большую картинку на буквы

In [None]:
# Directory that receives the individual letter tiles cut from the big image.
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it also creates missing
# parents and does not fail if the directory already exists.
path = f'{root}/preds'
os.makedirs(path, exist_ok=True)


In [None]:
# num =10000
# for row in range(100):
#   for col in range(100):
#     box = (row*300, col*300, (row+1)*300, (col+1)*300)
#     img2 = image.crop(box)
#     img2 = img2.rotate(-5*(col+row), Image.NEAREST, expand = 1, fillcolor = 'white')
#     img2 = img2.save(f"{path}/{num}.png")
#     num += 1


In [None]:
# import shutil
# shutil.rmtree(path)

In [None]:
# Spot-check one saved tile by opening and displaying it.
path1 = f'{path}/10219.png'
img = Image.open(path1)
img.show()

In [None]:
len(os.listdir(path))

# Предсказания

In [None]:
class PredsLoader(Dataset):
  """Unlabelled image dataset over every file in `preds_dir`, in sorted file-name order.

  Args:
    preds_dir: directory containing the images to predict on.
    transform: optional callable applied to the RGB PIL image.
  """

  def __init__(self, preds_dir, transform = None):

    super().__init__()
    self.transform = transform
    self.to_tensor = T.ToTensor()
    self.all_files = []
    self.images = []
    self.preds_dir = preds_dir
    self.files = sorted(os.listdir(f'{preds_dir}/'))

    # Copy the names through tqdm so a progress bar is still shown.
    self.all_files.extend(tqdm(self.files, total=len(self.files)))

  def __len__(self):
    """Number of images in the directory."""
    return len(self.all_files)

  def __getitem__(self, item):
    """Returns the image at position `item`, RGB and passed through `transform` if one is set."""
    image_path = os.path.join(self.preds_dir, self.all_files[item])
    image = Image.open(image_path).convert('RGB')

    if self.transform is None:
      return image
    return self.transform(image)



In [None]:
# Dataset over the cut-out tiles, preprocessed with the evaluation pipeline.
p = PredsLoader(preds_dir = path, transform = test_transform)

In [None]:
# No shuffling: predictions come out in the dataset's sorted file-name order.
pred_loader = DataLoader(p, batch_size=8, shuffle=False, pin_memory=True, num_workers=2)


In [None]:
# Predicted class index for every tile, in loader order.
preds2 = []
model.eval()
# Inference only: disabling autograd avoids building a computation graph for every
# batch, which saves memory and time. The predictions themselves are unchanged.
with torch.no_grad():
    for images in tqdm(pred_loader):
        images = images.to(device)  # images: batch_size x num_channels x height x width
        logits = model(images)  # logits: batch_size x num_classes
        preds2.extend(torch.argmax(logits, dim = 1).tolist())


In [None]:
import numpy as np
# Letter for each class index, taken from the directories the model was trained on.
# This stays correct even if those directories are not in alphabetical order.
class_names = [os.path.basename(d) for d in train_dataset.classes]

# minlength keeps one slot per class; without it bincount stops at the highest
# predicted index and the zip below would silently drop the remaining letters.
pr = np.bincount(preds2, minlength=len(class_names))
pr = [int(p) for p in pr]
counts_by_letter = dict(zip(class_names, pr))

# Final mapping letter -> number of tiles predicted as that letter, in alphabetical
# order; letters that were never predicted get 0.
result = {letter: counts_by_letter.get(letter, 0) for letter in CHARACTERS}

In [None]:
import json

# Store the letter -> count mapping as JSON text in the current working directory.
with open('result.txt', 'w') as out_file:
    json.dump(result, out_file)

In [None]:
import pandas as pd
let = list(result.keys())
fr = list(result.values())
# The count of the first letter becomes the header of the counts column; the remaining
# letters and their counts become the rows.
# NOTE(review): presumably a header-less submission format where the first value sits
# in the header row — confirm against the expected output format.
col1 = fr[0]
res = pd.DataFrame({'A': let[1:len(let)], str(col1): fr[1:len(fr)]})

In [None]:
# Keep only the counts column and write it to Drive. errors='ignore' makes the cell
# safe to re-run: previously a second run raised KeyError because 'A' was already gone.
res = res.drop(columns=['A'], errors='ignore')
res.to_csv(f'{root}/result.csv')