<a href="https://colab.research.google.com/github/debemdeboas/pucrs-aprendizado-de-maquina-t2/blob/master/PUCRS_Aprendizado_de_M%C3%A1quina_T2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import tensorflow as tf
from PIL import Image
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from torchvision import datasets
from torchvision import transforms as T
from torchvision.transforms import functional as TF
from tqdm import tqdm

Download and extract our dataset.

This will download a tarred file and extract it into `dist`. Then, we're renaming it to `animes/`.
This directory contains the following files:
- `animes.csv`, the CSV containing anime IDs, URLs, titles, genres, and poster path
- `animes.pkl`, serialized (pickled) list of `Anime` instances. This isn't used by this notebook
- `images/`, a directory that contains all of our anime posters as `images/<mal_id>.jpg` files


In [5]:
ds = requests.get("https://public-s3.debem.dev/anime_dataset.tar.xz", allow_redirects=True)

with open("anime_dataset.tar.xz", "wb") as f:
    f.write(ds.content)

!tar xf anime_dataset.tar.xz

df = pd.read_csv("animes.csv")
df = df.dropna()
df["genres"] = df["genres"].str.split('|')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["genres"] = df["genres"].str.split('|')


In [6]:
all_genres_to_idx = dict()
all_genres = list()
for gl in df.genres:
    for g in gl:
        if g not in all_genres_to_idx:
            all_genres.append(g)
            all_genres_to_idx[g] = len(all_genres) - 1

In [7]:
class PosterMultiLabelDataset(Dataset):
    def __init__(self, df: pd.DataFrame, transform=None, *args, **kwargs):
        self.df = df
        if transform:
            self.transform = transform
        else:
            self.transform = T.Compose([
                T.Resize((256,256)),
                T.RandomResizedCrop(224),
                T.ToTensor(),
                T.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
                ])

    def __len__(self) -> int:
        return len(self.df)

    def __getitem__(self, idx):
        img_path = self.df.loc[idx].img_path
        img = Image.open(img_path)
        img = img.convert('RGB')
        return self.transform(img), torch.Tensor([1 if g in self.df.loc[idx].genres else 0 for g in all_genres])
        # return {
        #     "image": self.transform(img),
        #     "labels": torch.Tensor([1 if g in self.df.loc[idx].genres else 0 for g in all_genres])
        # }

Now let's get some transfer learning done.

We'll use a pre-trained convolutional network to analyze the posters to define which genres a given anime belongs to.
Each anime can belong to any number of genres.


In [45]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

def validation(model, loader, criterion):
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for data in loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs,labels)
            val_loss +=loss
    return val_loss/len(loader)


def train(model, trainloader, testloader, optimizer, criterion, epochs):
    for epoch in range(epochs):
        model.train()
        running_loss = 0
        for i, (images, labels) in tqdm(enumerate(trainloader)):
            images = images.to(device)
            labels = labels.to(device)
            model.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        val_loss = validation(model, testloader, criterion)
        print(f'Epoch: {epoch+1} | Loss: {running_loss/len(trainloader)} | Val Loss: {val_loss}')


def confusion_matrix(model, loader):
    model.eval()
    confusion_matrix = np.zeros((len(all_genres), len(all_genres)))
    with torch.no_grad():
        for (img, lbl) in loader:
            img = img.to(device)
            lbl = lbl.to(device)
            output = model(img)
            predictions = output > 0.5

                # confusion_matrix[idx_true][idx_pred] += int(predictions[i][j].item())

            # list(zip(zip(ls[0].tolist(), predictions[0].tolist()), all_genres))

            for ll, pp in zip(lbl, predictions):
                for i, (l, p) in enumerate(zip(ll, pp)):
                    print(all_genres[i], l.item(), p.item())
                    break
                break


                # confusion_matrix[idx_true][idx_pred] += 1
    ax = sns.heatmap(confusion_matrix, annot=True, cmap='Blues', fmt='g')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Label')
    return ax


def accuracy(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for (img, lbl) in loader:
            img = img.to(device)
            lbl = lbl.to(device)
            output = model(img)
            predictions = output > 0.5
            correct += (predictions | (lbl > 0)).sum().item()
            total += lbl.shape[0] * lbl.shape[1]
    return correct * 100 // total


In [9]:
from torchvision.models import resnet50

resnet = resnet50(weights = True)
resnet.fc = nn.Linear(2048, len(all_genres))
for name, params in resnet.named_parameters():
    if name not in ('fc.weight', 'fc.bias'):
        params.requires_grad = False

resnet.to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
epochs = 3



In [11]:
# dados de treino = 70%, validação = 15%, teste = 15%
trainData = df.sample(frac = 0.7)
trainDataLeftover = df.drop(trainData.index)
validationData = trainDataLeftover.sample(frac = 0.5)
testData = trainDataLeftover.drop(validationData.index)

testData = testData.reset_index()
validationData = validationData.reset_index()
trainData = trainData.reset_index()

In [12]:
trainloader = DataLoader(PosterMultiLabelDataset(df=trainData), batch_size=4, shuffle=True)
testloader = DataLoader(PosterMultiLabelDataset(df=testData), batch_size=4, shuffle=False)
valloader = DataLoader(PosterMultiLabelDataset(df=validationData), batch_size=4, shuffle=False)

In [13]:
train(resnet, trainloader, testloader, optimizer, criterion, epochs)

4248it [05:50, 12.12it/s]


Epoch: 1 | Loss: 0.1307430973657093 | Val Loss: 0.12884527444839478


4248it [05:40, 12.49it/s]


Epoch: 2 | Loss: 0.1282656340160023 | Val Loss: 0.12489526718854904


4248it [05:48, 12.18it/s]


Epoch: 3 | Loss: 0.12741508798344015 | Val Loss: 0.12873677909374237


In [15]:
torch.save(resnet, 'resnet50.pt')

In [46]:
print(f'A rede atinge: {accuracy(resnet, valloader)}% de acurácia')

A rede atinge: 4% de acurácia


In [None]:
ls = [l for l in lbl]
list(zip(zip(ls[0].tolist(), predictions[0].tolist()), all_genres))

In [None]:
confusion_matrix(resnet, valloader)