In [26]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

device = "cuda" if torch.cuda.is_available() else "cpu"

input_size = 11664
seq_size = 3
num_layers = 2
hidden_size = 2560
num_classes = 768
learning_rate = 0.05
batch_size = 64
num_epochs = 2

class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNNModel, self).__init__()
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size*seq_size, num_classes)
    
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.rnn(x, h0)
        out = out.reshape(out.shape[0], -1)
        out = self.fc(out)
        return out

In [27]:
transform = transforms.Compose([transforms.Resize(108),
                              transforms.CenterCrop(108),
                              transforms.ToTensor()])

dataset = datasets.ImageFolder("../input/birds1/birds/birds", transform=transform)

train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=True, batch_size=batch_size) 

model = RNNModel(input_size, hidden_size, num_layers, num_classes).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for i, (data, targets) in enumerate(train_loader):
        data = data.to(device=device).flatten(2)
        targets = targets.to(device=device)
        
        scores = model(data)
        loss = loss_fn(scores, targets)
        
        optimizer.zero_grad()
        loss.backward()
        
        optimizer.step()
        
print('Finalizo')
PATH = './selected_model.pth'
torch.save(model, PATH)

Finalizo


In [28]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = torch.load("./selected_model.pth")
model.eval()

RNNModel(
  (rnn): RNN(11664, 2560, num_layers=2, batch_first=True)
  (fc): Linear(in_features=7680, out_features=768, bias=True)
)

In [29]:
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()
    
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device).flatten(2)
            y = y.to(device=device)
            
            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
        print(f'Got {num_correct} / {num_samples} with accuracy  \ {(float(num_correct)/float(num_samples))*100:.2f}')
    model.train()
    
check_accuracy(test_loader, model)

Got 1762 / 11678 with accuracy  \ 15.09


In [33]:
# Test and write submission_test.csv
import csv
import os
from torch.utils.data import Dataset
from PIL import Image


class NoClassDataset(Dataset):
    def __init__(self, main_dir, transform):
        self.main_dir = main_dir
        self.transform = transform
        all_imgs = os.listdir(main_dir)
        self.total_imgs = all_imgs

    def __len__(self):
        return len(self.total_imgs)

    def __getitem__(self, idx):
        img_loc = os.path.join(self.main_dir, self.total_imgs[idx])
        image = Image.open(img_loc).convert("RGB")
        tensor_image = self.transform(image)
        tensor_image = tensor_image.to(device)
        return tensor_image
    
    def getFileName(self, idx):
        img_loc = os.path.join(self.main_dir, self.total_imgs[idx])
        filename = os.path.basename(img_loc)
        return filename.split(".")[0]

#Creamos el csv

with open('submission.csv', 'w') as file:
    data = ["Id", "Category"]
    writer = csv.writer(file)
    writer.writerow(data)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    transform = transforms.Compose([transforms.Resize(108),
                                     transforms.CenterCrop(108),transforms.ToTensor()])
    dataset = datasets.ImageFolder('../input/birds1/birds/birds', transform=transform)
    model = torch.load('./selected_model.pth')
    model.eval()

    submissions = NoClassDataset('../input/birds1/submission_test/submission_test', transform=transform)
    submissions_loader = DataLoader(submissions , batch_size=1, shuffle=False)

    dict = dataset.class_to_idx
    key_list = list(dict.keys())
    val_list = list(dict.values())
    
    for epoch in range(1): #Necesario para iterar las 2000, La longitud del dataset es batch_size x num_epochs
        for idx, img_normalized in enumerate(submissions_loader):
            img_normalized = img_normalized.flatten(2)
            logits = model(img_normalized)
            probs = torch.nn.functional.softmax(logits, dim=1)

            k = 1
            top_prob, top_ix = probs[0].topk(k)
            ix = top_ix.item()
            prob = top_prob.item()
            position = val_list.index(top_ix)
            cls = key_list[position].strip()
            #import pdb; pdb.set_trace()
            row = [submissions.getFileName(idx), cls]
            writer.writerow(row)