In [8]:
import os
import requests
from PIL import Image
from io import StringIO, BytesIO
import scipy.misc
import tqdm
import torch, torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt
import cv2
import copy
from IPython.display import clear_output
import pickle

In [None]:
def load_dataset(root_dir):
    """Read the pickled cat and bird collections stored under ``root_dir``.

    Two files named ``cats`` and ``birds`` are expected directly inside
    ``root_dir``; each one is opened in binary mode and deserialised with
    ``pickle.load``.

    NOTE: unpickling can execute arbitrary code, so only point this at files
    that come from a trusted source.

    Args:
        root_dir: directory that contains the two pickle files.

    Returns:
        A ``(cats, birds)`` tuple with the two unpickled objects, in that order.
    """
    loaded = []
    for file_name in ("cats", "birds"):
        path = os.path.join(root_dir, file_name)
        with open(path, "rb") as handle:
            loaded.append(pickle.load(handle))
    cats, birds = loaded
    return cats, birds

In [9]:
def prepare_dataset(cats, birds, train_val_ratio = 0.8):
    """Build shuffled, labelled train and validation sets from two image stacks.

    Both inputs are transposed with axes ``(0, 3, 1, 2)`` (the last axis
    becomes the second one; presumably NHWC -> NCHW, confirm against the
    data). Each class is then cut at ``int(train_val_ratio * n)``: the leading
    part goes to training, the rest to validation. Within each subset the two
    classes are concatenated, labelled (cats = 1, birds = 0) and shuffled with
    ``np.random.permutation``.

    Args:
        cats: 4-D array-like of cat images.
        birds: 4-D array-like of bird images.
        train_val_ratio: fraction of each class placed in the training subset.

    Returns:
        ``(images_train, labels_train, images_val, labels_val)``.
    """
    channels_second = (0, 3, 1, 2)
    cats = np.transpose(cats, channels_second)
    birds = np.transpose(birds, channels_second)

    def split(images, train_val_ratio):
        # Leading block is the training part, the remainder is validation.
        boundary = int(train_val_ratio * images.shape[0])
        return images[:boundary], images[boundary:]

    def unite(cats, birds):
        # Stack both classes, label them, then apply one shared random order
        # so images and labels stay aligned.
        images = np.concatenate([cats, birds], axis = 0)
        cat_labels = np.ones(cats.shape[0], dtype = int)
        bird_labels = np.zeros(birds.shape[0], dtype = int)
        labels = np.concatenate([cat_labels, bird_labels])
        order = np.random.permutation(images.shape[0])
        return images[order], labels[order]

    cats_train, cats_val = split(cats, train_val_ratio)
    birds_train, birds_val = split(birds, train_val_ratio)

    # The training subset is shuffled first, then the validation subset.
    images_train, labels_train = unite(cats_train, birds_train)
    images_val, labels_val = unite(cats_val, birds_val)

    return images_train, labels_train, images_val, labels_val

In [10]:
class Flatten(nn.Module):
    """Layer that collapses every dimension after the first into a single one."""

    def forward(self, input):
        """Return a view of ``input`` with shape ``(input.size(0), -1)``."""
        leading = input.size(0)
        flattened = input.view(leading, -1)
        return flattened

In [11]:
def compute_loss(X_batch, y_batch, model=None):
    """Compute the mean cross-entropy loss of a classifier on one mini-batch.

    Args:
        X_batch: array-like of inputs; converted to a float tensor.
        y_batch: array-like of integer class indices; converted to a long tensor.
        model: network to evaluate. When omitted, the notebook-level global
            named ``model`` is looked up, which keeps two-argument calls
            working. Passing the model explicitly avoids depending on that
            hidden global.

    Returns:
        A scalar tensor holding the batch-averaged cross-entropy, still
        attached to the autograd graph so ``.backward()`` can be called on it.

    Raises:
        NameError: if ``model`` is omitted and no global ``model`` exists.
    """
    if model is None:
        try:
            model = globals()["model"]
        except KeyError:
            raise NameError("name 'model' is not defined") from None

    # Put the batch on whatever device the model lives on instead of forcing
    # CUDA, so the function also works for CPU models / machines without a GPU.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    inputs = torch.FloatTensor(X_batch).to(device)
    targets = torch.LongTensor(y_batch).to(device)
    logits = model(inputs)
    # cross_entropy already averages over the batch (reduction="mean"), so no
    # extra .mean() is needed.
    return F.cross_entropy(logits, targets)

In [12]:
def iterate_minibatches(X, y, batchsize):
    """Lazily yield ``(X[ix], y[ix])`` mini-batches in a freshly shuffled order.

    A random permutation of ``range(len(X))`` is drawn when iteration starts
    and then consumed in consecutive chunks of ``batchsize`` indices; the last
    chunk may be shorter. ``X`` and ``y`` must support indexing with an
    integer index array.

    Args:
        X: indexable collection of samples.
        y: indexable collection of targets, aligned with ``X``.
        batchsize: maximum number of samples per yielded batch.

    Yields:
        Tuples ``(X_batch, y_batch)`` selected by the same indices.
    """
    order = np.random.permutation(np.arange(len(X)))
    batch_starts = range(0, len(order), batchsize)
    index_chunks = (order[lo: lo + batchsize] for lo in batch_starts)
    for picked in index_chunks:
        yield X[picked], y[picked]

In [17]:
# Scratch check of floor division: 5 // 3 evaluates to 1 (the cell output below).
5 // 3

1

In [15]:
def _model_device(model):
    """Return the device of the model's first parameter (CUDA-if-available fallback)."""
    first_param = next(model.parameters(), None)
    if first_param is not None:
        return first_param.device
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _evaluate_accuracy(model, images, labels, batch_size, device):
    """Return the fraction of ``images`` whose arg-max prediction equals ``labels``."""
    num_correct = 0
    # Evaluation needs no gradients; no_grad avoids building autograd graphs.
    with torch.no_grad():
        for X_batch, y_batch in iterate_minibatches(images, labels, batch_size):
            logits = model(torch.FloatTensor(X_batch).to(device))
            predictions_now = logits.max(1)[1].cpu().numpy()
            num_correct += np.sum(predictions_now == y_batch)
    return float(num_correct) / labels.shape[0]


def perform_training(model, images_train, labels_train, images_val, labels_val, n_epochs = 100, lr = 1e-4, batch_size = 256):
    """Train ``model`` with Adam and return a copy of its best validation state.

    Each epoch runs one shuffled pass over the training data (cross-entropy
    loss), then measures accuracy on the validation and training sets, clears
    the cell output and redraws the accuracy curves. Whenever validation
    accuracy improves, a deep copy of the model is stored.

    Args:
        model: network to optimise in place; it must already be on the device
            it should train on, and batches are moved to that device.
        images_train, labels_train: training inputs and integer class labels.
        images_val, labels_val: validation inputs and integer class labels.
        n_epochs: number of passes over the training data.
        lr: Adam learning rate.
        batch_size: mini-batch size for both training and evaluation.

    Returns:
        A deep copy of the model from the epoch with the highest validation
        accuracy, or ``None`` if no epoch finished. A ``KeyboardInterrupt``
        stops training early and still returns the best copy so far.
    """
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    device = _model_device(model)

    # Per-epoch history (train_loss is recorded but not plotted).
    train_loss = []
    val_accuracy = []
    train_accuracy = []

    best_val_accuracy = 0.0
    best_val_accuracy_epoch = -1
    best_model = None
    num_batches = ((labels_train.shape[0] - 1) // batch_size) + 1

    try:
        for epoch in range(n_epochs):
            model.train(True)
            train_loss_now = []
            for X_batch, y_batch in tqdm.tqdm(iterate_minibatches(images_train, labels_train, batch_size),
                                              total = num_batches):
                inputs = torch.FloatTensor(X_batch).to(device)
                targets = torch.LongTensor(y_batch).to(device)
                # The loss is computed directly on the `model` argument so the
                # network whose gradients are populated is always the one the
                # optimiser updates, regardless of any global named `model`.
                loss = F.cross_entropy(model(inputs), targets)
                opt.zero_grad()
                loss.backward()
                opt.step()
                train_loss_now.append(loss.item())

            train_loss.append(np.mean(train_loss_now))

            model.train(False)
            val_accuracy.append(_evaluate_accuracy(model, images_val, labels_val, batch_size, device))
            # `best_model is None` makes sure the first finished epoch is
            # always kept, even if its validation accuracy is exactly 0.
            if best_model is None or val_accuracy[-1] > best_val_accuracy:
                best_val_accuracy = val_accuracy[-1]
                best_val_accuracy_epoch = epoch + 1
                best_model = copy.deepcopy(model)

            train_accuracy.append(_evaluate_accuracy(model, images_train, labels_train, batch_size, device))

            clear_output()
            plt.plot(np.arange(len(train_accuracy)) + 1, train_accuracy, label = 'accuracy on train dataset')
            plt.plot(np.arange(len(val_accuracy)) + 1, val_accuracy, label = 'accuracy on val_dataset')
            plt.xlabel('epoch')
            plt.ylabel('accuracy')
            print("accuracy on train dataset now: ", train_accuracy[-1])
            print("accuracy on val dataset now: ", val_accuracy[-1])
            print("best accuracy on val dataset up to now: ", best_val_accuracy)
            print("best accuracy on val dataset was in epoch number: ", best_val_accuracy_epoch)
            plt.legend()
            plt.show()
    except KeyboardInterrupt:
        # Manual interruption is an expected way to stop early; fall through
        # and hand back the best snapshot collected so far.
        pass
    return best_model

In [None]:
def predict(model, img, input_size = (100, 75)):
    """Classify a single image as ``"cat"`` or ``"bird"``.

    The image is resized with ``cv2.resize``, scaled by ``1 / 255``, given a
    leading batch axis, transposed with axes ``(0, 3, 1, 2)`` and passed
    through ``model`` on the device the model lives on.

    Args:
        model: network producing two logits per image; index 0 is read as the
            bird score and index 1 as the cat score.
        img: PIL image or array-like image. Objects exposing ``convert`` (PIL
            images) are converted to RGB first, so greyscale, palette and
            RGBA inputs end up with three channels.
            NOTE(review): this assumes the network expects 3-channel input;
            confirm against the training data.
        input_size: ``(width, height)`` passed to ``cv2.resize`` as ``dsize``.

    Returns:
        ``"bird"`` if logit 0 is strictly greater than logit 1, otherwise ``"cat"``.
    """
    # Images fetched from the web are not guaranteed to be RGB; without this
    # a 2-D greyscale array breaks the 4-axis transpose below and an alpha
    # channel changes the channel count fed to the network.
    if hasattr(img, "convert"):
        img = img.convert("RGB")

    img_rescaled = cv2.resize(np.array(img), dsize = tuple(input_size)) / 255.0
    img_rescaled = np.expand_dims(img_rescaled, axis = 0)
    img_rescaled = np.transpose(img_rescaled, axes = (0, 3, 1, 2))

    # Follow the model's device instead of forcing CUDA.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(torch.FloatTensor(img_rescaled).to(device)).cpu().numpy()
    logits = logits.squeeze()

    if (logits[0] > logits[1]) :
        result = "bird"
    else:
        result = "cat"
    return result

In [1]:
def predict_by_url(model, url, timeout = 10):
    """Download an image, display it, and classify it with ``predict``.

    Args:
        model: network forwarded unchanged to ``predict``.
        url: address of the image to download.
        timeout: seconds to wait for the server before giving up; without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        Whatever ``predict(model, img)`` returns for the downloaded image.

    Raises:
        requests.exceptions.RequestException: on connection problems,
            timeouts, or a 4xx/5xx HTTP status.
    """
    response = requests.get(url, timeout = timeout)
    # Fail with a clear HTTP error rather than letting PIL choke on an error page.
    response.raise_for_status()
    img = Image.open(BytesIO(response.content))
    plt.imshow(img)
    plt.show()
    return predict(model, img)