<a href="https://colab.research.google.com/github/abialbon/pytorch-udacity-scholarship/blob/master/Project/Oxford_flower_set.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from google.colab import drive
# Mount Google Drive at /content/drive; save_model() further down writes its
# checkpoint under 'drive/My Drive/'.
drive.mount('/content/drive')

In [0]:
# Download the Oxford 102 Flowers archive (-c resumes a partial download,
# -q keeps wget quiet) and extract it into the current directory.
!wget -cq https://s3.amazonaws.com/fast-ai-imageclas/oxford-102-flowers.tgz
!tar xzf oxford-102-flowers.tgz
# Download cat_to_name.json, which the next cell loads into classes_to_labels.
!wget https://raw.githubusercontent.com/udacity/pytorch_challenge/master/cat_to_name.json

In [0]:
import json

# Read cat_to_name.json (downloaded above) into a dict; save_model() uses its
# length as the number of output classes.
with open('cat_to_name.json') as mapping_file:
    classes_to_labels = json.loads(mapping_file.read())

In [0]:
import os
# Work from inside the extracted dataset folder: the following cells open
# train.txt / valid.txt / test.txt and jpg/ relative to it.
# NOTE(review): the working directory stays changed for the rest of the session,
# yet later cells use paths such as 'oxford-102-flowers/combined' and
# 'drive/My Drive/...' that only resolve from the parent directory -- confirm
# the intended execution order.
os.chdir('oxford-102-flowers')

In [0]:
# Lookup filled by the next cell: image path as listed in the split files
# (first column) -> class label stored as a string.
file_label_mapper = {}

In [0]:
# Fill file_label_mapper from the three split listings. Each non-empty line is
# "<image path> <integer label>" separated by whitespace.
txt_files = ['train.txt', 'valid.txt', 'test.txt']
for split_name in txt_files:
    with open(split_name, 'r') as split_file:
        # Iterate the file lazily instead of materialising it with readlines().
        for raw_line in split_file:
            fields = raw_line.split()
            if not fields:
                # A blank (e.g. trailing) line used to raise IndexError.
                continue
            image_path, raw_label = fields[0], fields[1]
            # Labels are shifted up by one and kept as strings; they become the
            # per-class folder names under combined/.
            # NOTE(review): presumably the +1 aligns with 1-based keys in
            # cat_to_name.json -- confirm.
            file_label_mapper[image_path] = str(int(raw_label) + 1)

In [0]:
import pickle
# Persist the path -> label mapping next to the dataset. The with-block makes
# sure the file is flushed and closed (the bare open() handle never was).
with open('mapper.p', 'wb') as mapper_file:
    pickle.dump(file_label_mapper, mapper_file)

In [0]:
import pickle
import os

# Reload the mapping written by the previous cell. That cell writes 'mapper.p'
# into the current directory *after* os.chdir('oxford-102-flowers'), so when the
# notebook is run top to bottom the file is at 'mapper.p'; from a fresh session
# started in the parent directory it is at 'oxford-102-flowers/mapper.p'.
# Accept either location instead of failing with FileNotFoundError.
mapper_path = 'oxford-102-flowers/mapper.p'
if not os.path.exists(mapper_path) and os.path.exists('mapper.p'):
    mapper_path = 'mapper.p'

# pickle.load can execute arbitrary code; only load files this notebook wrote.
with open(mapper_path, 'rb') as mapper_file:
    file_label_mapper = pickle.load(mapper_file)

In [0]:
# Root folder for the per-class image directories. exist_ok=True makes the cell
# safe to re-run (os.mkdir raised FileExistsError the second time).
os.makedirs('combined', exist_ok=True)

In [0]:
# Move every file in jpg/ into combined/<label>/, where <label> comes from
# file_label_mapper (keys have the form 'jpg/<file name>').
# A file without a mapper entry raises KeyError, as before.
list_files = os.listdir('jpg')
for image_name in list_files:
    source_path = 'jpg/' + image_name
    class_dir = os.path.join('combined', file_label_mapper[source_path])
    # Creates combined/ and the class folder on demand; no-op when they exist.
    os.makedirs(class_dir, exist_ok=True)
    os.replace(source_path, os.path.join(class_dir, image_name))

In [0]:
# http://pytorch.org/
# Install a PyTorch 0.4.1 wheel whose filename matches this runtime's Python
# build and CUDA version.
from os.path import exists
# NOTE(review): wheel.pep425tags may not exist in newer `wheel` releases -- confirm
# this import still works on the target runtime.
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
# Tag of the form '<impl><version>-<abi>', substituted into the wheel filename below.
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
# List the cudart shared library and rewrite its trailing '.<X>.<Y>' version
# suffix into 'cu<X><Y>'; the result is a list of output lines.
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
# Pick the CUDA wheel only when an NVIDIA device node exists, else the CPU wheel.
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

# NOTE(review): torch is pinned to 0.4.1 while torchvision is unpinned -- confirm
# the two versions pip resolves are compatible.
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision


In [0]:
# All imports
import time
import torch
import numpy as np
from torchvision import datasets, models, transforms
from torch import nn
import torch.nn.functional as F
from torch import optim

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# A bare `device` expression is only echoed when it is the last line of a cell,
# so it was silently dropped here; print it explicitly instead.
print(device)
print(torch.__version__)

In [0]:
import os

# Augmentation + preprocessing applied to every training image: random flip,
# two random rotations (RandomRotation and RandomAffine both take a degree
# range of 20), then resize / centre-crop to 224x224, tensor conversion and
# per-channel normalisation.
# NOTE(review): the mean/std values are presumably the ImageNet statistics the
# pretrained ResNet expects -- confirm.
train_transforms = transforms.Compose([transforms.RandomHorizontalFlip(p=0.3),
                                       transforms.RandomRotation(20),
                                       transforms.RandomAffine(20),
                                       transforms.Resize(256),
                                       transforms.CenterCrop(224),
                                       transforms.ToTensor(),
                                       transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])])

# The per-class folders live in oxford-102-flowers/combined. If the kernel is
# still inside oxford-102-flowers/ (because of the earlier os.chdir) that
# relative path does not resolve, so fall back to 'combined'.
data_dir = 'oxford-102-flowers/combined'
if not os.path.isdir(data_dir) and os.path.isdir('combined'):
    data_dir = 'combined'

# ImageFolder derives one class per sub-directory of data_dir.
trainset = datasets.ImageFolder(data_dir, transform=train_transforms)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)

In [0]:
%%capture
fc = nn.Sequential(nn.Linear(2048, 102),
                  nn.LogSoftmax(dim=1))

model = models.resnet152(pretrained=True)

model.fc = fc
model.to(device)
criteria = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

In [0]:
def save_model(model, path='drive/My Drive/job_/model_over_train.pt'):
    """Write a checkpoint for ``model`` to ``path`` with ``torch.save``.

    The checkpoint is a dict with:
      * ``class_to_idx``  -- ``trainset.class_to_idx`` (module-level ``trainset``)
      * ``input_layers``  -- 2048
      * ``output_layers`` -- ``len(classes_to_labels)`` (module-level dict)
      * ``state_dict``    -- the model's state dict with every tensor on the CPU

    Args:
        model: the ``nn.Module`` to checkpoint. It is NOT moved between devices;
            the previous implementation called ``model.cpu()``, which silently
            left the caller's model on the CPU afterwards.
        path: destination file. Defaults to the original hard-coded location,
            which is relative to the current working directory.
    """
    import os
    from collections import OrderedDict

    # Copy the tensors to the CPU instead of moving the live model.
    live_state = model.state_dict()
    cpu_state = OrderedDict((name, tensor.cpu()) for name, tensor in live_state.items())
    # Keep the per-module metadata that state_dict() attaches, when present,
    # so the saved object matches what model.state_dict() would have pickled.
    metadata = getattr(live_state, '_metadata', None)
    if metadata is not None:
        cpu_state._metadata = metadata

    dict_to_save = {
        'class_to_idx': trainset.class_to_idx,
        'input_layers': 2048,
        'output_layers': len(classes_to_labels),
        'state_dict': cpu_state
    }

    # torch.save fails if the parent folder is missing; create it rather than
    # lose the trained weights at the very last step.
    target_dir = os.path.dirname(path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    torch.save(dict_to_save, path)
    print('Saved checkpoint to {}'.format(os.path.abspath(path)))

In [0]:
def train(n_epochs, model, criteria, optimizer, scheduler, log_path):
    """Train ``model`` on the module-level ``train_loader`` for ``n_epochs`` epochs.

    After every epoch the scheduler is stepped and the mean training loss and
    the elapsed time are printed. Every 5th epoch the model is additionally
    evaluated on ``train_loader`` (the same loader used for training) and the
    top-1 accuracy is printed.

    Args:
        n_epochs: number of passes over ``train_loader``.
        model: network to optimise; called as ``model(images)``.
        criteria: loss callable taking ``(output, labels)``.
        optimizer: optimiser whose ``zero_grad``/``step`` are called per batch.
        scheduler: learning-rate scheduler, stepped once per epoch.
        log_path: currently unused; kept so existing callers keep working.

    Relies on the module-level names ``train_loader`` and ``device``.
    The model is left in eval mode if the last epoch was an evaluation epoch.
    """
    n_batches = len(train_loader)

    for e in range(n_epochs):
        start = time.time()
        train_loss = 0
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Call the module itself rather than .forward() so hooks run.
            output = model(images)
            loss = criteria(output, labels)
            train_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # The original used for/else here; with no `break` in the loop the
        # else-body always ran, so plain sequential code is equivalent.
        scheduler.step()
        total_time = time.time() - start

        # Report loss/time for EVERY epoch. Previously this sat in the `else`
        # of the `(e+1) % 5` check, so evaluation epochs never printed it.
        metric_string = 'Epoch: {:2d}/{:2d} ---- Train loss: {:3.3f}'.format(
            e+1, n_epochs, train_loss / max(n_batches, 1))
        print(metric_string)
        print('Time for last epoch: {:2d}m {:2d}s'.format(int(total_time//60), int(total_time % 60)))

        if (e+1) % 5 == 0:
            correct = 0
            seen = 0
            model.eval()
            with torch.no_grad():
                for images, labels in train_loader:
                    images, labels = images.to(device), labels.to(device)

                    output = model(images)
                    # The arg-max of the log-probabilities equals the arg-max
                    # of the probabilities, so exp() is not needed.
                    _, top_c = output.topk(1, dim=1)

                    # Count hits per sample so a short final batch is not
                    # over-weighted (the old code averaged per-batch means).
                    matches = top_c.view(-1) == labels
                    correct += matches.long().sum().item()
                    seen += labels.size(0)

            print('Accuracy: {:3.3f}'.format(correct / max(seen, 1) * 100))

In [0]:
# Run 15 epochs; the accuracy check inside train() fires on epochs 5, 10 and 15.
train(
    n_epochs=15,
    model=model,
    criteria=criteria,
    optimizer=optimizer,
    scheduler=scheduler,
    log_path='drive/My Drive/job_/overtrain.txt',
)

In [0]:
# Write the trained weights together with trainset.class_to_idx to the
# checkpoint file used by save_model().
save_model(model)