In [1]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms, models

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from skimage import(
    io, measure
)

# check for GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Current Device: {device}")

# Google Auth
# gauth = GoogleAuth()
# gauth.LocalWebserverAuth()

# drive = GoogleDrive(gauth)

PATH = 'data/INS1_BF/'

Current Device: cuda:0


### TODO:

- [x] Connect to google drive
- [x] Load tiffs into dataframe
- [x] Test train split
- [x] General linear regression model
- [ ] Train
- [ ] Validate





In [2]:
file_list = os.listdir(PATH)
print(file_list)

['0', '10000', '2000', '4000', '6000', '8000']


In [3]:
# Create Data

from torchvision import datasets, transforms

data = datasets.ImageFolder(root='data/INS1_BF', transform=transforms.ToTensor())

In [4]:
print(data.classes)

['0', '10000', '2000', '4000', '6000', '8000']


In [5]:
# Load data with data loader
from torch.utils.data import DataLoader
loader = DataLoader(data, batch_size= 64, shuffle=True)

In [6]:
dataiter = iter(loader)
images, labels = dataiter.next()
print(type(images))
print(images.shape)
print(labels.shape)

<class 'torch.Tensor'>
torch.Size([64, 3, 2000, 2000])
torch.Size([64])


In [7]:
# Using VGG16 Image Model
model = models.vgg16(pretrained=True)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\Ben Halligan/.cache\torch\checkpoints\vgg16-397923af.pth


HBox(children=(IntProgress(value=0, max=553433881), HTML(value='')))




NameError: name 'OrderDict' is not defined

#### VGG16 Model Description

- Based on [this](https://github.com/LeanManager/PyTorch_Image_Classifier/blob/master/Image_Classifier_Project.ipynb) project
- [VGG16 Model](https://neurohive.io/en/popular-networks/vgg16/)

In [10]:
# Freeze pretrained model parameters to avoid backpropogating through them
for parameter in model.parameters():
    parameter.requires_grad = False


from collections import OrderedDict
# New classifier 
classifier = nn.Sequential(OrderedDict([('fc1', nn.Linear(25088, 5000)),
                                        ('relu', nn.ReLU()),
                                        ('drop', nn.Dropout(p=0.5)),
                                        ('fc2', nn.Linear(5000, 6)),
                                        ('output', nn.LogSoftmax(dim=1))]))


model.classifier = classifier
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

In [11]:
# Test Train Split
train_size = int(0.8*len(data))
test_size = len(data) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(data, [train_size, test_size])

In [12]:
# Create test and train dataloaders

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4,
                                         shuffle=True, num_workers=2)

test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=4,
                                        shuffle=True, num_workers=2)

In [15]:
# Train CNN

def train_model():
    
    epochs = 15
    steps = 0
    print_every = 100
    model.to('cuda')
    
    for e in range(epochs):
        model.train()
        running_loss = 0
        
        for images, labels in iter(train_loader):
            steps +=1
            images, labels = images.to('cuda'), labels.to('cuda')
            optimizer.zero_grad()
            
            output = model.forward(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            
            if steps % print_every == 0:
                
                model.eval()
                
                with torch.no_grad():
                        validation_loss, accuracy = validation(model, validate_loader, criterion)
            
                print("Epoch: {}/{}.. ".format(e+1, epochs),
                        "Training Loss: {:.3f}.. ".format(running_loss/print_every),
                        "Validation Loss: {:.3f}.. ".format(validation_loss/len(validate_loader)),
                        "Validation Accuracy: {:.3f}".format(accuracy/len(validate_loader)))
            
                running_loss = 0
                model.train()

In [16]:
# Test Model

def test_accuracy(model, test_loader):
    
    # Validate test set
    model.eval()
    model.to('cuda')
    
    with torch.no_grad():
        
        accuracy = 0
        
        for images, labels in iter(test_loader):
            
            images, labels = images.to('cuda'), labels.to('cuda')
            
            output = model.forward(images)
            
            probabilities = torch.exp(output)
            
            equality = (labels.data == probabilities.max(dim=1)[1])
            
            accuracy += equality.type(torch.FloatTensor).mean()
            
        print("Test Accuracy: {}".format(accuracy/len(test_loader)))

In [18]:
# Save/Load the checkpoint

def save_checkpoint(model):
    
    model.class_to_idx = training_dataset.class_to_idx
    
    checkpoint = {'arch':"vgg16",
                 'class_to_idx': model.class_to_idx,
                 'model_state_dict':model.state_dict()
                 }
    torch.save(checkpoint, 'checkpoint.pth')

def load_checkpoint(filepath):
    checkpoint = torch.load(filepath)
    
    if checkpoint['arch'] == 'vgg16':
        
        model = models.vgg16(pretrained=True)
        
        for param in model.parameters():
            param.requires_grad = False
        else:
            print('Architecture not recognized')
        
        model.class_to_idx = checkpoint['class_to_idx']
        classifier = nn.Sequential(OrderedDict([('fc1', nn.Linear(25088, 5000)),
                                            ('relu', nn.ReLU()),
                                            ('drop', nn.Dropout(p=0.5)),
                                            ('fc2', nn.Linear(5000, 102)),
                                            ('output', nn.LogSoftmax(dim=1))]))

    model.classifier = classifier
    
    model.load_state_dict(checkpoint['model_state_dict'])
    
    return model

In [23]:
train_model()

RuntimeError: CUDA out of memory. Tried to allocate 3.82 GiB (GPU 0; 8.00 GiB total capacity; 4.88 GiB already allocated; 1.80 GiB free; 4.88 GiB reserved in total by PyTorch)

In [21]:
torch.cuda.empty_cache()