<a href="https://colab.research.google.com/github/johnbeasley1998/NINEworkshop/blob/master/VLASS_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Galaxy Classification**
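* We will start from scratch with 3 galaxy classifications. This data is not curated.
* 3 categories: diffuse, multiple, single (the class listing printed further down shows a fourth class, `disturbed`, in the dataset that was actually loaded)
* Images are monochromatic, 960x960 px
* Each image is therefore a series of 921,600 pixels (960 × 960)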

Steps:
1. Download data
1. Load the data
1. Run classifier
1. Plot the loss function

Data is structured as:
* ./images
  * ./train
    * ./single
    * ./multiple
    * ./diffuse
  * ./valid
    * ./single
    * ./multiple
    * ./diffuse

In [None]:
#Methods to read data
import os
import torch
import torchvision
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
from torch.autograd import Variable




In [None]:
# Preprocessing shared by both splits: resize every image to 64x64, convert it
# to a tensor, then normalize each of the three channels with the given
# per-channel mean and standard deviation.
simple_transform = transforms.Compose(
  [
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
  ]
)

# One ImageFolder per split; both use the same transform.
trainset = ImageFolder(
  root='/srv/data/my_shared_data_folder/v3/train/',
  transform=simple_transform,
)
validset = ImageFolder(
  root='/srv/data/my_shared_data_folder/v3/valid/',
  transform=simple_transform,
)


In [None]:
print(trainset)
type(trainset)

print(validset)
type(validset)

Dataset ImageFolder
    Number of datapoints: 344
    Root location: /content/drive/My Drive/NINE/Collab/PyTorch/PyTorch - ImageClassifier/GalaxyClassifier/images/v1/train
Dataset ImageFolder
    Number of datapoints: 125
    Root location: /content/drive/My Drive/NINE/Collab/PyTorch/PyTorch - ImageClassifier/GalaxyClassifier/images/v1/valid


torchvision.datasets.folder.ImageFolder

In [None]:
trainset.class_to_idx

{'diffuse': 0, 'disturbed': 1, 'multiple': 2, 'single': 3}

In [None]:
validset.class_to_idx

{'diffuse': 0, 'disturbed': 1, 'multiple': 2, 'single': 3}

In [None]:
# List the integer label that ImageFolder assigned to each class name.
for class_name, class_index in trainset.class_to_idx.items():
  print('Class is: ', class_name, ', Index is: ', class_index)

Class is:  diffuse , Index is:  0
Class is:  disturbed , Index is:  1
Class is:  multiple , Index is:  2
Class is:  single , Index is:  3


**Next steps**
1. Data is already loaded into 2 variables: trainset and validset
2. Identify the modules you will need for the classifier


In [None]:
import torch #Main torch library
import torch.nn as nn #Neural Network module, we will build our class
import torchvision #Used earlier for data import
import torchvision.transforms as transforms
import torch.nn.functional as F

We need to display some sample images. It's best to import with matplotlib. Also import numpy for good measure.

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
#Build the image display function
#Let's use our previous example as boilerplate
#Let's use the PyTorch built-in display method, using the Python Imaging Library
#(PIL, a.k.a. Pillow)
from PIL import Image
import random

def imshow(img, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
  """Display a normalized image tensor with matplotlib.

  The tensor is expected in channel-first (C, H, W) layout, as produced by
  `transforms.ToTensor()` followed by `transforms.Normalize(mean, std)`.

  Args:
    img: 3-D CPU tensor laid out as (C, H, W).
    mean: Per-channel mean that was subtracted during normalization. The
      default matches the values passed to `transforms.Normalize` in
      `simple_transform`.
    std: Per-channel standard deviation that the image was divided by. The
      default matches `simple_transform` as well.

  Returns:
    None. The image is drawn on the current axes and shown immediately.
  """
  # matplotlib wants channel-last (H, W, C) data.
  npimg = np.transpose(img.numpy(), (1, 2, 0))
  # Undo Normalize: x_norm = (x - mean) / std  =>  x = x_norm * std + mean.
  # mean/std broadcast over the trailing channel axis.
  npimg = npimg * np.asarray(std) + np.asarray(mean)
  # Rounding can push values slightly outside [0, 1]; clip so matplotlib does
  # not warn about (or mis-render) out-of-range float data.
  plt.imshow(np.clip(npimg, 0.0, 1.0))
  plt.show()


In [None]:
# New figure (figsize 10x10); draw training sample 500 in the first cell of a
# 1-row, 3-column grid.
fig = plt.figure(figsize=(10, 10))

sub = fig.add_subplot(1, 3, 1)
sub = imshow(trainset[500][0])  # imshow returns None, so `sub` ends up as None

In [None]:
# New figure (figsize 10x10); draw training sample 1200 in the first cell of a
# 1-row, 3-column grid.
fig = plt.figure(figsize=(10, 10))

sub = fig.add_subplot(1, 3, 1)
sub = imshow(trainset[1200][0])  # imshow returns None, so `sub` ends up as None

In [None]:
# New figure (figsize 10x10); draw training sample 1300 in the first cell of a
# 1-row, 3-column grid.
fig = plt.figure(figsize=(10, 10))

sub = fig.add_subplot(1, 3, 1)
sub = imshow(trainset[1300][0])  # imshow returns None, so `sub` ends up as None

**Define NN**

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
  """Small convolutional classifier for 3-channel 64x64 images.

  Architecture: two 5x5 convolutions (each followed by 2x2 max-pooling and
  ReLU), then two fully connected layers. The output is per-class
  log-probabilities, so it pairs with `F.nll_loss` / `nn.NLLLoss`.

  Args:
    num_classes: Number of output classes. Defaults to 4.
  """

  def __init__(self, num_classes=4):
    super().__init__()
    # 3 input channels -> 10 feature maps -> 20 feature maps.
    self.conv1 = nn.Conv2d(3, 10, kernel_size=5)
    self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
    self.conv2_drop = nn.Dropout2d()
    # Flattened feature size for a 64x64 input:
    #   64 -(conv 5x5)-> 60 -(pool 2)-> 30 -(conv 5x5)-> 26 -(pool 2)-> 13
    #   20 channels * 13 * 13 = 3380
    # A different input resolution requires changing this number.
    self.fc1 = nn.Linear(3380, 50)
    self.fc2 = nn.Linear(50, num_classes)

  def forward(self, x):
    """Map a batch of images (N, 3, 64, 64) to log-probabilities (N, num_classes)."""
    x = F.relu(F.max_pool2d(self.conv1(x), 2))
    x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
    x = x.view(x.size(0), -1)  # flatten everything except the batch dimension
    x = F.relu(self.fc1(x))
    # Functional dropout must be told the mode explicitly; self.training is
    # toggled by model.train() / model.eval().
    x = F.dropout(x, training=self.training)
    # No ReLU on the final layer: clamping logits at 0 would stop the network
    # from pushing any class below "neutral" and zero the gradient of every
    # negative logit.
    x = self.fc2(x)

    return F.log_softmax(x, dim=1)

model = Net()

In [None]:
type(model)

__main__.Net

In [None]:
print(model) #Print out model

Net(
  (conv1): Conv2d(3, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2_drop): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=3380, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=4, bias=True)
)


In [None]:
# Define an optimizer
import torch.optim as optim

# Loss object and a plain SGD optimizer over all of the model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
  model.parameters(),
  lr=0.001,
  momentum=0.05,  #Brian used momentum=5
)
optimizer  # last expression in the cell, so the notebook displays its settings

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.001
    momentum: 0.05
    nesterov: False
    weight_decay: 0
)

To use the new neural network class we need an optimizer; it is defined above with `torch.optim` (plain SGD).

In [None]:
def fit(epoch, model, data_loader, phase='training', volatile=False):
  """Run one full pass over `data_loader` and report loss and accuracy.

  In the 'training' phase the model is put in train mode and its weights are
  updated after every batch using the module-level `optimizer`. In the
  'validation' phase the model is put in eval mode and no gradients are
  tracked. Any other phase value leaves the model's mode untouched and runs
  without gradient tracking or weight updates.

  Args:
    epoch: Epoch number. Accepted for the caller's convenience; not used.
    model: Module whose output is per-class log-probabilities (it is scored
      with `F.nll_loss`).
    data_loader: Iterable of `(data, target)` batches that exposes the
      underlying dataset as `.dataset` (used for the sample count).
    phase: 'training' or 'validation'.
    volatile: Ignored. Kept only so existing calls keep working; gradient
      tracking is now decided from `phase`.

  Returns:
    `(loss, accuracy)` as Python floats: the mean negative log-likelihood per
    sample and the percentage of correctly classified samples.
  """
  is_training = phase == 'training'
  if is_training:
    model.train()
  elif phase == 'validation':
    model.eval()

  running_loss = 0.0
  running_correct = 0

  for data, target in data_loader:
    if is_training:
      optimizer.zero_grad()

    # Track gradients only when we are going to back-propagate. The old
    # `Variable(data, volatile)` call passed `volatile` positionally as
    # `requires_grad`, which turned gradient tracking ON during validation.
    with torch.set_grad_enabled(is_training):
      output = model(data)
      loss = F.nll_loss(output, target)

    # Accumulate as plain Python numbers so no tensors are kept alive and the
    # returned metrics can be printed and plotted directly.
    running_loss += F.nll_loss(output.detach(), target, reduction='sum').item()
    running_correct += (output.argmax(dim=1) == target).sum().item()

    if is_training:
      loss.backward()
      optimizer.step()

  loss = running_loss / len(data_loader.dataset)
  accuracy = 100. * running_correct / len(data_loader.dataset)

  print(f'{phase} loss is {loss} and {phase} accuracy is {running_correct} / {len(data_loader.dataset)} = {accuracy}')
  return loss, accuracy


In [None]:
# Per-epoch history of loss and accuracy; the training loop appends to these.
train_losses = []
train_accuracy = []
val_losses = []
val_accuracy = []

# Shuffled mini-batches of 32 for each split, loaded with num_workers=3.
train_data_loader = torch.utils.data.DataLoader(
  dataset=trainset,
  batch_size=32,
  shuffle=True,
  num_workers=3,
)
valid_data_loader = torch.utils.data.DataLoader(
  dataset=validset,
  batch_size=32,
  shuffle=True,
  num_workers=3,
)

In [None]:
# Epochs 1..3: one training pass followed by one validation pass. Both results
# are recorded so the loss curves can be plotted afterwards.
for epoch in range(1, 4):
  epoch_loss, epoch_accuracy = fit(
    epoch, model, train_data_loader, phase='training'
  )
  val_epoch_loss, val_epoch_accuracy = fit(
    epoch, model, valid_data_loader, phase='validation'
  )

  # Update the training history, then the validation history.
  train_losses.append(epoch_loss)
  train_accuracy.append(epoch_accuracy)
  val_losses.append(val_epoch_loss)
  val_accuracy.append(val_epoch_accuracy)

In [None]:
# Loss per epoch (x axis is the 1-based epoch number): training loss as blue
# dots, validation loss as a red line.
plt.plot(
  range(1, len(train_losses) + 1),
  train_losses,
  'bo',
  label='training loss',
)
plt.plot(
  range(1, len(val_losses) + 1),
  val_losses,
  'r',
  label='validation loss',
)
plt.legend()
plt.show()