# Lab 8: Learning from Images

### Part 1: Butterfly Classification
Training and test sets for 10 butterfly species. All images are 224 × 224 × 3 in JPG format.

In [None]:
# Mounting Google Drive
# (Colab only) the dataset paths used in the cells below all live under /content/drive/MyDrive
from google.colab import drive
drive.mount('/content/drive')

In [2]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report
import numpy as np

from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from itertools import chain
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder

In [3]:
# First-pass preprocessing pipeline for the butterfly images (no normalization yet)
Butterfly_trans = transforms.Compose([
    # bring every image to 224 x 224
    transforms.Resize(size=(224, 224)),
    # convert to a torch.FloatTensor of shape (C x H x W) with values in [0.0, 1.0]
    transforms.ToTensor(),
])

In [4]:
# Read the butterfly folders (one sub-folder per class) and run every image through Butterfly_trans
train_butterfly10 = ImageFolder(
    root='/content/drive/MyDrive/IS675_data/Week8_data/butterfly_train_top10',
    transform=Butterfly_trans,
)
test_butterfly10 = ImageFolder(
    root='/content/drive/MyDrive/IS675_data/Week8_data/butterfly_test_top10',
    transform=Butterfly_trans,
)

In [None]:
# Examine the number of classes
# (the dataset description above says 10 butterfly species, so 10 is the expected value)
len(train_butterfly10.classes)

In [None]:
# Examine the sizes of training and test data
# Prints the number of training images followed by the number of test images
print(len(train_butterfly10), len(test_butterfly10))

In [None]:
# Show one test image per class in a 2 x 5 grid
class_names = ['AN 88','BLUE MORPHO','COMMON WOOD-NYMPH','MONARCH','PEACOCK','PIPEVINE SWALLOW','ULYSES','VICEROY','YELLOW SWALLOW TAIL','ZEBRA LONG WING']
num_classes = 10

fig = plt.figure(figsize=(15,5))
for i in range(num_classes):
    ax = fig.add_subplot(2, 5, i + 1, xticks=[], yticks=[])
    ax.set_title(class_names[i])
    # first test image whose label equals the current class index
    img = next(sample for sample, target in test_butterfly10 if target == i)
    # tensors are (C, H, W); matplotlib expects (H, W, C)
    ax.imshow(img.permute(1, 2, 0))
plt.show()

In [None]:
# Examine the tensor of a zebra long wing image
# `img` is the variable left over from the last iteration (class index 9) of the display loop above
print(img.shape)

In [None]:
# Examine the mean and std of images in the training data
# Every training image is loaded and stacked along a new trailing dimension (dim=3),
# so the whole training set is held in memory at once.
# The stack is then viewed as 3 rows and reduced along dim=1, giving one mean/std per row.
# `x1` is a placeholder that must be replaced before this cell can run (see Q1).
imgs=torch.stack([img_t for img_t,label in train_butterfly10],dim=3)
print(imgs.view(3,x1).mean(dim=1),imgs.view(3,x1).std(dim=1))

**Q1.** a) What does .view() do in the code above? b) What values can x1 take?

In [20]:
# Preprocessing pipeline for the butterfly images, now ending with per-channel normalization
Butterfly_trans = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    # one mean / std value per colour channel
    transforms.Normalize(
        mean=[0.4630, 0.4530, 0.3405],
        std=[0.2862, 0.2758, 0.2845],
    ),
])

In [21]:
# Re-create both datasets so that they use the current (normalizing) Butterfly_trans pipeline
train_butterfly10 = ImageFolder(
    root='/content/drive/MyDrive/IS675_data/Week8_data/butterfly_train_top10',
    transform=Butterfly_trans,
)
test_butterfly10 = ImageFolder(
    root='/content/drive/MyDrive/IS675_data/Week8_data/butterfly_test_top10',
    transform=Butterfly_trans,
)

In [22]:
# Define training and testing data loader, and set batch size to 64
# NOTE(review): this comment previously said 128 while the code uses 64 — confirm which value is intended
# Only the training loader shuffles; the test loader keeps the dataset order
train_loader_butterfly10 = torch.utils.data.DataLoader(train_butterfly10, batch_size=64, shuffle=True)
test_loader_butterfly10 = torch.utils.data.DataLoader(test_butterfly10, batch_size=64, shuffle=False)

In [23]:
# Build a neural network on training data
class neural_network(nn.Module):
    """Fully connected network with two hidden ReLU layers.

    Args:
        in_size: Number of features per sample after flattening.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        out_size: Number of output units.
    """

    def __init__(self,  in_size, hidden_size1,hidden_size2, out_size):
        super().__init__()
        self.network = nn.Sequential(
          nn.Linear(in_size, hidden_size1),
          nn.ReLU(),
          nn.Linear(hidden_size1, hidden_size2),
          nn.ReLU(),
          nn.Linear(hidden_size2, out_size))

    def forward(self, x):
        """Flatten each sample and return the output of the last linear layer.

        Args:
            x: Batch tensor whose first dimension is the batch size; all
                remaining dimensions are flattened into one feature vector.

        Returns:
            Tensor of shape (batch, out_size), or (batch,) when out_size is 1.
        """
        # keep the batch dimension, merge everything else into one feature axis
        x_reshape = x.view(x.shape[0], -1)
        out = self.network(x_reshape)
        # Only a trailing size-1 dimension is removed. A bare squeeze() would
        # also drop the batch dimension for a batch holding a single sample,
        # which breaks the loss computation and torch.max(outputs, dim=1).
        return out.squeeze(-1)

In [24]:
# Define training loop function
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, log_every=1):
    """Train `model` for `n_epochs` passes over `train_loader` and print the mean batch loss.

    Args:
        n_epochs: Number of passes over the training data.
        optimizer: Optimizer already bound to the parameters of `model`.
        model: Module called as `model(inputs)` on each batch.
        loss_fn: Callable returning a scalar loss from `(outputs, labels)`.
        train_loader: Sized iterable yielding `(inputs, labels)` batches.
        log_every: Print the loss every `log_every` epochs. The first and the
            last epoch are always printed. The default of 1 prints every epoch.

    Raises:
        ValueError: If `log_every` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")

    for epoch in range(n_epochs):
        # Training Phase
        model.train()
        loss_train = 0.0
        for inputs, labels in train_loader:
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)

            # clear gradients left from the previous batch before back-propagating
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()

        # epoch 0 always satisfies the modulo test, so only the last epoch needs its own check
        if epoch % log_every == 0 or epoch == n_epochs - 1:
            print('Epoch {}, Training loss {}'.format(epoch, loss_train / len(train_loader)))

In [None]:
# Model training with hidden layers of size 128 and 64
# x2..x5 stand for (in_size, hidden_size1, hidden_size2, out_size); they are placeholders
# that must be replaced with concrete integers before this cell can run (see Q2)
torch.manual_seed(0)  # fixed seed so repeated runs start from the same random state
model = neural_network(x2, x3, x4, x5)
optimizer = optim.Adam(model.parameters())  # Adam with its default settings
loss_fn = nn.CrossEntropyLoss()

training_loop(n_epochs = 5, optimizer = optimizer, model = model, loss_fn = loss_fn, train_loader = train_loader_butterfly10)

**Q2.** What are the values for x2 to x5? Which ones are arbitrary?

**Q3.** Why don't we need to use softmax in the output layer of our neural_network, despite having a multi-class classification task?

In [26]:
# Define testing function
def _collect_predictions(model, loader):
    """Return (true_labels, predicted_labels) as flat lists covering every batch of `loader`."""
    y_true, y_pred = [], []
    for inputs, targets in loader:
        outputs = model(inputs)
        # predicted class = index of the largest score in each row
        y_pred.extend(outputs.argmax(dim=1).tolist())
        y_true.extend(targets.tolist())
    return y_true, y_pred


def test(model, train_loader, test_loader, labels=None):
    """Print a confusion matrix and a classification report for the train and test data.

    Args:
        model: Trained module; it is switched to evaluation mode.
        train_loader: Iterable of `(inputs, labels)` batches of training data.
        test_loader: Iterable of `(inputs, labels)` batches of test data.
        labels: Class indices to include in the reports. Defaults to 0..9.
    """
    if labels is None:
        labels = list(range(10))

    # testing phase
    model.eval()
    with torch.no_grad():  # no gradients are needed for evaluation
        train_true, train_pred = _collect_predictions(model, train_loader)
        test_true, test_pred = _collect_predictions(model, test_loader)

    print("Confusion matrix on train:\n", confusion_matrix(train_true, train_pred, labels=labels))
    print()
    print("Classification report on train:\n", classification_report(train_true, train_pred, labels=labels))
    print()
    print("Confusion matrix on test:\n", confusion_matrix(test_true, test_pred, labels=labels))
    print()
    print("Classification report on test:\n", classification_report(test_true, test_pred, labels=labels))

In [None]:
# Examine evaluation results
# Prints the confusion matrix and classification report for the butterfly training and test sets
test(model, train_loader_butterfly10, test_loader_butterfly10)

**Q4.** What parameters can we change in our neural net to potentially improve the performance? List everything you can think of.

### Part 2: Image Classification with Fashion-MNIST
Fashion-MNIST is a dataset consisting of a training set of 60,000 examples and a test set of 10,000 examples.
Each example is a 28x28 grayscale image, associated with a label from 10 classes. (https://github.com/zalandoresearch/fashion-mnist)


Label Description
Each training and test example is assigned to one of the following labels:

0 T-shirt/top,
1 Trouser,
2 Pullover,
3 Dress,
4 Coat,
5 Sandal,
6 Shirt,
7 Sneaker,
8 Bag,
9 Ankle boot

In [28]:
# First-pass preprocessing pipeline for Fashion-MNIST: tensor conversion only, no normalization yet
MNIST_transform = transforms.Compose([
    transforms.ToTensor(),
])

In [29]:
# Fetch Fashion-MNIST (downloaded into the Drive folder when requested) and apply MNIST_transform
train_data = datasets.FashionMNIST(
    root='/content/drive/MyDrive/IS675_data',
    train=True,
    download=True,
    transform=MNIST_transform,
)
test_data = datasets.FashionMNIST(
    root='/content/drive/MyDrive/IS675_data',
    train=False,
    download=True,
    transform=MNIST_transform,
)

In [None]:
# Examine the sizes of training and test data
# (the dataset description above states 60,000 training and 10,000 test examples)
print(len(train_data), len(test_data))

In [None]:
# Show one test image per class in a 2 x 5 grid
class_names = ['T-shirt/top','Trouser','Pullover','Dress','Coat','Sandal','Shirt','Sneaker','Bag','Ankle boot']
num_classes = 10

fig = plt.figure(figsize=(8,3))
for i in range(num_classes):
    ax = fig.add_subplot(2, 5, i + 1, xticks=[], yticks=[])
    ax.set_title(class_names[i])
    # first test image whose label equals the current class index
    img = next(sample for sample, target in test_data if target == i)
    # drop the size-1 channel dimension so the image is 2-D for a grayscale plot
    ax.imshow(np.squeeze(img), cmap='gray')
plt.show()

In [None]:
# Examine the tensor of an Ankle boot image
# `img` is the variable left over from the last iteration (class index 9) of the display loop above
print(img.shape)
print(img)

In [None]:
# Examine the mean and std of images in the training data
# Every training image is loaded and stacked along a new trailing dimension (dim=3).
# The stack is then viewed as a single row and reduced along dim=1, giving one overall mean/std.
# `x6` is a placeholder that must be replaced before this cell can run (see Q5).
imgs=torch.stack([img_t for img_t,label in train_data],dim=3)
print(imgs.view(1,x6).mean(dim=1),imgs.view(1,x6).std(dim=1))

**Q5.**  What values can x6 take?

In [35]:
# Preprocessing pipeline for Fashion-MNIST with normalization added after tensor conversion
MNIST_trans = transforms.Compose([
    transforms.ToTensor(),
    # single mean / std value for the single grayscale channel
    transforms.Normalize(mean=[0.2860], std=[0.3530]),
])

In [36]:
# Reload the images through the normalizing pipeline.
# MNIST_trans (ToTensor + Normalize) must be passed here: passing MNIST_transform again
# would leave the data un-normalized, because that pipeline only converts to tensors.
train_data = datasets.FashionMNIST('/content/drive/MyDrive/IS675_data', download = True, train = True, transform=MNIST_trans)
test_data = datasets.FashionMNIST('/content/drive/MyDrive/IS675_data', download = True, train = False, transform=MNIST_trans)

In [37]:
# Batch the Fashion-MNIST data 256 images at a time; only the training batches are shuffled
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=256,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=256,
    shuffle=False,
)

In [38]:
# Build a neural network on training data
class neural_network(nn.Module):
    """Fully connected network with two hidden ReLU layers.

    Args:
        in_size: Number of features per sample after flattening.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        out_size: Number of output units.
    """

    def __init__(self,  in_size, hidden_size1,hidden_size2, out_size):
        super().__init__()
        self.network = nn.Sequential(
          nn.Linear(in_size, hidden_size1),
          nn.ReLU(),
          nn.Linear(hidden_size1, hidden_size2),
          nn.ReLU(),
          nn.Linear(hidden_size2, out_size))

    def forward(self, x):
        """Flatten each sample and return the output of the last linear layer.

        Args:
            x: Batch tensor whose first dimension is the batch size; all
                remaining dimensions are flattened into one feature vector.

        Returns:
            Tensor of shape (batch, out_size), or (batch,) when out_size is 1.
        """
        # keep the batch dimension, merge everything else into one feature axis
        x_reshape = x.view(x.shape[0], -1)
        out = self.network(x_reshape)
        # Only a trailing size-1 dimension is removed. A bare squeeze() would
        # also drop the batch dimension for a batch holding a single sample,
        # which breaks the loss computation and torch.max(outputs, dim=1).
        return out.squeeze(-1)

In [39]:
# Define training loop function
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, log_every=1):
    """Train `model` for `n_epochs` passes over `train_loader` and print the mean batch loss.

    Args:
        n_epochs: Number of passes over the training data.
        optimizer: Optimizer already bound to the parameters of `model`.
        model: Module called as `model(inputs)` on each batch.
        loss_fn: Callable returning a scalar loss from `(outputs, labels)`.
        train_loader: Sized iterable yielding `(inputs, labels)` batches.
        log_every: Print the loss every `log_every` epochs. The first and the
            last epoch are always printed. The default of 1 prints every epoch.

    Raises:
        ValueError: If `log_every` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")

    for epoch in range(n_epochs):
        # Training Phase
        model.train()
        loss_train = 0.0
        for inputs, labels in train_loader:
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)

            # clear gradients left from the previous batch before back-propagating
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()

        # epoch 0 always satisfies the modulo test, so only the last epoch needs its own check
        if epoch % log_every == 0 or epoch == n_epochs - 1:
            print('Epoch {}, Training loss {}'.format(epoch, loss_train / len(train_loader)))

**Q6.** Initialize the neural network with two hidden layers of size 256 and 64, choose adam optimizer and Cross Entropy loss, and train the model for 10 epochs.

In [None]:
# Q6. Model training
# Exercise stub: the model, optimizer, loss and training call are to be added below the seed.
# The evaluation cell further down calls test(model, ...), so this cell has to assign `model`.
torch.manual_seed(0)


In [41]:
# Define testing function
def _collect_predictions(model, loader):
    """Return (true_labels, predicted_labels) as flat lists covering every batch of `loader`."""
    y_true, y_pred = [], []
    for inputs, targets in loader:
        outputs = model(inputs)
        # predicted class = index of the largest score in each row
        y_pred.extend(outputs.argmax(dim=1).tolist())
        y_true.extend(targets.tolist())
    return y_true, y_pred


def test(model, train_loader, test_loader, labels=None):
    """Print a confusion matrix and a classification report for the train and test data.

    Args:
        model: Trained module; it is switched to evaluation mode.
        train_loader: Iterable of `(inputs, labels)` batches of training data.
        test_loader: Iterable of `(inputs, labels)` batches of test data.
        labels: Class indices to include in the reports. Defaults to 0..9.
    """
    if labels is None:
        labels = list(range(10))

    # testing phase
    model.eval()
    with torch.no_grad():  # no gradients are needed for evaluation
        train_true, train_pred = _collect_predictions(model, train_loader)
        test_true, test_pred = _collect_predictions(model, test_loader)

    print("Confusion matrix on train:\n", confusion_matrix(train_true, train_pred, labels=labels))
    print()
    print("Classification report on train:\n", classification_report(train_true, train_pred, labels=labels))
    print()
    print("Confusion matrix on test:\n", confusion_matrix(test_true, test_pred, labels=labels))
    print()
    print("Classification report on test:\n", classification_report(test_true, test_pred, labels=labels))

In [None]:
# Examine evaluation results
# Uses the `model` assigned in the Q6 training cell and the Fashion-MNIST loaders defined above
test(model, train_loader, test_loader)

### Part 3: Image Classification with CIFAR-10 data
CIFAR-10 data: CIFAR-10 consists of 60,000 tiny 32 × 32 color (RGB) images, each labeled with an integer corresponding to 1 of 10 classes:

0: airplane,
1: automobile,
2: bird,
3: cat,
4: deer,
5: dog,
6: frog,
7: horse,
8: ship,
9: truck,

In [42]:
# First-pass preprocessing pipeline for CIFAR-10: tensor conversion only, no normalization yet
cifar10_transform = transforms.Compose([
    transforms.ToTensor(),
])

In [None]:
# Fetch CIFAR-10 (downloaded into the Drive folder when requested) and apply cifar10_transform
train_cifar10 = datasets.CIFAR10(
    root='/content/drive/MyDrive/IS675_data',
    train=True,
    download=True,
    transform=cifar10_transform,
)
test_cifar10 = datasets.CIFAR10(
    root='/content/drive/MyDrive/IS675_data',
    train=False,
    download=True,
    transform=cifar10_transform,
)

In [None]:
# Examine the sizes of training and test data
# Prints the number of training images followed by the number of test images
print(len(train_cifar10), len(test_cifar10))

In [None]:
# Show one training image per class in a 2 x 5 grid
class_names = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']
num_classes = 10

fig = plt.figure(figsize=(8,3))
for i in range(num_classes):
    ax = fig.add_subplot(2, 5, i + 1, xticks=[], yticks=[])
    ax.set_title(class_names[i])
    # first training image whose label equals the current class index
    img = next(sample for sample, target in train_cifar10 if target == i)
    # (C, H, W) -> (H, W, C), i.e. torch.Size([32, 32, 3]), as matplotlib expects
    ax.imshow(img.permute(1, 2, 0))
plt.show()

In [None]:
# Examine the tensor of a truck image
# `img` is the variable left over from the last iteration (class index 9) of the display loop above
print(img.shape)
print(img)

In [None]:
# Examine the mean and std of images in the training data
# Every training image is loaded and stacked along a new trailing dimension (dim=3).
# The stack is then viewed as a single row and reduced along dim=1, giving one overall mean/std
# rather than one value per colour channel.
# `x7` is a placeholder that must be replaced before this cell can run (see Q7).
imgs=torch.stack([img_t for img_t,label in train_cifar10],dim=3)
print(imgs.shape)
print(imgs.view(1,x7).mean(dim=1),imgs.view(1,x7).std(dim=1))

**Q7.**  What values can x7 take?

In [53]:
# Preprocessing pipeline for CIFAR-10 with normalization added after tensor conversion
cifar10_trans = transforms.Compose([
    transforms.ToTensor(),
    # a single mean / std value is supplied here, not one per colour channel
    transforms.Normalize(mean=[0.4734], std=[0.2516]),
])

In [None]:
# Reload the images through the normalizing pipeline.
# cifar10_trans (ToTensor + Normalize) must be passed here: passing cifar10_transform again
# would leave the data un-normalized, because that pipeline only converts to tensors.
train_cifar10 = datasets.CIFAR10('/content/drive/MyDrive/IS675_data', train=True, download=True, transform=cifar10_trans)
test_cifar10 = datasets.CIFAR10('/content/drive/MyDrive/IS675_data', train=False, download=True, transform=cifar10_trans)

In [55]:
# Batch the CIFAR-10 data 256 images at a time; only the training batches are shuffled
train_loader = torch.utils.data.DataLoader(
    dataset=train_cifar10,
    batch_size=256,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_cifar10,
    batch_size=256,
    shuffle=False,
)

In [56]:
# Build a neural network on training data
class neural_network(nn.Module):
    """Fully connected network with two hidden ReLU layers.

    Args:
        in_size: Number of features per sample after flattening.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        out_size: Number of output units.
    """

    def __init__(self,  in_size, hidden_size1,hidden_size2, out_size):
        super().__init__()
        self.network = nn.Sequential(
          nn.Linear(in_size, hidden_size1),
          nn.ReLU(),
          nn.Linear(hidden_size1, hidden_size2),
          nn.ReLU(),
          nn.Linear(hidden_size2, out_size))

    def forward(self, x):
        """Flatten each sample and return the output of the last linear layer.

        Args:
            x: Batch tensor whose first dimension is the batch size; all
                remaining dimensions are flattened into one feature vector.

        Returns:
            Tensor of shape (batch, out_size), or (batch,) when out_size is 1.
        """
        # keep the batch dimension, merge everything else into one feature axis
        x_reshape = x.view(x.shape[0], -1)
        out = self.network(x_reshape)
        # Only a trailing size-1 dimension is removed. A bare squeeze() would
        # also drop the batch dimension for a batch holding a single sample,
        # which breaks the loss computation and torch.max(outputs, dim=1).
        return out.squeeze(-1)

In [57]:
# Define training loop function
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, log_every=1):
    """Train `model` for `n_epochs` passes over `train_loader` and print the mean batch loss.

    Args:
        n_epochs: Number of passes over the training data.
        optimizer: Optimizer already bound to the parameters of `model`.
        model: Module called as `model(inputs)` on each batch.
        loss_fn: Callable returning a scalar loss from `(outputs, labels)`.
        train_loader: Sized iterable yielding `(inputs, labels)` batches.
        log_every: Print the loss every `log_every` epochs. The first and the
            last epoch are always printed. The default of 1 prints every epoch.

    Raises:
        ValueError: If `log_every` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")

    for epoch in range(n_epochs):
        # Training Phase
        model.train()
        loss_train = 0.0
        for inputs, labels in train_loader:
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)

            # clear gradients left from the previous batch before back-propagating
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()

        # epoch 0 always satisfies the modulo test, so only the last epoch needs its own check
        if epoch % log_every == 0 or epoch == n_epochs - 1:
            print('Epoch {}, Training loss {}'.format(epoch, loss_train / len(train_loader)))

**Q8.** Initialize the neural network with two hidden layers of size 256 and 64, choose adam optimizer and Cross Entropy loss, and train the model for 10 epochs.

In [None]:
# Q8. Model training
# Exercise stub: the model, optimizer, loss and training call are to be added below the seed.
# The evaluation cell further down calls test(model, ...), so this cell has to assign `model`.
torch.manual_seed(0)


In [59]:
# Define testing function
def _collect_predictions(model, loader):
    """Return (true_labels, predicted_labels) as flat lists covering every batch of `loader`."""
    y_true, y_pred = [], []
    for inputs, targets in loader:
        outputs = model(inputs)
        # predicted class = index of the largest score in each row
        y_pred.extend(outputs.argmax(dim=1).tolist())
        y_true.extend(targets.tolist())
    return y_true, y_pred


def test(model, train_loader, test_loader, labels=None):
    """Print a confusion matrix and a classification report for the train and test data.

    Args:
        model: Trained module; it is switched to evaluation mode.
        train_loader: Iterable of `(inputs, labels)` batches of training data.
        test_loader: Iterable of `(inputs, labels)` batches of test data.
        labels: Class indices to include in the reports. Defaults to 0..9.
    """
    if labels is None:
        labels = list(range(10))

    # testing phase
    model.eval()
    with torch.no_grad():  # no gradients are needed for evaluation
        train_true, train_pred = _collect_predictions(model, train_loader)
        test_true, test_pred = _collect_predictions(model, test_loader)

    print("Confusion matrix on train:\n", confusion_matrix(train_true, train_pred, labels=labels))
    print()
    print("Classification report on train:\n", classification_report(train_true, train_pred, labels=labels))
    print()
    print("Confusion matrix on test:\n", confusion_matrix(test_true, test_pred, labels=labels))
    print()
    print("Classification report on test:\n", classification_report(test_true, test_pred, labels=labels))

In [None]:
# Examine evaluation results
# Uses the `model` assigned in the Q8 training cell and the CIFAR-10 loaders defined above
test(model, train_loader, test_loader)

**Q9.** Describe the model's performance.

**Before submitting, make sure to keep only the evaluation results logs and close any other logs.**

In [None]:
# Generate an HTML file
# Shell command (leading "!"): converts the saved notebook at the quoted Drive path to HTML
!jupyter nbconvert --to html "/content/drive/MyDrive/Colab Notebooks/IS675_lab8.ipynb"