<a href="https://colab.research.google.com/github/ashley-ferreira/PHYS449_FinalProject/blob/main/CNN_Fully_Augmented_Dataset_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **PHYS 449: Final Project Notebook**
#### Reproducing results from "Morphological classification of galaxies with deep learning: comparing 3-way and 4-way CNNs" by Mitchell K. Cavanagh, Kenji Bekki and Brent A. Groves

Use Pytorch on Fully Augmented Dataset.
Use C1 and C2.

# **Set Current Working Directory**

For example, for Ashley this is:

'/content/drive/MyDrive/Fall 2022/PHYS 449/Final Project'

In [1]:
CWD = '/content/drive/MyDrive/' #Jordan's current working directory
from google.colab import drive #mount google drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Import Packages**

Begin by importing all the needed packages

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import transforms as T
from torch.utils.data import Dataset, DataLoader
import re

# **Define Network Structure**
We are considering two 2D CNNs, C1 and C2, which are described in the paper and outlined below

In [3]:
num_classes = 4 #set the number of classes for the model

In [4]:
#C1
networkc1 = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=1, padding=0),
    nn.ReLU(),
    nn.BatchNorm2d(32),
    nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=0),
    nn.ReLU(),
    nn.BatchNorm2d(64),
    nn.MaxPool2d(kernel_size=2),
    nn.Flatten(),
    nn.Dropout(0.5),
    nn.ReLU(),
    nn.Linear(135424,256),
    nn.ReLU(),
    nn.Linear(256, num_classes))

In [5]:
#C2
networkc2 = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=32, kernel_size=7, stride=1, padding=0),
    nn.ReLU(),
    nn.BatchNorm2d(32),
    nn.MaxPool2d(kernel_size=2),

    nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=0),
    nn.ReLU(),
    nn.BatchNorm2d(64),

    nn.Conv2d(in_channels=64, out_channels=64, kernel_size=5, stride=1, padding=0),
    nn.ReLU(),
    nn.BatchNorm2d(64),
    nn.MaxPool2d(kernel_size=2),

    nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=0),
    nn.ReLU(),
    nn.BatchNorm2d(128),
    nn.MaxPool2d(kernel_size=2),

    nn.Flatten(),
    nn.Linear(8192, 256),
    nn.Dropout(0.5),

    nn.ReLU(),
    nn.Linear(256,256),
    nn.ReLU(),
    nn.Linear(256, num_classes))

# **Load Data**

In [6]:
#LOAD THE DATA FROM TXT FILE INTO A BATCH:
def data_batch(datafile_index, num_images=10, data_file=CWD+'/data/data_g_band_v2.txt', plotting=False):
    '''
    Description:
        Access datafile.txt, each row is flattened 110x110 image + 1 label string (E, Sp, S0, Irr+Misc).
        Returns an augmented batch of num_images X 40.
        The labels are converted to 1D vectors (ex: Sp = [0,0,1,0])
        Need to give a datafile_index that tells which rows to pick.
    Inputs:
        datafile_index: index of row in datafile to load. loads rows datafile_index to datafile_index+num_images.
        num_images: number of different images to load per batch, total batch size 
        is 40 X num_images. (default: 10 (for 40X10 = 400 batch size like in paper)
        data_file: datafile full path, need to add shortcut to local Drive. (default: '/content/drive/MyDrive/data/data_g_band.txt')
    Outputs:
        tensor_input_batch_aug: dimensions: (100, 100, num_images X 40). 
        tensor_label_batch_aug: dimensions: (num_images X 40, 4)
    '''

    #Take batch of num_images rows from datafile:
    with open(data_file, 'r') as f:
        rows = f.readlines()[datafile_index:(datafile_index+num_images)]

    #for batch size of 400 (augmented), need 10 images
    data_batch = np.zeros((num_images,12101), dtype=np.dtype('U10'))
    count = 0
    for row in rows:
        data_batch[count,:] = row.split()
        count += 1

    #separate label and input:
    input_batch_flat = np.array(data_batch[:,:12100], dtype=float)#, dtype=int)
    label_batch = np.array(data_batch[:,-1])

    #convert input batch back to a 2D array:
    input_batch = np.zeros((110,110,np.shape(input_batch_flat)[0]))#, dtype=int)
    for ii in range(np.shape(input_batch_flat)[0]):
        input_batch[:,:,ii] = np.reshape(input_batch_flat[ii,:], (110,110))


    #convert label batch into into 1D vector: 
    #E=0, S0=1, Sp=2, Irr+Misc=3
    #ex: label = [0,0,1,0] ==> Sp galagy
    arr_label_batch = np.zeros((np.shape(label_batch)[0],4), dtype=int)
    arr_label_batch[:,0] = np.array([label_batch == 'E'], dtype=int)
    arr_label_batch[:,1] = np.array([label_batch == 'Sp'], dtype=int)
    arr_label_batch[:,2] = np.array([label_batch == 'S0'], dtype=int)
    arr_label_batch[:,3] = np.array([label_batch == 'Irr+Misc'], dtype=int)

    if plotting == True:
      #test with image plotted
      plt.imshow(input_batch[:,:,0])
      plt.show()

    #NOW AUGMENT THE BATCH (40X more):
    input_batch_aug = np.empty((100,100,np.shape(input_batch)[2]*40), dtype=int)
    arr_label_batch_aug = np.empty((np.shape(arr_label_batch)[0]*40, 4), dtype=int)

    count = 0
    for ll in range(np.shape(input_batch)[2]):
        #Crop 5X more image (100X100 pixels)
        C1 = input_batch[:100,:100,ll]
        C2 = input_batch[10:,:100,ll]
        C3 = input_batch[:100,10:,ll]
        C4 = input_batch[10:,10:,ll]
        C5 = input_batch[5:105,5:105,ll]

        C = [C1, C2, C3, C4, C5]

        for kk in range(5):
            #Rotate 4X more image (by 90 deg)
            for jj in range(4):
                C_R = np.rot90(C[kk], k=jj)
                input_batch_aug[:,:,count] = C_R
                arr_label_batch_aug[count,:] = arr_label_batch[ll,:]
                count += 1
                
                input_batch_aug[:,:,count] = np.swapaxes(C_R,0,1)
                arr_label_batch_aug[count,:] = arr_label_batch[ll,:]
                count += 1


    #PUT THE DATA AS A PYTORCH TENSOR:
    tensor_input_batch_aug = torch.Tensor(input_batch_aug)
    tensor_label_batch_aug = torch.Tensor(arr_label_batch_aug)
    
    return tensor_input_batch_aug, tensor_label_batch_aug


In [7]:
network_to_train = 'C1'

# define hyperparameters of training
if network_to_train == 'C1':
  n_epochs = 12
  cn_model = networkc1
  optimizer = torch.optim.Adam(cn_model.parameters(), lr=2e-4)

elif network_to_train == 'C2':
  n_epochs = 20
  cn_model = networkc2
  lr = 2*pow(10,-4)
  optimizer = torch.optim.Adam(cn_model.parameters(), lr=lr)

# **Training**

C2 uses Adam and C1 uses Adam: 


In [8]:
#Define loss function
loss_fn = torch.nn.CrossEntropyLoss()

In [9]:
# Initialize network & move to GPU
cn_model.to('cuda')

Sequential(
  (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU()
  (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (4): ReLU()
  (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (7): Flatten(start_dim=1, end_dim=-1)
  (8): Dropout(p=0.5, inplace=False)
  (9): ReLU()
  (10): Linear(in_features=135424, out_features=256, bias=True)
  (11): ReLU()
  (12): Linear(in_features=256, out_features=4, bias=True)
)

In [10]:
#Define Train and test set
dataset_size = 280
train_split = 0.85# same as in paper
test_split = 1 - train_split
split_cutoff = int(dataset_size*train_split)

rand_index = np.random.permutation(dataset_size)
rand_train = rand_index[:split_cutoff] 
rand_test = rand_index[split_cutoff:dataset_size] # valudation will be taken from test set

In [11]:
# For monitoring acc and losses
avg_epoch_acc_train = []
avg_epoch_acc_val = []
avg_epoch_losses_train = []
avg_epoch_losses_val = []

num_images = 50 #number of images to augment in each batch
batch_size = num_images*40 

print('Model initialized and prepped, begin training...')

for epoch in range(n_epochs):  
    cn_model.train()
    print('epoch:', epoch+1)

    #VALIDATION FOR before any training, (used to check initialization)
    if epoch == 0:
        ds_valid_size = 0
        cn_model.eval()
        epoch_loss = 0
        test_total_accuracy = 0
        with torch.no_grad():
          for ii in range(np.shape(rand_test)[0]):
            im_valid, y_valid = data_batch(datafile_index=num_images*rand_test[ii], num_images=num_images)
            im_valid = im_valid.reshape(100, 100, 1, im_valid.shape[2])
            im_valid = im_valid.T

            im_valid = im_valid.detach().to('cuda')
            y_valid = y_valid.detach().to('cuda')

            y_pred_valid = cn_model(im_valid)
            y_pred_valid_cat = nn.functional.softmax(y_pred_valid, dim=1)

            #updated accuracy calculation:
            test_predictions = torch.argmax(y_pred_valid_cat, dim=1)
            test_label_predictions = torch.argmax(y_valid, dim=1)
            test_batch_size = np.shape(test_predictions)[0]
            test_batch_accuracy = torch.sum(test_predictions == test_label_predictions).item()/test_batch_size
            print(f'\t\t test batch accuracy = {np.round(100*test_batch_accuracy,2)} %, batch # {ds_valid_size}')
            test_total_accuracy += test_batch_accuracy

            loss = loss_fn(y_pred_valid, y_valid)
            epoch_loss += loss.item()
            ds_valid_size += 1

            #delete image and label every loop train:
            del im_valid
            del y_valid
            torch.cuda.empty_cache()
          
          #calculate total loss validation
          v_loss = epoch_loss / ds_valid_size
          avg_epoch_losses_val.append(v_loss)
          print('validation loss:', np.round(v_loss,2))

          #calculate total accuracy validation
          test_total_accuracy = 100 * test_total_accuracy / np.shape(rand_test)[0]
          print('Validation accuracy:', np.round(test_total_accuracy,2), '%')
          avg_epoch_acc_val.append(test_total_accuracy)



    #get training dataset size
    ds_size = 0
    
    train_total_accuracy = 0
    epoch_loss = 0
    for ii in range(np.shape(rand_train)[0]):
      optimizer.zero_grad() #reset the gradients

      im2, y = data_batch(datafile_index=num_images*rand_train[ii], num_images=num_images)

      # reshaping im to what we want
      im2 = im2.reshape(100, 100, 1, im2.shape[2])
      im = im2.T

      del im2

      im = im.detach().to('cuda')
      y = y.detach().to('cuda')

      y_pred = cn_model(im)
      y_pred_cat = nn.functional.softmax(y_pred, dim=1)
      

      #updated accuracy calculation:
      train_predictions = torch.argmax(y_pred_cat, dim=1)
      train_label_predictions = torch.argmax(y, dim=1)
      train_batch_size = np.shape(train_predictions)[0]
      train_batch_accuracy = torch.sum(train_predictions == train_label_predictions).item()/train_batch_size
      print(f'\t\t train batch accuracy = {np.round(100*train_batch_accuracy,2)} %, batch # {ds_size}')
      train_total_accuracy += train_batch_accuracy

      #doing the backprop after each batch
      loss = loss_fn(y_pred, y)
      loss.backward()
      optimizer.step()
      epoch_loss += loss.item()
      ds_size += 1

      del im
      del y
      torch.cuda.empty_cache()

    t_loss = epoch_loss / ds_size
    print('training loss:', np.round(t_loss,2))
    avg_epoch_losses_train.append(t_loss)

    train_total_accuracy = 100 * train_total_accuracy / np.shape(rand_train)[0]
    print('training accuracy:', np.round(train_total_accuracy,2), '%')
    avg_epoch_acc_train.append(train_total_accuracy)


    #Full VALIDATION:----------------------------------

    ds_valid_size = 0
    cn_model.eval() 
    epoch_loss = 0
    test_total_accuracy = 0
    with torch.no_grad():
      for ii in range(np.shape(rand_test)[0]):
        im_valid, y_valid = data_batch(datafile_index=num_images*rand_test[ii], num_images=num_images)
        im_valid = im_valid.reshape(100, 100, 1, im_valid.shape[2])
        im_valid = im_valid.T

        im_valid = im_valid.detach().to('cuda')
        y_valid = y_valid.detach().to('cuda')

        y_pred_valid = cn_model(im_valid)
        y_pred_valid_cat = nn.functional.softmax(y_pred_valid, dim=1)

        #updated accuracy calculation:
        test_predictions = torch.argmax(y_pred_valid_cat, dim=1)
        test_label_predictions = torch.argmax(y_valid, dim=1)
        test_batch_size = np.shape(test_predictions)[0]
        test_batch_accuracy = torch.sum(test_predictions == test_label_predictions).item()/test_batch_size
        print(f'\t\t test batch accuracy = {np.round(100*test_batch_accuracy,2)} %, batch # {ds_valid_size}')
        test_total_accuracy += test_batch_accuracy

        loss = loss_fn(y_pred_valid, y_valid)
        epoch_loss += loss.item()
        ds_valid_size += 1

        #delete image and label every loop train:
        del im_valid
        del y_valid
        torch.cuda.empty_cache()
      
      #calculate total loss validation
      v_loss = epoch_loss / ds_valid_size
      avg_epoch_losses_val.append(v_loss)
      print('validation loss:', np.round(v_loss,2))

      #calculate total accuracy validation
      test_total_accuracy = 100 * test_total_accuracy / np.shape(rand_test)[0]
      print('Validation accuracy:', np.round(test_total_accuracy,2), '%')
      avg_epoch_acc_val.append(test_total_accuracy)


print("DONE TRAINING")

Model initialized and prepped, begin training...
epoch: 1


  im_valid = im_valid.T


KeyboardInterrupt: ignored

Ashley's troubleshooting notes:
- I actually run into the issue again when I try num_images = 50, but num_images = 10 seems to be totally fine
- I thought the issue was due to us using empty in the data loading function and it using old memory so I did make some changes in that function like replacing that with zeros
- C1 might not be working due to different stride and no set learning rate
- The paper also seems to have weird learning results...

In [None]:
train_acc = np.array(avg_epoch_acc_train)
valid_acc = np.array(avg_epoch_acc_val)
train_loss = np.array(avg_epoch_losses_train)
valid_loss = np.array(avg_epoch_losses_val)

print(train_acc, valid_acc, train_loss, valid_loss)

#Plot accuracy results:
plt.figure(figsize=(7,5)) #set plot size

plt.plot(range(np.shape(train_acc)[0]), train_acc, label='Training Accuracy', 
             linestyle='-', color='red', linewidth=3)
plt.plot(range(np.shape(valid_acc)[0]), valid_acc, label='Validation Accuracy', 
             linestyle='-', color='blue', linewidth=3)

plt.yticks(fontsize=12, rotation=0) #adjust axis tick numbers font size
plt.xticks(fontsize=12, rotation=0) #adjust axis tick numbers font size
plt.xlabel('Epoch Number', fontsize=14) #set axis label
plt.ylabel('Percent Accuracy', fontsize=14) #set axis label
plt.title('Training of 4 way C2 CNN Network', fontsize=16) #set title
plt.legend(fontsize=10)
plt.xlim(0, np.shape(train_acc)[0]-1) #set axis limits
plt.grid(True, which='minor', color='gray', linestyle='--', linewidth=1, alpha=0.2) #set gridlines
plt.grid(True, which='major', color='gray', linestyle='-', linewidth=1, alpha=0.5) #set gridlines
plt.tight_layout()
plt.savefig(CWD+'data/Jordan_C1_4way_Accuracy_plot_v1.png',dpi=300)
#plt.close() #Stops the figure from being shown
plt.show() #display the figure



#Plot loss results:
plt.figure(figsize=(7,5)) #set plot size

plt.plot(range(np.shape(train_loss)[0]), train_loss, label='Training Loss', 
             linestyle='-', color='red', linewidth=3)
plt.plot(range(np.shape(valid_loss)[0]), valid_loss, label='Validation Loss', 
             linestyle='-', color='blue', linewidth=3)

plt.yticks(fontsize=12, rotation=0) #adjust axis tick numbers font size
plt.xticks(fontsize=12, rotation=0) #adjust axis tick numbers font size
plt.xlabel('Epoch Number', fontsize=14) #set axis label
plt.ylabel('Loss', fontsize=14) #set axis label
plt.title('Training of 4 way C2 CNN Network', fontsize=16) #set title
plt.legend(fontsize=10)
plt.xlim(0, np.shape(train_acc)[0]-1) #set axis limits
plt.grid(True, which='minor', color='gray', linestyle='--', linewidth=1, alpha=0.2) #set gridlines
plt.grid(True, which='major', color='gray', linestyle='-', linewidth=1, alpha=0.5) #set gridlines
plt.tight_layout()
#plt.yscale('log')
plt.savefig(CWD+'data/Jordan_C1_4way_Loss_plot_v1.png',dpi=300)
#plt.close() #Stops the figure from being shown
plt.show() #display the figure

In [None]:
# save model itself 
torch.save(cn_model.state_dict(), CWD+'data/C1_4way_Full_Augmentation_model')#, CWD + 'Notebooks/models/')