In [1]:
%pip install h5py

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from collections import defaultdict
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.image as mpimg
from torch.utils.data import DataLoader

In [4]:
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        #self.linear_layer_size = linear_layer_size

        self.conv1 = torch.nn.Conv2d(3, 16, kernel_size=3)  # input: torch.Size([64, 3, 96, 96])
        self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=3) # input: torch.Size([64, 16, 47, 47])
        self.conv3 = torch.nn.Conv2d(32, 64, kernel_size=3) # input: torch.Size([64, 32, 22, 22])
        self.conv4 = torch.nn.Conv2d(64, 128, kernel_size=3) # input: torch.Size([64, 64, 10, 10])
        # output: torch.Size([64, 128, 4, 4])
        

    def forward(self, x):
        #print(x.shape)
        batch_size = x.size(0)

        relu = torch.nn.ReLU()
        maxpool = torch.nn.MaxPool2d(stride=2,kernel_size=2)

        x = self.conv1(x)
        x = maxpool(relu(x))
        #print(x.shape)
        x = self.conv2(x)
        x = maxpool(relu(x))
        #print(x.shape)
        x = self.conv3(x)
        x = maxpool(relu(x))
        #print(x.shape)
        x = self.conv4(x)
        x = maxpool(relu(x))
        #print(x.shape)
        # x = x.reshape(batch_size, self.linear_layer_size)
        # x = self.linear(x)
        return x

In [5]:
class FC(torch.nn.Module):
    def __init__(self):
          super(FC, self).__init__()
          self.fc1 = torch.nn.Linear(4 * 4 * 128, 1)  # input: (6,6,128) # output 2 classes
          self.actv = torch.nn.Sigmoid()
    def forward(self, x):
        #print(x.shape)
        batch_size = x.size(0)
        x = torch.flatten(x, 1)
        #print(x.shape)
        x = self.fc1(x)
        x = self.actv(x)
        return x


In [6]:
class RNN(torch.nn.Module):
    def __init__(self, ):
        super(RNN, self).__init__()
        self.input_size = 128
        self.output_size = 2
        self.hidden_size = 32
        self.n_layers = 1
        # batch_first means that the first dim of the input and output will be the batch_size
        self.rnn = torch.nn.RNN(self.input_size, self.hidden_size, self.n_layers, batch_first=True)
        # last, fully-connected layer
        self.fc = torch.nn.Linear(self.hidden_dim, self.output_size)

    def forward(self, x, hidden):
        # x (batch_size, seq_length, input_size)
        # hidden (n_layers, batch_size, hidden_dim)
        # r_out (batch_size, time_step, hidden_size)
        batch_size = x.size(0)
        
        # get RNN outputs
        r_out, hidden = self.rnn(x, hidden)
        # shape output to be (batch_size*seq_length, hidden_dim)
        r_out = r_out.view(-1, self.hidden_size)
        
        # get final output 
        output = self.fc(r_out)
        
        return output, hidden

In [7]:
class LSTM(torch.nn.Module):
    def __init__(self):
        super(LSTM,self).__init__()
        self.input_size = 128
        self.output_size = 2
        self.hidden_size = 32
        self.n_layers = 1
        self.lstm = torch.nn.LSTM(input_size=self.input_size,hidden_size=self.hidden_size,num_layers=self.n_layers,batch_first=True)
        self.fc1 = torch.nn.Linear(in_features=5,out_features=self.output_size)

    def forward(self,x):
        output,_status = self.lstm(x)
        output = output.view(-1, self.hidden_size)
        output = self.fc1(torch.relu(output))
        return output


In [8]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [9]:
train_data = datasets.PCAM(
    root="/content/drive/MyDrive/CS449",
    split="train",
    download=False,
    transform=ToTensor()
)

In [None]:
train_data_small = []
for i in range(5000):
  print(i)
  train_data_small.append(train_data[i])

In [18]:
train_dataloader = DataLoader(train_data_small, batch_size=32, shuffle=True)

In [19]:
def run_one_epoch(epoch_idx, model1, model2, optimizer, train_dataloader, val_dataloader, train=True):
    model1.train()
    model2.train()
    # loop through the entire dataset once per epoch
    train_loss = 0.0
    train_acc = 0.0
    train_total = 0
    for batch_idx, (images, labels) in enumerate(train_dataloader):
        optimizer.zero_grad()
        images = images.to(device)
        labels = labels.to(device)
        model1_output = model1(images)
        #print("hi")
        predictions = model2(model1_output).squeeze()
        acc_sum = 0
        for i, pred in enumerate(predictions):

          if round(pred.item()) == labels[i]:
            acc_sum += 1
        train_acc = acc_sum / labels.size(0)
        # acc = torch.sum(predictions == labels) / labels.size(0)
        train_loss = torch.nn.BCELoss()(predictions, labels.float())
        train_loss.backward()
        optimizer.step()

    # update statistics
        train_loss += (loss * len(images))
        train_acc += (acc * len(labels))
        train_total += len(labels)
      
    
    train_loss /= train_total
    train_acc /= train_total

    model1.eval()
    model2.eval()
    with torch.no_grad():
      for batch_idx, (images, labels) in enumerate(val_dataloader):
          images = images.to(device)
          labels = labels.to(device)
          model1_output = model1(images)
          predictions = model2(model1_output).squeeze()
          acc_sum = 0
          for i, pred in enumerate(predictions):
            if round(pred.item()) == labels[i]:
              acc_sum += 1
          val_acc = acc_sum / labels.size(0)
          # acc = torch.sum(predictions == labels) / labels.size(0)
          val_loss = torch.nn.BCELoss()(predictions, labels.float())

    print(f"Epoch {epoch_idx + 1}:  val loss {val_loss :0.3f}, val acc {val_acc :0.3f}, train loss {train_loss :0.3f}, train acc {train_acc :0.3f}")

    # Detach tells torch to stop tracking a tensor's gradients
    return train_acc, train_loss.detach(), val_acc, val_loss.detach()

In [26]:
cnn_fc = CNN()
cnn_fc.to(device)
fc = FC()
fc.to(device)

FC(
  (fc1): Linear(in_features=2048, out_features=1, bias=True)
  (actv): Sigmoid()
)

In [28]:
val_data_small = []
for i in range(5000, 6000, 1):
  val_data_small.append(train_data[i])

In [29]:
val_dataloader = DataLoader(val_data_small, batch_size=32, shuffle=True)

In [27]:
epochs = 50
lr = 1e-3


opt = torch.optim.Adam([{"params": cnn_fc.parameters()},{"params":fc.parameters()}], lr=lr)
for epoch in range(epochs):
  train_acc, train_loss, val_acc, val_loss = run_one_epoch(epoch+1, cnn_fc, fc, opt, train_dataloader, val_dataloader)

Epoch 2: train loss 0.661, train acc 0.576
Epoch 3: train loss 0.513, train acc 0.757
Epoch 4: train loss 0.478, train acc 0.776
Epoch 5: train loss 0.458, train acc 0.788
Epoch 6: train loss 0.449, train acc 0.799
Epoch 7: train loss 0.447, train acc 0.795
Epoch 8: train loss 0.447, train acc 0.799
Epoch 9: train loss 0.436, train acc 0.803
Epoch 10: train loss 0.421, train acc 0.813
Epoch 11: train loss 0.417, train acc 0.809
Epoch 12: train loss 0.415, train acc 0.810
Epoch 13: train loss 0.417, train acc 0.813
Epoch 14: train loss 0.406, train acc 0.820
Epoch 15: train loss 0.393, train acc 0.829
Epoch 16: train loss 0.379, train acc 0.837
Epoch 17: train loss 0.383, train acc 0.837
Epoch 18: train loss 0.374, train acc 0.841
Epoch 19: train loss 0.355, train acc 0.853
Epoch 20: train loss 0.346, train acc 0.856
Epoch 21: train loss 0.331, train acc 0.861
Epoch 22: train loss 0.310, train acc 0.875
Epoch 23: train loss 0.285, train acc 0.885
Epoch 24: train loss 0.256, train acc 0.