In [1]:
%pip install h5py

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from collections import defaultdict
from torch.utils.data import Dataset, random_split
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.image as mpimg
from torch.utils.data import DataLoader

In [4]:
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        #self.linear_layer_size = linear_layer_size

        self.conv1 = torch.nn.Conv2d(3, 16, kernel_size=3)  # input: torch.Size([64, 3, 96, 96])
        self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=3) # input: torch.Size([64, 16, 47, 47])
        self.conv3 = torch.nn.Conv2d(32, 64, kernel_size=3) # input: torch.Size([64, 32, 22, 22])
        self.conv4 = torch.nn.Conv2d(64, 128, kernel_size=3) # input: torch.Size([64, 64, 10, 10])
        # output: torch.Size([64, 128, 4, 4])
        

    def forward(self, x):
        #print(x.shape)
        batch_size = x.size(0)

        relu = torch.nn.ReLU()
        maxpool = torch.nn.MaxPool2d(stride=2,kernel_size=2)

        x = self.conv1(x)
        x = maxpool(relu(x))
        #print(x.shape)
        x = self.conv2(x)
        x = maxpool(relu(x))
        #print(x.shape)
        x = self.conv3(x)
        x = maxpool(relu(x))
        #print(x.shape)
        x = self.conv4(x)
        x = maxpool(relu(x))
        #print(x.shape)
        # x = x.reshape(batch_size, self.linear_layer_size)
        # x = self.linear(x)
        return x

In [5]:
class FC(torch.nn.Module):
    def __init__(self):
          super(FC, self).__init__()
          self.fc1 = torch.nn.Linear(4 * 4 * 128, 1)  # input: (6,6,128) # output 2 classes
          self.actv = torch.nn.Sigmoid()
    def forward(self, x):
        #print(x.shape)
        batch_size = x.size(0)
        x = torch.flatten(x, 1)
        #print(x.shape)
        x = self.fc1(x)
        x = self.actv(x)
        return x


In [6]:
class LSTM(torch.nn.Module):
    def __init__(self,batch_size=32):
        super(LSTM,self).__init__()
        self.input_size = 128
        self.output_size = 2
        self.hidden_size = 32
        self.n_layers = 1
        self.batch_size = batch_size
        self.lstm = torch.nn.LSTM(input_size=self.input_size,hidden_size=self.hidden_size,num_layers=self.n_layers,batch_first=True)
        self.fc1 = torch.nn.Linear(in_features=5,out_features=self.output_size)

    def forward(self,x):
        output,_status = self.lstm(x)
        output = output.view(-1, self.hidden_size)
        output = self.fc1(torch.relu(output))
        return output

    def initHidden(self):
        return torch.zeros(self.n_layers,self.batch_size, self.hidden_size)


In [7]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [8]:
batch_size = 32

In [9]:
train_data = datasets.PCAM(
    root="/content/drive/MyDrive/CS449",
    split="train",
    download=False,
    transform=ToTensor()
)

In [10]:
val_data = datasets.PCAM(
    root="/content/drive/MyDrive/CS449",
    split="val",
    download=False,
    transform=ToTensor()
)

In [11]:
# train_split = random_split(train_data, [20000, 242144], generator=torch.Generator().manual_seed(42))
# train_data_small = train_split[0]
train_data_size = 500
train_split = random_split(train_data, [train_data_size, len(train_data)-train_data_size], generator=torch.Generator().manual_seed(42))
train_data_small = train_split[0]

In [12]:
train_dataloader = DataLoader(train_data_small, batch_size=batch_size, shuffle=True)

In [13]:
val_data_size = 64
val_split = random_split(val_data, [val_data_size, len(val_data)-val_data_size], generator=torch.Generator().manual_seed(42))
val_data_small = val_split[0]

In [14]:
val_dataloader = DataLoader(val_data_small, batch_size=batch_size, shuffle=True)

In [15]:
def run_one_epoch_cnn_fc(epoch_idx, model1, model2, optimizer, train_dataloader, val_dataloader, train=True):
    model1.train()
    model2.train()
    # loop through the entire dataset once per epoch
    train_loss = 0.0
    train_acc = 0.0
    train_total = 0
    for batch_idx, (images, labels) in enumerate(train_dataloader):
        optimizer.zero_grad()
        images = images.to(device)
        labels = labels.to(device)
        model1_output = model1(images)
        #print("hi")
        predictions = model2(model1_output).squeeze()
        acc_sum = 0
        for i, pred in enumerate(predictions):

          if round(pred.item()) == labels[i]:
            acc_sum += 1
        acc = acc_sum / labels.size(0)
        # acc = torch.sum(predictions == labels) / labels.size(0)
        loss = torch.nn.BCELoss()(predictions, labels.float())
        loss.backward()
        optimizer.step()

    # update statistics
        train_loss += (loss * len(images))
        train_acc += (acc * len(labels))
        train_total += len(labels)
    
    train_loss /= train_total
    train_acc /= train_total

    # model1.eval()
    # model2.eval()
    val_loss = 0
    val_acc = 0
    val_total = 0
    with torch.no_grad():
      for batch_idx, (images, labels) in enumerate(val_dataloader):
          images = images.to(device)
          labels = labels.to(device)
          model1_output = model1(images)
          predictions = model2(model1_output).squeeze()
          acc_sum = 0
          for i, pred in enumerate(predictions):
            if round(pred.item()) == labels[i]:
              acc_sum += 1
          acc = acc_sum / labels.size(0)
          # acc = torch.sum(predictions == labels) / labels.size(0)
          loss = torch.nn.BCELoss()(predictions, labels.float())

          # update statistics
          val_loss += (loss * len(images))
          val_acc += (acc * len(labels))
          val_total += len(labels)

      val_loss /= val_total
      val_acc /= val_total

    print(f"Epoch {epoch_idx + 1}:  val loss {val_loss :0.3f}, val acc {val_acc :0.3f}, train loss {train_loss :0.3f}, train acc {train_acc :0.3f}")

    # Detach tells torch to stop tracking a tensor's gradients
    return train_acc, train_loss.detach(), val_acc,val_loss.detach()

In [16]:
# cnn_fc = CNN()
# cnn_fc.to(device)
# fc = FC()
# fc.to(device)
# epochs = 50
# lr = 1e-3

# train_accs = []
# train_losses = []
# val_accs = []
# val_losses = []
# opt = torch.optim.Adam([{"params": cnn_fc.parameters()},{"params":fc.parameters()}], lr=lr)
# for epoch in range(epochs):
#   train_acc, train_loss, val_acc, val_loss = run_one_epoch_cnn_fc(epoch, cnn_fc, fc, opt, train_dataloader, val_dataloader)
#   train_accs.append(train_acc)
#   train_losses.append(train_loss)
#   val_accs.append(val_acc)
#   val_losses.append(val_loss)
# print(train_accs)
# print(train_losses)
# print(val_accs)
# print(val_losses)

In [17]:
# torch.save(cnn_fc.state_dict(),"/content/drive/MyDrive/CS449/models/cnn_fc.model")
# torch.save(fc.state_dict(),"/content/drive/MyDrive/CS449/models/fc.model")

In [18]:
train_accs = []
train_losses = []
val_accs = []
val_losses = []

In [19]:
print(train_accs)

[]


In [20]:
print(val_accs)

[]


In [21]:
tr_losses = []
for loss in train_losses:
  tr_losses.append(loss.item())
print(tr_losses)

[]


In [22]:
vl_losses = []
for loss in val_losses:
  vl_losses.append(loss.item())
print(vl_losses)

[]


In [23]:
def run_one_epoch_cnn_rnn(epoch_idx, model1, model2, optimizer, train_dataloader, val_dataloader, train=True):
    model1.train()
    model2.train()
    # hidden = model2.initHidden()
    # hidden.to(device)
    hidden = None
    # loop through the entire dataset once per epoch
    train_loss = 0.0
    train_acc = 0.0
    train_total = 0
    for batch_idx, (images, labels) in enumerate(train_dataloader):
        optimizer.zero_grad()
        images = images.to(device)
        labels = labels.to(device)
        model1_output = model1(images)
        #print("hi")
        predictions = model2(model1_output, hidden).squeeze()
        # hidden = hidden.data
        # hidden.to(device)
        acc_sum = 0
        for i, pred in enumerate(predictions):

          if round(pred.item()) == labels[i]:
            acc_sum += 1
        acc = acc_sum / labels.size(0)
        # acc = torch.sum(predictions == labels) / labels.size(0)
        loss = torch.nn.BCELoss()(predictions, labels.float())
        loss.backward()
        optimizer.step()

    # update statistics
        train_loss += (loss * len(images))
        train_acc += (acc * len(labels))
        train_total += len(labels)
    
    train_loss /= train_total
    train_acc /= train_total

    # model1.eval()
    # model2.eval()
    val_loss = 0
    val_acc = 0
    val_total = 0
    with torch.no_grad():
      for batch_idx, (images, labels) in enumerate(val_dataloader):
          images = images.to(device)
          labels = labels.to(device)
          model1_output = model1(images)
          predictions = model2(model1_output,hidden).squeeze()
          acc_sum = 0
          for i, pred in enumerate(predictions):
            if round(pred.item()) == labels[i]:
              acc_sum += 1
          acc = acc_sum / labels.size(0)
          # acc = torch.sum(predictions == labels) / labels.size(0)
          loss = torch.nn.BCELoss()(predictions, labels.float())

          # update statistics
          val_loss += (loss * len(images))
          val_acc += (acc * len(labels))
          val_total += len(labels)

      val_loss /= val_total
      val_acc /= val_total

    print(f"Epoch {epoch_idx + 1}:  val loss {val_loss :0.3f}, val acc {val_acc :0.3f}, train loss {train_loss :0.3f}, train acc {train_acc :0.3f}")

    # Detach tells torch to stop tracking a tensor's gradients
    return train_acc, train_loss.detach(), val_acc,val_loss.detach()

In [24]:
class RNN(torch.nn.Module):
    def __init__(self, batch_size=32):
        super(RNN, self).__init__()
        self.input_size = 128
        self.output_size = 1
        self.hidden_size = 128
        self.n_layers = 1
        self.batch_size = batch_size
        # batch_first means that the first dim of the input and output will be the batch_size
        # self.rnn = torch.nn.RNN(self.input_size, self.hidden_size, self.n_layers, batch_first=True)
        # last, fully-connected layer
        self.rnn = torch.nn.RNNCell(self.input_size, self.hidden_size)
        self.fc = torch.nn.Linear(self.hidden_size*16, self.output_size)
        self.actv = torch.nn.Sigmoid()

    def forward(self, x, hidden):
        # x (batch_size, seq_length, input_size)
        # hidden (n_layers, batch_size, hidden_dim)
        # r_out (batch_size, time_step, hidden_size)
        if hidden != None:
          print("hidden: ",hidden.shape)
        batch_size = x.size(0)
        print("batch_size",batch_size,"shape", x.shape)
        x = x.permute(0,2,3,1).reshape(batch_size,16,-1)
        print("after shape", x.shape)
        output= torch.tensor([]).to(device)
        for pixel in range(16):
          input = x[:,pixel,:]
          hidden = self.rnn(input, hidden)
          # print("hidden: ", hidden.shape)
          output = torch.cat((output, hidden), dim=1)
        # get RNN outputs
        print("output: ",output.shape)
        # shape output to be (batch_size*seq_length, hidden_dim)
        #r_out = r_out.view(-1, self.hidden_size)
        
        # get final output 
        output = self.fc(output)
        
        #return output, hidden
        return self.actv(output)
        
    # def initHidden(self):
    #     return torch.zeros(self.n_layers,self.batch_size, self.hidden_size)

In [None]:
cnn_rnn= CNN()
cnn_rnn.to(device)
rnn = RNN()
rnn.to(device)

epochs = 50
lr = 1e-3

train_accs = []
train_losses = []
val_accs = []
val_losses = []
opt = torch.optim.Adam([{"params": cnn_rnn.parameters()},{"params":rnn.parameters()}], lr=lr)
for epoch in range(epochs):
  print("starting epoch")
  train_acc, train_loss, val_acc, val_loss = run_one_epoch_cnn_rnn(epoch, cnn_rnn, rnn, opt, train_dataloader, val_dataloader)
  train_accs.append(train_acc)
  train_losses.append(train_loss)
  val_accs.append(val_acc)
  val_losses.append(val_loss)
  print("ending epoch")
print(train_accs)
print(train_losses)
print(val_accs)
print(val_losses)

starting epoch
batch_size 32 shape torch.Size([32, 128, 4, 4])
after shape torch.Size([32, 16, 128])
output:  torch.Size([32, 2048])
batch_size 32 shape torch.Size([32, 128, 4, 4])
after shape torch.Size([32, 16, 128])
output:  torch.Size([32, 2048])
