In [1]:
%pip install h5py

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [11]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from collections import defaultdict
from torch.utils.data import Dataset, random_split
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.image as mpimg
from torch.utils.data import DataLoader

In [4]:
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        #self.linear_layer_size = linear_layer_size

        self.conv1 = torch.nn.Conv2d(3, 16, kernel_size=3)  # input: torch.Size([64, 3, 96, 96])
        self.conv2 = torch.nn.Conv2d(16, 32, kernel_size=3) # input: torch.Size([64, 16, 47, 47])
        self.conv3 = torch.nn.Conv2d(32, 64, kernel_size=3) # input: torch.Size([64, 32, 22, 22])
        self.conv4 = torch.nn.Conv2d(64, 128, kernel_size=3) # input: torch.Size([64, 64, 10, 10])
        # output: torch.Size([64, 128, 4, 4])
        

    def forward(self, x):
        #print(x.shape)
        batch_size = x.size(0)

        relu = torch.nn.ReLU()
        maxpool = torch.nn.MaxPool2d(stride=2,kernel_size=2)

        x = self.conv1(x)
        x = maxpool(relu(x))
        #print(x.shape)
        x = self.conv2(x)
        x = maxpool(relu(x))
        #print(x.shape)
        x = self.conv3(x)
        x = maxpool(relu(x))
        #print(x.shape)
        x = self.conv4(x)
        x = maxpool(relu(x))
        #print(x.shape)
        # x = x.reshape(batch_size, self.linear_layer_size)
        # x = self.linear(x)
        return x

In [5]:
class FC(torch.nn.Module):
    def __init__(self):
          super(FC, self).__init__()
          self.fc1 = torch.nn.Linear(4 * 4 * 128, 1)  # input: (6,6,128) # output 2 classes
          self.actv = torch.nn.Sigmoid()
    def forward(self, x):
        #print(x.shape)
        batch_size = x.size(0)
        x = torch.flatten(x, 1)
        #print(x.shape)
        x = self.fc1(x)
        x = self.actv(x)
        return x


In [83]:
class RNN(torch.nn.Module):
    def __init__(self, batch_size=32):
        super(RNN, self).__init__()
        self.input_size = 128
        self.output_size = 2
        self.hidden_size = 32
        self.n_layers = 1
        self.batch_size = batch_size
        # batch_first means that the first dim of the input and output will be the batch_size
        self.rnn = torch.nn.RNN(self.input_size, self.hidden_size, self.n_layers, batch_first=True)
        # last, fully-connected layer
        self.fc = torch.nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x, hidden):
        # x (batch_size, seq_length, input_size)
        # hidden (n_layers, batch_size, hidden_dim)
        # r_out (batch_size, time_step, hidden_size)
        print("before reshape:", x.shape)
        if hidden != None:
          print("hidden: ",hidden.shape)
        batch_size = x.size(0)
        x = x.permute(0,2,3,1).reshape(batch_size,16,-1)
        output= torch.tensor([])
        for pixel in range(16):
          input = x[:,pixel,:]
          print("after reshape:", input.shape)
          r_out, hidden = self.rnn(input, hidden)
          output = torch.cat(output, r_out)
        # get RNN outputs
        
        # shape output to be (batch_size*seq_length, hidden_dim)
        r_out = r_out.view(-1, self.hidden_size)
        
        # get final output 
        output = self.fc(r_out)
        
        return output, hidden
        
    def initHidden(self):
        return torch.zeros(self.n_layers,self.batch_size, self.hidden_size)

In [73]:
class LSTM(torch.nn.Module):
    def __init__(self,batch_size=32):
        super(LSTM,self).__init__()
        self.input_size = 128
        self.output_size = 2
        self.hidden_size = 32
        self.n_layers = 1
        self.batch_size = batch_size
        self.lstm = torch.nn.LSTM(input_size=self.input_size,hidden_size=self.hidden_size,num_layers=self.n_layers,batch_first=True)
        self.fc1 = torch.nn.Linear(in_features=5,out_features=self.output_size)

    def forward(self,x):
        output,_status = self.lstm(x)
        output = output.view(-1, self.hidden_size)
        output = self.fc1(torch.relu(output))
        return output

    def initHidden(self):
        return torch.zeros(self.n_layers,self.batch_size, self.hidden_size)


In [8]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [9]:
train_data = datasets.PCAM(
    root="/content/drive/MyDrive/CS449",
    split="train",
    download=False,
    transform=ToTensor()
)

In [10]:
val_data = datasets.PCAM(
    root="/content/drive/MyDrive/CS449",
    split="val",
    download=False,
    transform=ToTensor()
)

In [31]:
len(train_data)

262144

In [None]:
train_data[0][0]

In [None]:
y = np.transpose(train_data[0][0], (1,2,0))
y

In [40]:
t = np.array([[1,2],[3,4]])

In [43]:
t.transpose()

array([[1, 3],
       [2, 4]])

In [27]:
train_split = random_split(train_data, [20000, 242144], generator=torch.Generator().manual_seed(42))
train_data_small = train_split[0]

In [19]:
# train_data_small = []
# for i in range(20000):
#   print(i)
#   train_data_small.append(train_data[i])

In [20]:
train_dataloader = DataLoader(train_data_small, batch_size=32, shuffle=True)

In [21]:
# val_data_small = []
# for i in range(5000):
#   val_data_small.append(val_data[i])

In [28]:
len(val_data)

32768

In [29]:
val_split = random_split(val_data, [2000, 30768], generator=torch.Generator().manual_seed(42))
val_data_small = val_split[0]

In [30]:
def run_one_epoch_cnn_fc(epoch_idx, model1, model2, optimizer, train_dataloader, val_dataloader, train=True):
    model1.train()
    model2.train()
    # loop through the entire dataset once per epoch
    train_loss = 0.0
    train_acc = 0.0
    train_total = 0
    for batch_idx, (images, labels) in enumerate(train_dataloader):
        optimizer.zero_grad()
        images = images.to(device)
        labels = labels.to(device)
        model1_output = model1(images)
        #print("hi")
        predictions = model2(model1_output).squeeze()
        acc_sum = 0
        for i, pred in enumerate(predictions):

          if round(pred.item()) == labels[i]:
            acc_sum += 1
        acc = acc_sum / labels.size(0)
        # acc = torch.sum(predictions == labels) / labels.size(0)
        loss = torch.nn.BCELoss()(predictions, labels.float())
        loss.backward()
        optimizer.step()

    # update statistics
        train_loss += (loss * len(images))
        train_acc += (acc * len(labels))
        train_total += len(labels)
    
    train_loss /= train_total
    train_acc /= train_total

    # model1.eval()
    # model2.eval()
    val_loss = 0
    val_acc = 0
    val_total = 0
    with torch.no_grad():
      for batch_idx, (images, labels) in enumerate(val_dataloader):
          images = images.to(device)
          labels = labels.to(device)
          model1_output = model1(images)
          predictions = model2(model1_output).squeeze()
          acc_sum = 0
          for i, pred in enumerate(predictions):
            if round(pred.item()) == labels[i]:
              acc_sum += 1
          acc = acc_sum / labels.size(0)
          # acc = torch.sum(predictions == labels) / labels.size(0)
          loss = torch.nn.BCELoss()(predictions, labels.float())

          # update statistics
          val_loss += (loss * len(images))
          val_acc += (acc * len(labels))
          val_total += len(labels)

      val_loss /= val_total
      val_acc /= val_total

    print(f"Epoch {epoch_idx + 1}:  val loss {val_loss :0.3f}, val acc {val_acc :0.3f}, train loss {train_loss :0.3f}, train acc {train_acc :0.3f}")

    # Detach tells torch to stop tracking a tensor's gradients
    return train_acc, train_loss.detach(), val_acc,val_loss.detach()

In [48]:
val_dataloader = DataLoader(val_data_small, batch_size=32, shuffle=True)

In [49]:
cnn_fc = CNN()
cnn_fc.to(device)
fc = FC()
fc.to(device)

FC(
  (fc1): Linear(in_features=2048, out_features=1, bias=True)
  (actv): Sigmoid()
)

In [52]:
epochs = 50
lr = 1e-3

train_accs = []
train_losses = []
val_accs = []
val_losses = []
opt = torch.optim.Adam([{"params": cnn_fc.parameters()},{"params":fc.parameters()}], lr=lr)
for epoch in range(epochs):
  train_acc, train_loss, val_acc, val_loss = run_one_epoch_cnn_fc(epoch, cnn_fc, fc, opt, train_dataloader, val_dataloader)
  train_accs.append(train_acc)
  train_losses.append(train_loss)
  val_accs.append(val_acc)
  val_losses.append(val_loss)
print(train_accs)
print(train_losses)
print(val_accs)
print(val_losses)

KeyboardInterrupt: ignored

In [None]:
torch.save(cnn_fc.state_dict(),"/content/drive/MyDrive/CS449/models/cnn_fc.model")
torch.save(fc.state_dict(),"/content/drive/MyDrive/CS449/models/fc.model")

In [None]:
print(train_accs)

[0.74955, 0.79325, 0.8085, 0.8161, 0.8257, 0.8349, 0.8433, 0.85205, 0.86625, 0.8723, 0.8819, 0.8916, 0.9027, 0.9127, 0.92425, 0.9368, 0.94635, 0.95775, 0.9635, 0.9742, 0.9777, 0.98455, 0.98045, 0.9893, 0.98975, 0.984, 0.98715, 0.9919, 0.9858, 0.9879, 0.99295, 0.99625, 0.9886, 0.9897, 0.991, 0.9857, 0.99055, 0.9949, 0.9984, 0.9877, 0.9906, 0.9917, 0.99485, 0.9919, 0.98835, 0.9915, 0.98965, 0.99525, 0.99155, 0.98605]


In [None]:
print(val_accs)

[0.784, 0.7462, 0.8098, 0.7532, 0.7918, 0.776, 0.7858, 0.7584, 0.804, 0.8062, 0.8052, 0.8062, 0.7836, 0.7948, 0.8178, 0.7882, 0.7854, 0.7826, 0.7814, 0.7778, 0.786, 0.7888, 0.7762, 0.7702, 0.7634, 0.7966, 0.772, 0.7666, 0.7752, 0.7822, 0.7828, 0.7794, 0.7786, 0.7678, 0.7754, 0.788, 0.7768, 0.7708, 0.788, 0.7754, 0.7826, 0.7618, 0.7826, 0.735, 0.7862, 0.7768, 0.7706, 0.789, 0.7758, 0.7842]


In [None]:
tr_losses = []
for loss in train_losses:
  tr_losses.append(loss.item())
print(tr_losses)

[0.508279025554657, 0.4556257724761963, 0.43081802129745483, 0.4148216247558594, 0.40155887603759766, 0.37974247336387634, 0.3623133897781372, 0.3443458676338196, 0.31922510266304016, 0.3021552562713623, 0.28436917066574097, 0.2626524865627289, 0.23923951387405396, 0.21943800151348114, 0.19358503818511963, 0.16817794740200043, 0.14361096918582916, 0.11893610656261444, 0.10290876030921936, 0.07799740135669708, 0.06598580628633499, 0.04930972307920456, 0.05675511807203293, 0.035196464508771896, 0.03338271751999855, 0.04720931127667427, 0.0388021357357502, 0.025461044162511826, 0.042250294238328934, 0.033171799033880234, 0.023520763963460922, 0.013245430774986744, 0.03208798170089722, 0.030069351196289062, 0.025286640971899033, 0.04111939296126366, 0.02710670419037342, 0.015680862590670586, 0.006308150012046099, 0.03796546161174774, 0.025296764448285103, 0.024914640933275223, 0.014877719804644585, 0.024426382035017014, 0.03230440244078636, 0.023834871128201485, 0.03218914940953255, 0.0153

In [None]:
vl_losses = []
for loss in val_losses:
  vl_losses.append(loss.item())
print(vl_losses)

[0.4680269956588745, 0.5125997066497803, 0.43674135208129883, 0.5049386620521545, 0.4489496946334839, 0.4522791802883148, 0.44421684741973877, 0.5178857445716858, 0.44593745470046997, 0.4228784143924713, 0.42430832982063293, 0.4576728940010071, 0.49728474020957947, 0.538810670375824, 0.5338367223739624, 0.59158855676651, 0.6532549262046814, 0.7139144539833069, 0.7890828251838684, 0.8993056416511536, 0.9053544402122498, 1.0253123044967651, 1.0144762992858887, 1.212432861328125, 1.3518964052200317, 1.356855869293213, 1.367996096611023, 1.4851295948028564, 1.527214765548706, 1.499824047088623, 1.688446283340454, 1.620281457901001, 1.7207047939300537, 1.9576046466827393, 1.8936675786972046, 1.8254780769348145, 1.7708159685134888, 2.1894285678863525, 2.361193895339966, 2.001924753189087, 2.385425567626953, 1.9942110776901245, 2.229936361312866, 2.72424578666687, 2.2721760272979736, 2.2304704189300537, 2.4495716094970703, 2.6518123149871826, 2.613081693649292, 2.5146055221557617]


In [75]:
def run_one_epoch_cnn_rnn(epoch_idx, model1, model2, optimizer, train_dataloader, val_dataloader, train=True):
    model1.train()
    model2.train()
    hidden = None#model2.initHidden()
    # loop through the entire dataset once per epoch
    train_loss = 0.0
    train_acc = 0.0
    train_total = 0
    for batch_idx, (images, labels) in enumerate(train_dataloader):
        optimizer.zero_grad()
        images = images.to(device)
        labels = labels.to(device)
        model1_output = model1(images)
        #print("hi")
        predictions, hidden = model2(model1_output, hidden).squeeze()
        hidden = hidden.data
        acc_sum = 0
        for i, pred in enumerate(predictions):

          if round(pred.item()) == labels[i]:
            acc_sum += 1
        acc = acc_sum / labels.size(0)
        # acc = torch.sum(predictions == labels) / labels.size(0)
        loss = torch.nn.BCELoss()(predictions, labels.float())
        loss.backward()
        optimizer.step()

    # update statistics
        train_loss += (loss * len(images))
        train_acc += (acc * len(labels))
        train_total += len(labels)
    
    train_loss /= train_total
    train_acc /= train_total

    # model1.eval()
    # model2.eval()
    val_loss = 0
    val_acc = 0
    val_total = 0
    with torch.no_grad():
      for batch_idx, (images, labels) in enumerate(val_dataloader):
          images = images.to(device)
          labels = labels.to(device)
          model1_output = model1(images)
          predictions = model2(model1_output).squeeze()
          acc_sum = 0
          for i, pred in enumerate(predictions):
            if round(pred.item()) == labels[i]:
              acc_sum += 1
          acc = acc_sum / labels.size(0)
          # acc = torch.sum(predictions == labels) / labels.size(0)
          loss = torch.nn.BCELoss()(predictions, labels.float())

          # update statistics
          val_loss += (loss * len(images))
          val_acc += (acc * len(labels))
          val_total += len(labels)

      val_loss /= val_total
      val_acc /= val_total

    print(f"Epoch {epoch_idx + 1}:  val loss {val_loss :0.3f}, val acc {val_acc :0.3f}, train loss {train_loss :0.3f}, train acc {train_acc :0.3f}")

    # Detach tells torch to stop tracking a tensor's gradients
    return train_acc, train_loss.detach(), val_acc,val_loss.detach()

In [84]:
cnn_rnn= CNN()
cnn_rnn.to(device)
rnn = RNN()
rnn.to(device)

epochs = 50
lr = 1e-3

train_accs = []
train_losses = []
val_accs = []
val_losses = []
opt = torch.optim.Adam([{"params": cnn_fc.parameters()},{"params":fc.parameters()}], lr=lr)
for epoch in range(epochs):
  train_acc, train_loss, val_acc, val_loss = run_one_epoch_cnn_rnn(epoch, cnn_rnn, rnn, opt, train_dataloader, val_dataloader)
  train_accs.append(train_acc)
  train_losses.append(train_loss)
  val_accs.append(val_acc)
  val_losses.append(val_loss)
print(train_accs)
print(train_losses)
print(val_accs)
print(val_losses)

before reshape: torch.Size([32, 128, 4, 4])
hidden:  torch.Size([1, 32, 32])
after reshape: torch.Size([32, 128])


RuntimeError: ignored