In [1]:
import torch
import torchvision
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

In [2]:
!wget -q https://raw.githubusercontent.com/tsunrise/colab-github/main/colab_github.py
import colab_github
colab_github.github_auth(persistent_key=True)

# method to import git data into colab taken from:
# https://github.com/tsunrise/colab-github

Mounted at /content/drive/
Looks that a private key is already created. If you have already push it to github, no action required.
 Otherwise, Please go to https://github.com/settings/ssh/new to upload the following key: 
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKsdgQltK+JTZbl5J1o7j408pGhjNyf4YUbWJmSugvZz root@b884b6fcbf02

Please use SSH method to clone repo.


In [3]:
!git clone git@github.com:jonq-q/Comp432-GroupQ.git

Cloning into 'Comp432-GroupQ'...
remote: Enumerating objects: 18054, done.[K
remote: Total 18054 (delta 0), reused 0 (delta 0), pack-reused 18054[K
Receiving objects: 100% (18054/18054), 1.14 GiB | 30.87 MiB/s, done.
Resolving deltas: 100% (17/17), done.
Updating files: 100% (18018/18018), done.


In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
dataset = datasets.ImageFolder("/content/Comp432-GroupQ/Dataset 1", transforms.Compose([
        torchvision.transforms.Resize((224,224)),
        torchvision.transforms.ColorJitter(hue=.05, saturation=.05),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.RandomRotation(20),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]))

train_set, test_set = torch.utils.data.random_split(dataset,[0.7,0.3])                # splits dataset into specified ratios
train_loader = DataLoader(train_set,shuffle=True,batch_size=16, num_workers=0)         # create train loader
test_loader = DataLoader(test_set,batch_size=16, num_workers=0)  # create test loader

In [6]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=False)

if torch.cuda.is_available():
    model = model.to('cuda')

loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /root/.cache/torch/hub/v0.10.0.zip


In [7]:
num_epochs = 10
train_losses = []
train_accuracies = []

for epoch in range(num_epochs):
    model.train()
    current_loss = 0.0
    correct = 0
    total = 0
    round = 0

    for inputs, labels in train_loader:
        #print(inputs.shape)
        #print(inputs)
        inputs, labels = inputs.to('cuda'), labels.to('cuda')
        optimizer.zero_grad()
        y_pred = model(inputs)
        l = loss(y_pred, labels)
        l.backward()
        optimizer.step()
        #round += 1
        #print(round)

        current_loss += l.item()
        _, predicted = torch.max(y_pred.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_accuracy = 100 * correct / total
    train_loss = current_loss / len(train_loader)
    train_accuracies.append(train_accuracy)
    train_losses.append(train_loss)
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.2f}%')

Epoch 1/10, Loss: 0.5883, Accuracy: 95.76%
Epoch 2/10, Loss: 0.0065, Accuracy: 100.00%
Epoch 3/10, Loss: 0.0028, Accuracy: 100.00%
Epoch 4/10, Loss: 0.0016, Accuracy: 100.00%
Epoch 5/10, Loss: 0.0011, Accuracy: 100.00%
Epoch 6/10, Loss: 0.0008, Accuracy: 100.00%
Epoch 7/10, Loss: 0.0006, Accuracy: 100.00%
Epoch 8/10, Loss: 0.0004, Accuracy: 100.00%
Epoch 9/10, Loss: 0.0003, Accuracy: 100.00%
Epoch 10/10, Loss: 0.0003, Accuracy: 100.00%


In [8]:
val_loss = []

with torch.no_grad():
          model.eval()
          running_loss = 0.0
          running_score = 0.0
          for image,label in test_loader:
                image = image.to('cuda')
                label = label.to('cuda')
                optimizer.zero_grad()
                y_pred = model.forward(image)
                l = loss(y_pred,label)
                running_loss += l.item()

                val, index_ = torch.max(y_pred,axis=1)
                running_score += torch.sum(index_ == label.data).item()

          epoch_score = running_score/len(test_loader.dataset)
          epoch_loss = running_loss/len(test_loader.dataset)
          val_loss.append(epoch_loss)
          print("Validation loss: {}, accuracy: {}".format(epoch_loss,epoch_score))

          all_predictions = []
          true_labels = []
          for inputs, labels in test_loader:
              inputs, labels = inputs.to('cuda'), labels.to('cuda')
              outputs = model.forward(inputs)
              _, predicted = torch.max(outputs.data, 1)
              all_predictions.extend(predicted.cpu().numpy())
              true_labels.extend(labels.cpu().numpy())

classification_rep = classification_report(true_labels, all_predictions, target_names=dataset.classes)
print(classification_rep)

Validation loss: 1.414547329962564e-05, accuracy: 1.0
                    precision    recall  f1-score   support

Colorectal Cancer        1.00      1.00      1.00      1800

          accuracy                           1.00      1800
         macro avg       1.00      1.00      1.00      1800
      weighted avg       1.00      1.00      1.00      1800



**Save trained model from dataset 1**

In [11]:
torch.save(model.state_dict(), '/content/drive/My Drive/TrainedModel')

In [12]:
pretrained_model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=False)
pretrained_model.load_state_dict(torch.load('/content/drive/My Drive/TrainedModel', map_location=torch.device('cuda')))

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


<All keys matched successfully>

In [13]:
pretrained_model.to('cuda')

with torch.no_grad():
          pretrained_model.eval()
          all_predictions = []
          true_labels = []
          for inputs, labels in test_loader:
              inputs, labels = inputs.to('cuda'), labels.to('cuda')
              outputs = pretrained_model.forward(inputs)
              _, predicted = torch.max(outputs.data, 1)
              all_predictions.extend(predicted.cpu().numpy())
              true_labels.extend(labels.cpu().numpy())

classification_rep = classification_report(true_labels, all_predictions, target_names=dataset.classes)
print(classification_rep)

                    precision    recall  f1-score   support

Colorectal Cancer        1.00      1.00      1.00      1800

          accuracy                           1.00      1800
         macro avg       1.00      1.00      1.00      1800
      weighted avg       1.00      1.00      1.00      1800



In [14]:
from sklearn.metrics import multilabel_confusion_matrix

confMatrix = multilabel_confusion_matrix(true_labels, all_predictions, labels=[0, 1, 2])
print(confMatrix)

[[[   0    0]
  [   0 1800]]

 [[1800    0]
  [   0    0]]

 [[1800    0]
  [   0    0]]]


In [15]:
enumerate(train_loader)

<enumerate at 0x7d8447e39640>

In [16]:
pretrained_model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [17]:
pretrained_model.children()

<generator object Module.children at 0x7d8447e136f0>

In [18]:
device=torch.device("cuda:0"if torch.cuda.is_available() else "cpu")

In [19]:
from typing_extensions import Self
print(list(pretrained_model.children()))
#features=list(pretrained_model.children())[:-2] #rid pooling and fc
features=list(pretrained_model.children())[:-1] #only remove fc
model_out=nn.Sequential(*features).to(device)
def for_feature(loader):
  data_tmp=[]
  label_tmp=[]

  with torch.no_grad():
      for img,label in loader:
        img=img.to(device)
       # labels=labels.to(device)
        #labels=torch.torch.tensor(labels, dtype=torch.long)

        output1=model_out(img)

       # labels1=np.array([int(i) for in in lablels])
       # for i in labels1:
        #    label1=i
         #   lasds.append(labels)
          #  for i in outputs:
           #     tmp.append(i)
            #labels = labels.to(DEVICE)
            #outputs = torch.tensor(outputs)
           # outputs = outputs.to(DEVICE)
           ## loss = loss_fn(outputs, labels)
          #  test_loss += loss.item()
           # ps = torch.exp(outputs)
           # top_p, top_class = ps.topk(1, dim=1)
          #  equals = top_class == labels.view(*top_class.shape)
          #  accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
        #acc.append(accuracy / len(test_loader))

#lapsds = torch.Tensor(lasds)
#tmp = torch.Tensor(tmp)
#print(acc)
# with open('ResNet_Test_FTLoss.txt', 'w') as f:
#     for item in loss_FocalLoss:
#         f.write("%s\n" % item)
        #output=nn.functional.softmax(output1,dim=0)#not needed i think
        data_tmp.append(output1.cpu().numpy())
        label_tmp.append(label.cpu().numpy())
        #print('label: ' +str(label))



  return np.vstack(data_tmp), np.hstack(label_tmp)

#np.save("Feature_ResNet_Test", tmp)
#np.save("Label_ResNet_Test", lasds)




[Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False), BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True), MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), Sequential(
  (0): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bia

In [20]:
trained_feature, trained_label=for_feature(train_loader)

In [21]:
trained_feature

array([[[[1.4567748 ]],

        [[2.1761758 ]],

        [[0.42183247]],

        ...,

        [[1.8665917 ]],

        [[0.02740316]],

        [[0.32328293]]],


       [[[1.1903226 ]],

        [[0.3890672 ]],

        [[0.17162059]],

        ...,

        [[0.72148335]],

        [[1.0154445 ]],

        [[0.29301634]]],


       [[[1.4535049 ]],

        [[2.1874237 ]],

        [[0.42623675]],

        ...,

        [[1.9371736 ]],

        [[0.02388712]],

        [[0.31092116]]],


       ...,


       [[[1.5259755 ]],

        [[2.244106  ]],

        [[0.44283777]],

        ...,

        [[1.9384283 ]],

        [[0.04457682]],

        [[0.33210197]]],


       [[[1.4833624 ]],

        [[2.2563384 ]],

        [[0.42646915]],

        ...,

        [[1.9350481 ]],

        [[0.02035372]],

        [[0.32039255]]],


       [[[1.1620165 ]],

        [[0.37747237]],

        [[0.19465993]],

        ...,

        [[0.7225223 ]],

        [[0.47790852]],

        [[0.30377

In [22]:
trained_feature.shape

(4200, 512, 1, 1)

In [23]:
trained_feature_n=trained_feature.reshape(4200,512)

In [24]:
from scipy import stats
stats.describe(trained_label)
#trained_label.describe()

DescribeResult(nobs=4200, minmax=(0, 0), mean=0.0, variance=0.0, skewness=nan, kurtosis=nan)

In [25]:
trained_label.shape

(4200,)

In [26]:
#T-SNE
from sklearn.manifold import TSNE
import plotly.express as px
#plot_matrix_grid(model.coef_[0].T.reshape(-1,224,224))

In [27]:
tsne=TSNE(n_components=2, random_state=0, learning_rate='auto')
projections=tsne.fit_transform(trained_feature_n)
fig=px.scatter(projections[:,0], projections[:,1], color=trained_label,labels={"color":""})
fig.show()

In [None]:
#task 2 Pretrained ResNet18 applied to Dataset2 & 3
dataset2 = datasets.ImageFolder("/content/drive/My Drive/Comp 432/Dataset2", transforms.Compose([
        torchvision.transforms.Resize((224,224)),
        torchvision.transforms.ColorJitter(hue=.05, saturation=.05),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.RandomRotation(20),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]))

train_set, test_set = torch.utils.data.random_split(dataset,[0.7,0.3])                # splits dataset into specified ratios
train_loader = DataLoader(train_set,shuffle=True,batch_size=16, num_workers=0)         # create train loader
test_loader = DataLoader(test_set,batch_size=16, num_workers=0)  # create test loader


In [None]:
val_loss = []

with torch.no_grad():
          pretrained_model.eval()
          running_loss = 0.0
          running_score = 0.0
          for image,label in test_loader:
                image = image.to('cuda')
                label = label.to('cuda')
                optimizer.zero_grad()
                y_pred = model.forward(image)
                l = loss(y_pred,label)
                running_loss += l.item()

                val, index_ = torch.max(y_pred,axis=1)
                running_score += torch.sum(index_ == label.data).item()

          epoch_score = running_score/len(test_loader.dataset)
          epoch_loss = running_loss/len(test_loader.dataset)
          val_loss.append(epoch_loss)
          print("Validation loss: {}, accuracy: {}".format(epoch_loss,epoch_score))

          all_predictions = []
          true_labels = []
          for inputs, labels in test_loader:
              inputs, labels = inputs.to('cuda'), labels.to('cuda')
              outputs = model.forward(inputs)
              _, predicted = torch.max(outputs.data, 1)
              all_predictions.extend(predicted.cpu().numpy())
              true_labels.extend(labels.cpu().numpy())

classification_rep = classification_report(true_labels, all_predictions, target_names=dataset2.classes)
print(classification_rep)

Validation loss: 0.629561579320063, accuracy: 0.0


ValueError: ignored

In [None]:
print(dataset2.classes)

['Dataset 2', '__MACOSX']


In [None]:
import torchvision.models as models

In [None]:
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
VGG_16=models.vgg16(pretrained=True)



In [None]:
#loss = nn.CrossEntropyLoss()
#optimizer_vgg16= optim.SGD(VGG_16.parameters(), lr=0.0001,momentum=0.9)