In [27]:
# import torch
# import torch.nn as nn
# import torch.nn.functional as F
# import torch.optim as optim
# from torchvision import datasets, transforms
# import numpy as np
# Import main packages
import os
import copy
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F

from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from typing import Optional, Callable
from sklearn.metrics import accuracy_score, f1_score
from sklearn.covariance import LedoitWolf
import json

import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter # For Tensorboard
from collections.abc import Mapping

torch.manual_seed(0)

<torch._C.Generator at 0x169252690>

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [3]:
class CoinDataset(Dataset):
    """DHMC dataset using 2 classes"""

    def __init__(self, features_path : str, label_path,train : bool = False) -> None:
        """
        Attributes:
            raw_data (list of dict): (M) List of M slides raw data as dictionaries. 
            train (bool): True if data are the training set. False otherwise
            
        Args:
            features_path (str): The path to the features file
            train (bool): Whether it is the training dataset or not
        """
        
        super().__init__()
        # Load raw data from path
        
        # self.raw_data = torch.load(features_path)
        # load labels from json file
        self.raw_data = []
        data_json = None
        with open(label_path, 'r') as f:
            data_json = json.load(f)
        for x in data_json:
            filename = x["filename"]
            img_path = f"{features_path}/{filename}"
            img = Image.open(img_path)

            mean=[0.485, 0.456, 0.406]
            std=[0.229, 0.224, 0.225]
            transform = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ])
            img = transform(img)
            # print(img_tr)
            # img = np.array(img_tr)
            # print(img)
            # img = img.flatten()
            self.raw_data.append({"image_features": img, "label": int(x["value"])})

            #open image
        # Set if training or not
        self.train = train

    def __len__(self) -> int:
        """Returns the length of the dataset

        Returns:
            int: The length M of the dataset
        """

        n_data = 0
        
        # ------------------
        # Your code here ... 
        # ------------------
        n_data = len(self.raw_data)
        return n_data
    
    def __getitem__(self, index : int):
        """Returns the entry at index from the dataset

        Args:
            index (int): the requested entry index of the dataset

        Returns:
            features (torch.Tensor): (N, d) Feature tensor of the selected slide with N patches and d feature dimensions
            label (int): Ground truth label {0, ..., n_classes}
            wsi_id (str): Name of the WSI as "DHMC_xxx" where xxx is a unique id of the slide (train == False only)
            coordinates (torch.Tensor): (N, 2) xy coordinates of the N patches of the selected slide (train == False only)
        """

        features = None
        label = None
        wsi_id = None
        coordinates = None
        
        # ------------------
        # Your code here ... 
        # ------------------
        features = torch.tensor(self.raw_data[index]["image_features"]).float()
        label = torch.tensor(self.raw_data[index]["label"])
        # coordinates = self.raw_data[index]["patch_coordinates"]
        # wsi_id = self.raw_data[index]["wsi_id"]
        # if self.train:
        return features, label
        # else:
        #     return features, label, wsi_id, coordinates

In [26]:
# Load all the img from output folder and make a dataset
coinDataset = CoinDataset('./output/', "train_data.json", train=True)
# trans = tranosforms.Compose([transforms.Resize(256),transforms.ToTensor()])


train_size = int(0.9* len(coinDataset))
val_size = len(coinDataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(coinDataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)
# dataset = datasets.ImaVgeFolder('./output-1/', transform=transforms.ToTensor())

TypeError: random_split() got an unexpected keyword argument 'seed'

In [24]:
from torchvision.models import resnet18, ResNet18_Weights

model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Linear(256, 16)
)
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [25]:

def train(model, train_loader, val_loader, optimizer, scheduler, criterion, epochs=10):

    steps = 0
    for epoch in range(epochs):
        for i, (data, target) in enumerate(train_loader):
        
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            
            loss.backward()
            optimizer.step()
            writer.add_scalar("Loss/train", loss, steps)
            steps+=1
            if i % 10 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, i * len(data), len(train_loader.dataset),
                    100. * i / len(train_loader), loss.item()))
                

        running_vloss = []
        with torch.no_grad():
            for i, vdata in enumerate(val_loader):
                vinputs, vlabels = vdata
                voutputs = model(vinputs)
                vloss = criterion(voutputs, vlabels)
                running_vloss.append(vloss)

        avg_vloss = np.mean(running_vloss)
        print('Val Epoch: {}\tLoss: {:.6f}'.format(
            epoch, avg_vloss
        ))
        writer.add_scalar("Loss/val", avg_vloss, (epoch + 1))
        scheduler.step()
                
# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Define the scheduler
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# Log in Tensorboard
writer = SummaryWriter('runs/simple_restnet18_1e-4')

train(model, train_loader, val_loader, optimizer, scheduler, nn.CrossEntropyLoss(), epochs=8)
writer.flush()
writer.close()

  features = torch.tensor(self.raw_data[index]["image_features"]).float()


Val Epoch: 0	Loss: 2.837209
Val Epoch: 1	Loss: 2.793255
Val Epoch: 2	Loss: 2.827165
Val Epoch: 3	Loss: 2.816723
Val Epoch: 4	Loss: 2.830498
Val Epoch: 5	Loss: 2.830173
Val Epoch: 6	Loss: 2.891581
Val Epoch: 7	Loss: 2.817993
