In [1]:
# Extract the CheXpert-v1.0-small archive into the current working directory.
# `> /dev/null` discards unzip's per-file stdout listing; errors on stderr still show.
# Presumably requires Google Drive to be mounted at /content/drive first — confirm.
!unzip /content/drive/MyDrive/Graduation_Project/CheXpert-v1.0-small.zip > /dev/null

In [1]:
from torch import nn, sqrt
import torch
import sys
from math import sqrt
from MBConv import MBConvBlock
from SelfAttention import ScaledDotProductAttention

from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as T

import pandas as pd
import numpy as np

In [21]:
class CoAtNet(nn.Module):
    """Five-stage backbone: three convolutional stages followed by two attention stages.

    Each stage is a "mixing" block (plain convs, ``MBConvBlock`` or
    ``ScaledDotProductAttention``) followed by a small projection MLP that
    changes the channel width, and then a stride-2 max pool.

    Args:
        in_ch: Number of channels of the input image.
        image_size: Side length of the (square) input image. Only used to tell
            the two ``MBConvBlock`` stages their input resolution
            (``image_size // 2`` and ``image_size // 4``).
        out_chs: Channel widths produced by stages 0-4; at least five entries
            are read. The sequence is copied, so the caller's object (and the
            default) is never shared with the model.

    NOTE(review): ``forward`` returns a feature tensor, not class logits; there
    is no classification head in this class. ``mlp5`` is constructed (and so
    contributes parameters) but is never applied in ``forward``.
    """

    def __init__(self, in_ch, image_size, out_chs=(64, 96, 192, 384, 768)):
        super().__init__()
        # Copy so that mutating `model.out_chs` can never alter the shared
        # default or a list owned by the caller.
        out_chs = list(out_chs)
        self.out_chs = out_chs

        # Parameter-free down-sampling layers shared by all stages.
        self.maxpool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        self.maxpool1d = nn.MaxPool1d(kernel_size=2, stride=2)

        # Stage 0: two 3x3 convs at input width, then 1x1 convs to out_chs[0].
        self.s0 = nn.Sequential(
            nn.Conv2d(in_ch, in_ch, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_ch, in_ch, kernel_size=3, padding=1)
        )
        self.mlp0 = nn.Sequential(
            nn.Conv2d(in_ch, out_chs[0], kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(out_chs[0], out_chs[0], kernel_size=1)
        )

        # Stage 1: MBConv at half resolution, then 1x1 convs to out_chs[1].
        self.s1 = MBConvBlock(ksize=3, input_filters=out_chs[0], output_filters=out_chs[0], image_size=image_size // 2)
        self.mlp1 = nn.Sequential(
            nn.Conv2d(out_chs[0], out_chs[1], kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(out_chs[1], out_chs[1], kernel_size=1)
        )

        # Stage 2: MBConv at quarter resolution, then 1x1 convs to out_chs[2].
        self.s2 = MBConvBlock(ksize=3, input_filters=out_chs[1], output_filters=out_chs[1], image_size=image_size // 4)
        self.mlp2 = nn.Sequential(
            nn.Conv2d(out_chs[1], out_chs[2], kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(out_chs[2], out_chs[2], kernel_size=1)
        )

        # Stage 3: attention over the flattened token sequence, then a linear
        # projection to out_chs[3]. Positional args follow the project's
        # ScaledDotProductAttention signature — presumably
        # (d_model, d_k, d_v, heads); confirm against SelfAttention.py.
        self.s3 = ScaledDotProductAttention(out_chs[2], out_chs[2] // 8, out_chs[2] // 8, 8)
        self.mlp3 = nn.Sequential(
            nn.Linear(out_chs[2], out_chs[3]),
            nn.ReLU(),
            nn.Linear(out_chs[3], out_chs[3])
        )

        # Stage 4: same pattern, projecting to out_chs[4].
        self.s4 = ScaledDotProductAttention(out_chs[3], out_chs[3] // 8, out_chs[3] // 8, 8)
        self.mlp4 = nn.Sequential(
            nn.Linear(out_chs[3], out_chs[4]),
            nn.ReLU(),
            nn.Linear(out_chs[4], out_chs[4])
        )

        # Unused by forward(); kept so existing checkpoints / parameter lists
        # stay compatible.
        self.mlp5 = nn.Sequential(
            nn.Conv2d(out_chs[4], out_chs[4], kernel_size=5),
            nn.ReLU()
        )

    def forward(self, x):
        """Run the five stages on a 4-D image batch.

        Args:
            x: Tensor unpacked as ``(B, C, H, W)``.

        Returns:
            The tensor produced by the last 1-D max pool. Assuming the
            attention blocks preserve the ``(B, N, C)`` layout (TODO confirm),
            this is ``(B, out_chs[4], N // 4)`` with ``N`` the number of
            spatial positions left after the three 2-D pools.
        """
        # Unpacking four values also rejects inputs that are not 4-D.
        B, _, _, _ = x.shape

        # Stage 0
        y = self.mlp0(self.s0(x))
        y = self.maxpool2d(y)

        # Stage 1
        y = self.mlp1(self.s1(y))
        y = self.maxpool2d(y)

        # Stage 2
        y = self.mlp2(self.s2(y))
        y = self.maxpool2d(y)

        # Stage 3: flatten the spatial grid into a token sequence -> (B, N, C).
        y = y.reshape(B, self.out_chs[2], -1).permute(0, 2, 1)
        y = self.mlp3(self.s3(y, y, y))
        # MaxPool1d pools the last axis, so swap to (B, C, N), pool, swap back.
        y = self.maxpool1d(y.permute(0, 2, 1)).permute(0, 2, 1)

        # Stage 4: the result is left channel-first (no swap back after pooling).
        y = self.mlp4(self.s4(y, y, y))
        y = self.maxpool1d(y.permute(0, 2, 1))

        # NOTE(review): no head is applied here (mlp5 / pooling / softmax were
        # only ever experimented with), so callers receive raw features.
        return y

In [3]:
import os 
import cv2 
import matplotlib.pyplot as plt

class croppedDataset(Dataset):
    """Map-style dataset that loads, centre-crops and resizes images listed in a table.

    Args:
        main_dir: Directory that the relative image paths are joined onto.
        meta_data: pandas DataFrame whose column 0 holds the image path and
            whose columns from index 5 onward hold the numeric targets.
        image_size: Size passed to ``T.Resize`` after cropping.
    """

    def __init__(self, main_dir, meta_data, image_size):
        """Store the metadata table and build the preprocessing pipeline."""
        self.meta_data = meta_data
        self.main_dir = main_dir
        # Same value the float expression 0.75 * 64 produced, made an explicit
        # int. NOTE(review): this crops a fixed 48x48 centre patch regardless of
        # the source image size — confirm that is intended.
        crop_size = int(0.75 * 64)
        self.transform = T.Compose([T.ToPILImage(),
                                    T.CenterCrop(crop_size),
                                    T.Resize(image_size),
                                    T.RandomHorizontalFlip(),
                                    T.ToTensor()])

    def __len__(self):
        """Return the number of rows in the metadata table."""
        return len(self.meta_data)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            ``(x, y)``: the transformed image tensor and a float32 tensor built
            from columns 5+ of row ``index``.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        image_path = os.path.join(self.main_dir, self.meta_data.iloc[index, 0])
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise FileNotFoundError(f"Could not read image (missing or unreadable): {image_path}")
        # OpenCV decodes to BGR; the transform pipeline expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # squeeze(0) only drops a leading axis of size 1, so it leaves a
        # 3-channel tensor unchanged.
        x = self.transform(image).squeeze(0)
        y = torch.from_numpy(self.meta_data.iloc[index, 5:].astype(np.float32).values)
        return x, y

In [4]:
# Metadata tables for the two splits, read from the current working directory.
pure_train = pd.read_csv(filepath_or_buffer='pure_train.csv')
pure_val = pd.read_csv(filepath_or_buffer='pure_val.csv')

In [5]:
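# NOTE: disabled plotting helpers, kept for reference only. Before re-enabling them:
#   * import `make_grid` (torchvision.utils) — it is not imported in the cells shown here;
#   * `train_dl` yields (image, label) pairs, so `show_batch` must unpack each batch.
# def show_images(images, nmax=64):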
#     fig, ax = plt.subplots(figsize=(8, 8))
#     ax.set_xticks([]); ax.set_yticks([])
#     ax.imshow(make_grid((images.detach()[:nmax]), nrow=8).permute(1, 2, 0))

# def show_batch(dl, nmax=64):
#     for images in dl:
#         show_images(images, nmax)
#         break

In [6]:
# Data-loading configuration shared by the cells below.
data_path = "/content"   # root directory the image paths are joined onto
image_size = 256         # target size handed to the dataset's resize step
batch_size = 10          # samples per mini-batch

# Training dataset built from the training metadata table, wrapped in a
# shuffling loader.
cropped_dataset = croppedDataset(
    main_dir=data_path,
    meta_data=pure_train,
    image_size=image_size,
)
train_dl = DataLoader(dataset=cropped_dataset, batch_size=batch_size, shuffle=True)

In [27]:
# Build the model for 3-channel inputs at the resolution set in the config cell,
# so the model and the dataset cannot drift apart.
coatnet = CoAtNet(in_ch=3, image_size=image_size)
# Quick shape check (a tensor batch, not the DataLoader itself, must be passed):
# y = coatnet(torch.randn(1, 3, image_size, image_size))
# print(y.shape)

In [28]:
from torchsummary import summary

# Layer-by-layer summary on CPU, using the same input resolution and batch size
# as the data-loading configuration instead of repeating the literals.
summary(coatnet, (3, image_size, image_size), batch_size=batch_size, device='cpu')

torch.Size([2, 3, 256, 256])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [10, 3, 256, 256]              84
              ReLU-2          [10, 3, 256, 256]               0
            Conv2d-3          [10, 3, 256, 256]              84
            Conv2d-4         [10, 64, 256, 256]             256
              ReLU-5         [10, 64, 256, 256]               0
            Conv2d-6         [10, 64, 256, 256]           4,160
         MaxPool2d-7         [10, 64, 128, 128]               0
         ZeroPad2d-8         [10, 64, 130, 130]               0
Conv2dStaticSamePadding-9         [10, 64, 128, 128]             576
      BatchNorm2d-10         [10, 64, 128, 128]             128
MemoryEfficientSwish-11         [10, 64, 128, 128]               0
         Identity-12             [10, 64, 1, 1]               0
Conv2dStaticSamePadding-13             [10, 16, 1, 1]           1,

In [54]:
import torch.optim as optim

# Loss function and optimiser used by the training loop.
# NOTE(review): CoAtNet.forward returns a 3-D feature tensor while the dataset
# yields one float label vector per sample; CrossEntropyLoss needs matching
# shapes for float targets, so `criterion(outputs, labels)` looks likely to
# fail. If the labels are independent multi-label targets, a classification
# head plus nn.BCEWithLogitsLoss is probably what is wanted — confirm.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=coatnet.parameters(),
    lr=1e-3,
    momentum=0.9,
)

In [None]:
for epoch in range(2):  # two full passes over train_dl
    running_loss = 0.0  # loss accumulated since the last progress line

    for i, data in enumerate(train_dl):
        # Each batch is an (inputs, labels) pair.
        inputs, labels = data

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        outputs = coatnet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 2000 != 0:
            continue

        # Every 2000th mini-batch: report the mean loss of the window, then reset.
        print('[%d, %5d] loss: %.3f' %
              (epoch + 1, i + 1, running_loss / 2000))
        running_loss = 0.0

print('Finished Training')