In [1]:
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torchvision.transforms import Normalize, Resize, ToTensor
from torch.utils.tensorboard import SummaryWriter

In [2]:
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from PIL import Image
import os
import time
import datetime
import pandas as pd
from XrayDataset import XrayDataset


In [3]:
# Expose only GPUs 1 and 2 to this process. The variable is read when CUDA is
# initialised, so this cell has to run before the first CUDA call.
os.environ.update({"CUDA_VISIBLE_DEVICES": "1,2"})

In [4]:
# Directory that receives the per-epoch checkpoint files written by save_checkpoint().
# Named after the architecture actually trained in this notebook (DenseNet-161) so the
# ckpt-<epoch> files are not mixed into -- or written over -- a folder of ResNeXt runs.
checkpoint = 'checkpoint-densenet161'

In [5]:
# Run name; it is the sub-directory under <cwd>/logs that the TensorBoard writer uses.
log = "densenet161"

# Display the resolved log directory (last expression of the cell).
os.path.join(os.getcwd(), 'logs', log)

'/lusnlsas/ramkik_data/covid19/covid/logs/densenet161'

In [6]:

# TensorBoard writer for this run; event files go to <cwd>/logs/<log>.
_tb_log_dir = os.path.join(os.getcwd(), 'logs', log)
tf_writer = SummaryWriter(_tb_log_dir)


In [7]:
def check_rootfolders(folders=None):
    """Create the output folders this notebook writes to, if they are missing.

    Args:
        folders: Optional iterable of directory paths to create. When omitted,
            the module-level ``checkpoint`` directory is used, which is what
            the original zero-argument call did.

    Nested paths are supported (missing parent directories are created too).
    """
    if folders is None:
        folders = [checkpoint]
    for folder in folders:
        if not os.path.exists(folder):
            print('creating folder ' + str(folder))
            # makedirs creates missing parents; exist_ok=True tolerates the
            # directory appearing between the exists() check and this call.
            os.makedirs(folder, exist_ok=True)

# Make sure the checkpoint directory exists before anything is saved into it.
check_rootfolders()

In [8]:
def save_checkpoint(epoch, state):
    """Serialise ``state`` with ``torch.save`` to ``<checkpoint>/ckpt-<epoch>.pth.tar``.

    ``checkpoint`` is the module-level output directory; ``epoch`` only
    contributes to the file name.
    """
    target_path = '{!s}/ckpt-{!s}.pth.tar'.format(checkpoint, epoch)
    torch.save(state, target_path)

In [9]:
# Alternative backbone, currently disabled:
#model = models.resnext50_32x4d(pretrained=True)
# Backbone in use: torchvision DenseNet-161, loaded with pretrained weights.
model = models.densenet161(pretrained=True)

In [10]:
# Display the module tree of the loaded network.
model

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(96, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(192, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (rel

In [11]:
# Replace the stock classifier with a small MLP head: 2208 -> 512 -> 3.
# The head ends in a plain Linear layer (no Softmax), so it returns raw scores.
# For the disabled ResNeXt backbone the same stack, with 2048 inputs, was
# assigned to `model.fc` instead.
model.classifier = nn.Sequential(
    nn.Linear(in_features=2208, out_features=512),
    nn.ReLU(),
    nn.Dropout(p=0.4),
    nn.Linear(in_features=512, out_features=3),
)

In [12]:
# Wrap the network in DataParallel and move it to CUDA.
# The isinstance guard keeps this cell idempotent: re-running it would otherwise
# wrap an already wrapped model in a second DataParallel, adding another
# "module." level to every state_dict key.
if not isinstance(model, torch.nn.DataParallel):
    model = torch.nn.DataParallel(model)
model = model.cuda()

# Loss and optimiser. The optimiser is built after wrapping so it sees the
# parameters of the model object that is actually trained.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [13]:
# Load the training manifest (<cwd>/data/train.csv) into a DataFrame for inspection.
df = pd.read_csv(os.path.join(os.getcwd(), 'data', 'train.csv'))
# Display the frame (last expression of the cell).
df

Unnamed: 0.1,Unnamed: 0,filename,label
0,1038,0c2e9b99-9a8f-4b44-854e-acd181a0208c.jpg,0
1,2922,34fdff09-5bc2-4df5-b8cf-3c37662037c8.jpg,2
2,1174,0ebc8268-df3d-45d8-8ee7-b34880c62830.jpg,2
3,341,06f1d0a2-d8c5-4229-9944-59da85c96b81.jpg,0
4,280,06951c33-b247-4daf-a087-cc082f83238b.jpg,0
...,...,...,...
940,1200,0f8c91da-7e03-480e-8760-1604b1d53c97.jpg,0
941,1151,0dbb83c1-2214-4152-ac69-d1e7e25453cb.jpg,0
942,499,081e308c-0134-4ba3-b745-f632e37a83a1.jpg,0
943,663,095d6b7c-fa53-4f06-90b9-5c5f76038f04.jpg,2


In [14]:
# File name stored in the first row of the manifest.
df.iloc[0].filename

'0c2e9b99-9a8f-4b44-854e-acd181a0208c.jpg'

In [15]:
# Row count, taken from the length of the index.
len(df.index)

945

In [16]:
# Row count again, this time read from the frame's shape.
df.shape[0]

945

In [17]:
def _base_steps():
    """Return fresh instances of the steps shared by both pipelines.

    Resize to 224x224, convert to a tensor, then normalise each of the three
    channels with the fixed mean/std values below.
    """
    return [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]


# Training pipeline: random horizontal flip first, then the shared steps.
# (Grayscale and ColorJitter steps are deliberately not part of either pipeline.)
train_transforms = transforms.Compose([transforms.RandomHorizontalFlip(p=0.5), *_base_steps()])

# Evaluation pipeline: the shared steps only, no augmentation.
test_transforms = transforms.Compose(_base_steps())

In [18]:
# Build the train / test datasets. XrayDataset is a project-local class; its arguments
# are presumably (root dir, split sub-folder, CSV manifest, transform) -- TODO confirm
# against XrayDataset.py.
train_dataset = XrayDataset( 'data', 'train', 'train.csv', train_transforms )

test_dataset = XrayDataset('data', 'test', 'test.csv', test_transforms )

# Pull a single sample as a smoke test of the dataset and its transform pipeline.
image, label = next(iter(train_dataset))

In [19]:
# Label of the sample just drawn from train_dataset.
label

0

In [20]:
# Mini-batch size passed to both the train and the test DataLoader.
batch_size = 128

In [21]:
# Both loaders share batch size and worker count; only the training loader shuffles.
_loader_opts = {"batch_size": batch_size, "num_workers": 4}
train_dataloader = DataLoader(train_dataset, shuffle=True, **_loader_opts)
test_dataloader = DataLoader(test_dataset, shuffle=False, **_loader_opts)

In [22]:
# Fetch one batch from the shuffled training loader; this rebinds `image` and `label`
# (previously a single sample) to a whole batch.
image, label = next(iter(train_dataloader))

In [23]:
# Labels of the batch just fetched from train_dataloader.
label

tensor([0, 2, 0, 0, 0, 2, 0, 0, 2, 0, 1, 0, 1, 0, 0, 2, 2, 2, 0, 2, 0, 2, 0, 1,
        0, 2, 1, 0, 0, 0, 0, 1, 0, 1, 2, 0, 0, 0, 0, 1, 2, 2, 0, 2, 2, 2, 2, 2,
        2, 2, 2, 0, 2, 0, 2, 0, 0, 0, 2, 0, 2, 1, 2, 2, 0, 2, 0, 0, 0, 2, 0, 2,
        2, 0, 2, 2, 1, 0, 2, 2, 0, 0, 2, 0, 0, 2, 0, 0, 2, 2, 1, 0, 1, 2, 1, 0,
        0, 2, 2, 1, 0, 1, 0, 2, 0, 2, 2, 0, 2, 1, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2,
        0, 2, 2, 1, 0, 0, 2, 0])

In [24]:
def validate(val_loader, model, criterion, epoch):
    """Evaluate ``model`` on ``val_loader``, print a summary and log it.

    Args:
        val_loader: Iterable yielding ``(inputs, targets)`` batches.
        model: Network to evaluate. It is switched to eval mode and left in
            that mode; the caller must call ``model.train()`` again.
        criterion: Loss callable applied as ``criterion(outputs, targets)``.
        epoch: Step index used for the TensorBoard scalars.

    Returns:
        The sum of the per-batch loss values (not a mean), so the number
        scales with the number of batches in ``val_loader``.

    Side effects:
        Prints one summary line and, when the module-level ``tf_writer`` is
        not ``None``, writes the ``accurecy/test`` and ``loss/test`` scalars.
        The tag spelling is kept as-is so existing TensorBoard curves continue.
    """
    model.eval()

    start_val_time = time.time()

    correct = 0
    total = 0
    running_loss = 0.0
    count_loop = 0
    with torch.no_grad():
        # `inputs` rather than `input`, so the builtin is not shadowed.
        for inputs, target in val_loader:
            count_loop += 1
            target = target.cuda()

            # Forward pass; gradients are disabled by the no_grad() context, so
            # the outputs can be used directly without the legacy `.data` access.
            output = model(inputs.cuda())
            loss = criterion(output, target)
            _, predicted = torch.max(output, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

            running_loss += loss.item()

    # An empty loader used to raise ZeroDivisionError here; report 0 % instead.
    accurecy = 100 * correct / total if total else 0.0
    end_val_time = time.time()

    currentDT = datetime.datetime.now()
    print(str(currentDT), "=== Validation Loss : ", running_loss, "  Accurecy : ", accurecy, "count loop : ", count_loop, "   Validation Time :  ", (end_val_time-start_val_time) )

    if tf_writer is not None:
        tf_writer.add_scalar('accurecy/test', accurecy, epoch)
        tf_writer.add_scalar('loss/test', running_loss, epoch)
    return running_loss

In [25]:
# Number of passes over the training loader.
epochs = 1000
# Running count of optimisation steps; the training loop adds 1 per batch.
steps = 0
# NOTE(review): not referenced by any cell visible here -- confirm it is still needed.
print_every = 1
  
# Lists set aside for training / test loss values.
train_losses, test_losses = [], []

In [None]:
# Main training loop. Per epoch: one optimisation pass over train_dataloader, an
# evaluation pass via validate(), TensorBoard logging, and a checkpoint.
# The try/finally closes the TensorBoard writer even when this long run is
# interrupted (e.g. Kernel -> Interrupt) or an epoch raises.
try:
    for epoch in range(epochs):

        # Per-epoch accumulators. running_loss is the SUM of the batch losses,
        # which is the same convention validate() uses for its return value.
        running_loss = 0.0
        correct = 0
        total = 0

        # validate() leaves the model in eval mode, so re-enable training mode
        # at the start of every epoch.
        model.train()

        for inputs, labels in train_dataloader:
            steps += 1
            inputs, labels = inputs.cuda(), labels.cuda()
            optimizer.zero_grad()
            logits = model(inputs)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # detach() replaces the legacy `.data` access: the accuracy
            # bookkeeping must not be tracked by autograd.
            _, predicted = torch.max(logits.detach(), 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Guard against an empty training loader (would be ZeroDivisionError).
        accurecy = 100 * correct / total if total else 0.0
        tf_writer.add_scalar('loss/train', running_loss, epoch)
        tf_writer.add_scalar('accurecy/train', accurecy, epoch)
        print( " === epoch : ", epoch, "=== Training Loss : ", running_loss, "  Accurecy : ", accurecy )

        val_loss = validate(test_dataloader, model, criterion, epoch)
        tf_writer.flush()

        # Record the per-epoch history; these lists were created for it but
        # were never filled, and validate()'s return value was discarded.
        train_losses.append(running_loss)
        test_losses.append(val_loss)

        # 'epoch' in the payload is the next epoch to run (epoch + 1), while the
        # file name carries the epoch that just finished.
        save_checkpoint(epoch, {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        })
finally:
    tf_writer.close()


 === epoch :  0 === Training Loss :  7.750190317630768   Accurecy :  50.15873015873016
2020-04-18 09:44:19.242387 === Validation Loss :  2.896541118621826   Accurecy :  44.76190476190476 count loop :  1    Validation Time :   3.070073127746582
 === epoch :  1 === Training Loss :  5.942614018917084   Accurecy :  66.87830687830687
2020-04-18 09:44:29.932799 === Validation Loss :  1.6128817796707153   Accurecy :  60.95238095238095 count loop :  1    Validation Time :   2.524005174636841
 === epoch :  2 === Training Loss :  4.701593577861786   Accurecy :  73.01587301587301
2020-04-18 09:44:40.377548 === Validation Loss :  2.630441427230835   Accurecy :  58.095238095238095 count loop :  1    Validation Time :   2.430443525314331
 === epoch :  3 === Training Loss :  4.306457221508026   Accurecy :  77.14285714285714
2020-04-18 09:44:51.166462 === Validation Loss :  2.230482816696167   Accurecy :  55.23809523809524 count loop :  1    Validation Time :   2.4487218856811523
 === epoch :  4 === T