In [1]:
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torchvision.transforms import Normalize, Resize, ToTensor
from torch.utils.tensorboard import SummaryWriter

In [2]:
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from PIL import Image
import os
import time
import datetime
import pandas as pd
from XrayDataset import XrayDataset


In [3]:
# Restrict this process to GPUs 1 and 2. The variable is written before the
# first .cuda() call in the notebook.
os.environ.update(CUDA_VISIBLE_DEVICES="1,2")

In [4]:
# Folder (relative to the working directory) that save_checkpoint writes into.
# NOTE(review): the model cell ends up building inception_v3, not ResNet-50 —
# confirm this folder name is still the intended one.
checkpoint = "checkpoint-resnet50"

In [5]:
# Run name; used as the sub-directory of ./logs for TensorBoard event files.
log = 'resnet50'

# Echo the resolved log directory as the cell output for a quick sanity check.
os.path.join(os.getcwd(), "logs", log)

'/lusnlsas/ramkik_data/covid19/covid/logs/inception_v3'

In [6]:

# TensorBoard writer shared by the training loop and validate();
# event files go to <cwd>/logs/<log>.
tf_writer = SummaryWriter(log_dir=os.path.join(os.getcwd(), "logs", log))


In [7]:
def check_rootfolders(folders=None):
    """Create the output folders this notebook writes into.

    Args:
        folders: Optional iterable of directory paths to create. When omitted,
            only the module-level ``checkpoint`` folder is created, which is
            the original behaviour.

    Raises:
        FileExistsError: If one of the paths already exists but is not a
            directory.
    """
    if folders is None:
        folders = [checkpoint]
    for folder in folders:
        if not os.path.exists(folder):
            print('creating folder ' + folder)
        # makedirs also creates missing parents, and exist_ok removes the
        # check-then-create race that os.mkdir had.
        os.makedirs(folder, exist_ok=True)

# Make sure the checkpoint folder exists before training starts writing to it.
check_rootfolders()

In [8]:
def save_checkpoint(epoch, state):
    """Write ``state`` to ``<checkpoint>/ckpt-<epoch>.pth.tar`` via torch.save.

    Args:
        epoch: Value embedded in the file name.
        state: Object handed to ``torch.save`` unchanged.
    """
    target_path = f'{checkpoint!s}/ckpt-{epoch!s}.pth.tar'
    torch.save(state, target_path)

In [10]:
# Pretrained Inception v3 backbone. A ResNet-50 used to be instantiated on the
# line before and was immediately overwritten, which only cost a weight
# download and memory; that dead construction has been removed.
model = models.inception_v3(pretrained=True)
# Switch off the auxiliary head: the training loop passes the model output
# straight to the loss, so a single logits tensor is expected.
model.aux_logits = False


In [11]:
# Display the module tree to confirm which architecture is loaded.
model

Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, t

In [12]:
# Swap the backbone's final classifier for a small 3-class head
# (2048 -> 512 -> 3). No Softmax layer is appended: the head emits raw scores
# for the loss defined in the next cell.
model.fc = nn.Sequential(
    nn.Linear(in_features=2048, out_features=512),
    nn.ReLU(),
    nn.Dropout(p=0.4),
    nn.Linear(in_features=512, out_features=3),
)

In [13]:
# Wrap the network for multi-GPU data parallelism, then move it to the GPU.
# NOTE(review): re-running this cell wraps the already-wrapped model again.
model = nn.DataParallel(model)
model = model.cuda()

# Loss and optimizer; every parameter of the network is optimized.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [14]:
# Load the training label table from <cwd>/data/train.csv and display it.
df = pd.read_csv(os.path.join(os.getcwd(), "data", "train.csv"))
df

Unnamed: 0.1,Unnamed: 0,filename,label
0,1038,0c2e9b99-9a8f-4b44-854e-acd181a0208c.jpg,0
1,2922,34fdff09-5bc2-4df5-b8cf-3c37662037c8.jpg,2
2,1174,0ebc8268-df3d-45d8-8ee7-b34880c62830.jpg,2
3,341,06f1d0a2-d8c5-4229-9944-59da85c96b81.jpg,0
4,280,06951c33-b247-4daf-a087-cc082f83238b.jpg,0
...,...,...,...
940,1200,0f8c91da-7e03-480e-8760-1604b1d53c97.jpg,0
941,1151,0dbb83c1-2214-4152-ac69-d1e7e25453cb.jpg,0
942,499,081e308c-0134-4ba3-b745-f632e37a83a1.jpg,0
943,663,095d6b7c-fa53-4f06-90b9-5c5f76038f04.jpg,2


In [15]:
# File name stored in the first row of the label table.
df.iloc[0]["filename"]

'0c2e9b99-9a8f-4b44-854e-acd181a0208c.jpg'

In [16]:
# Number of rows, counted via the index.
df.index.size

945

In [17]:
# Number of rows in the label table.
len(df)

945

In [18]:
# Training pipeline: random rotation / crop / flip augmentation, then tensor
# conversion and normalization with the standard ImageNet mean/std used by
# torchvision's pretrained models.
train_transforms = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop(299),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Evaluation pipeline: deterministic resize to the 299x299 input used for
# training. The previous RandomResizedCrop made every validation pass score a
# different random crop, so metrics were noisy and not reproducible.
test_transforms = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [19]:
# Build the train and test datasets, each with its own transform pipeline.
# NOTE(review): XrayDataset is project-local; the three string arguments look
# like (data root, split sub-folder, label CSV) — confirm against XrayDataset.
train_dataset = XrayDataset( 'data', 'train', 'train.csv', train_transforms )

test_dataset = XrayDataset('data', 'test', 'test.csv', test_transforms )

# Pull the first training sample as a smoke test of the dataset.
image, label = next(iter(train_dataset))

In [20]:
# Label of the single sample fetched from train_dataset above.
label

0

In [21]:
# Number of samples per batch, used by both the train and test DataLoaders.
batch_size = 128

In [22]:
# Batched loaders: training data is reshuffled each epoch, test data keeps its
# order. Both use 4 worker processes.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)

In [23]:
# Fetch one batch from the training loader as a smoke test. This rebinds
# `image` and `label`, which previously held a single dataset sample.
image, label = next(iter(train_dataloader))

In [24]:
# Labels of the batch fetched from train_dataloader above.
label

tensor([1, 1, 0, 2, 0, 2, 0, 1, 2, 0, 1, 2, 0, 0, 2, 2, 2, 0, 2, 2, 1, 2, 2, 0,
        0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 2, 0, 2, 1, 2, 2, 2,
        0, 0, 0, 1, 0, 2, 1, 0, 2, 0, 2, 0, 0, 0, 2, 2, 1, 2, 2, 2, 2, 2, 0, 2,
        0, 1, 0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 2, 0,
        2, 1, 1, 2, 1, 0, 0, 0, 1, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 0,
        0, 2, 2, 2, 2, 0, 0, 0])

In [25]:
def validate(val_loader, model, criterion, epoch):
    """Evaluate ``model`` on ``val_loader`` and log loss/accuracy.

    Batches are moved to the GPU with ``.cuda()``. Results are printed and,
    when the module-level ``tf_writer`` is not None, written to TensorBoard
    under the tags ``accurecy/test`` and ``loss/test`` (tag spelling is kept
    so existing runs stay comparable).

    Args:
        val_loader: Iterable yielding ``(inputs, target)`` batches.
        model: Network to evaluate; switched to eval mode here and left there.
        criterion: Callable ``criterion(output, target)`` returning a loss
            tensor.
        epoch: Step value attached to the TensorBoard scalars.

    Returns:
        The sum of the per-batch loss values (not an average). ``0.0`` when
        the loader yields no batches.
    """
    model.eval()

    start_val_time = time.time()

    correct = 0
    total = 0
    running_loss = 0.0
    count_loop = 0
    with torch.no_grad():
        # `images` instead of `input` so the builtin is not shadowed.
        for images, target in val_loader:
            count_loop += 1
            images, target = images.cuda(), target.cuda()

            output = model(images)
            loss = criterion(output, target)
            # Gradients are already disabled here, so no .data detour is needed.
            _, predicted = torch.max(output, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

            running_loss += loss.item()

    # An empty loader used to raise ZeroDivisionError; report 0% instead.
    accuracy = 100 * correct / total if total else 0.0
    end_val_time = time.time()

    currentDT = datetime.datetime.now()
    print(str(currentDT), "=== Validation Loss : ", running_loss, "  Accurecy : ", accuracy, "count loop : ", count_loop, "   Validation Time :  ", (end_val_time-start_val_time) )

    if tf_writer is not None:
        tf_writer.add_scalar('accurecy/test', accuracy, epoch)
        tf_writer.add_scalar('loss/test', running_loss, epoch)
    return running_loss

In [26]:
# Training-loop settings and bookkeeping containers.
epochs = 1000      # number of passes over the training loader
steps = 0          # running count of optimizer steps
print_every = 1

train_losses = []
test_losses = []

In [27]:
# Main training loop: one optimization pass over train_dataloader per epoch,
# followed by validation, TensorBoard logging and a checkpoint.
# The try/finally guarantees the TensorBoard writer is closed even when the
# run is interrupted from the keyboard.
try:
    for epoch in range(epochs):

        running_loss = 0.0
        correct = 0
        total = 0

        model.train()

        for inputs, labels in train_dataloader:
            steps += 1
            inputs, labels = inputs.cuda(), labels.cuda()
            optimizer.zero_grad()
            logps = model(inputs)
            loss = criterion(logps, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Predicted class = index of the largest score per sample.
            _, predicted = torch.max(logps.detach(), 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Guard against an empty loader, which would divide by zero.
        accuracy = 100 * correct / total if total else 0.0
        tf_writer.add_scalar('loss/train', running_loss, epoch)
        tf_writer.add_scalar('accurecy/train', accuracy, epoch)
        print( " === epoch : ", epoch, "=== Training Loss : ", running_loss, "  Accurecy : ", accuracy )

        val_loss = validate(test_dataloader, model, criterion, epoch)
        tf_writer.flush()

        # Keep the per-epoch history in the lists declared for that purpose;
        # they were previously never filled.
        train_losses.append(running_loss)
        test_losses.append(val_loss)

        # One checkpoint file per epoch; the stored 'epoch' is the next epoch
        # to run, while the file name carries the epoch just finished.
        save_checkpoint(epoch, {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                })
finally:
    tf_writer.close()


torch.Size([128, 3, 299, 299])
torch.Size([128, 3, 299, 299])
torch.Size([128, 3, 299, 299])
torch.Size([128, 3, 299, 299])
torch.Size([128, 3, 299, 299])
torch.Size([128, 3, 299, 299])
torch.Size([128, 3, 299, 299])
torch.Size([49, 3, 299, 299])
 === epoch :  0 === Training Loss :  7.674834609031677   Accurecy :  49.1005291005291
2020-04-18 20:48:18.854447 === Validation Loss :  1.3249166011810303   Accurecy :  45.714285714285715 count loop :  1    Validation Time :   2.3860104084014893
torch.Size([128, 3, 299, 299])
torch.Size([128, 3, 299, 299])
torch.Size([128, 3, 299, 299])
torch.Size([128, 3, 299, 299])
torch.Size([128, 3, 299, 299])
torch.Size([128, 3, 299, 299])
torch.Size([128, 3, 299, 299])
torch.Size([49, 3, 299, 299])
 === epoch :  1 === Training Loss :  7.0822913646698   Accurecy :  55.34391534391534
2020-04-18 20:48:29.114669 === Validation Loss :  1.3818206787109375   Accurecy :  42.857142857142854 count loop :  1    Validation Time :   2.4293394088745117
torch.Size([128

KeyboardInterrupt: 