In [6]:
import os
import time
import pandas as pd
import torch
from tempfile import TemporaryDirectory
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np

In [1]:
from custom_dataset import *

filters = ['Atelectasis', 'Effusion',]

csvs = {
    "train": "./train.csv",
    "val": "./test.csv",
    "test": "./test.csv",
}

limit = None

dataframes = build_dataframes(csvs, filters, limit)

for (phase, dataframe) in dataframes.items():
    print(f"{phase} dataset size: {len(dataframe)}")

train dataset size: 6010
val dataset size: 1797
test dataset size: 1797


In [2]:
def count_classes(dataframe):
    return dataframe.iloc[:, 1:].sum(axis=0).to_dict()


In [3]:
count_classes(dataframes["train"])

{'Atelectasis': 3135,
 'Cardiomegaly': 0,
 'Consolidation': 0,
 'Edema': 0,
 'Effusion': 2875,
 'Emphysema': 0,
 'Fibrosis': 0,
 'Hernia': 0,
 'Infiltration': 0,
 'Mass': 0,
 'Nodule': 0,
 'Pleural_Thickening': 0,
 'Pneumonia': 0,
 'Pneumothorax': 0,
 'Pneumoperitoneum': 0,
 'Pneumomediastinum': 0,
 'Subcutaneous Emphysema': 0,
 'Tortuous Aorta': 0,
 'Calcification of the Aorta': 0,
 'No Finding': 0}

In [4]:
count_classes(dataframes["val"])

{'Atelectasis': 868,
 'Cardiomegaly': 0,
 'Consolidation': 0,
 'Edema': 0,
 'Effusion': 929,
 'Emphysema': 0,
 'Fibrosis': 0,
 'Hernia': 0,
 'Infiltration': 0,
 'Mass': 0,
 'Nodule': 0,
 'Pleural_Thickening': 0,
 'Pneumonia': 0,
 'Pneumothorax': 0,
 'Pneumoperitoneum': 0,
 'Pneumomediastinum': 0,
 'Subcutaneous Emphysema': 0,
 'Tortuous Aorta': 0,
 'Calcification of the Aorta': 0,
 'No Finding': 0}

In [7]:
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
       mean=[0.485, 0.456, 0.406],  # default values for imagenet
       std=[0.229, 0.224, 0.225]
    ),
])

phs = ["train", "val", "test"]

image_datasets = {x: SingleLabelDataset(dataframes[x],
                                       'dataset/images',
                                       transform=transform)
                  for x in phs}

dataloaders = {x: DataLoader(image_datasets[x],
                             batch_size=8,
                             shuffle=True,
                             num_workers=24)
               for x in phs}

dataset_sizes = {x: len(image_datasets[x]) for x in phs}

class_names = image_datasets['train'].classes

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Dsevice: ", device)

Dsevice:  cuda


In [8]:
dataset_sizes = {x: len(dataloaders[x].dataset) for x in phs}
print(dataset_sizes)

{'train': 6010, 'val': 1797, 'test': 1797}


In [16]:
from pretrained_models import *

models_d = {
    "googlenet": None,
    "vit_b_32": None,
    "vgg13": None,
    "resnet18": None,
    "resnet50": None,
    "densenet161": None
}

model_names = models_d.keys()
models_path = "./models/"

In [73]:
download_pretrained_models(model_names, models_path)

googlenet successfully downloaded and saved.
vit_b_32 successfully downloaded and saved.
vgg13 successfully downloaded and saved.
resnet18 successfully downloaded and saved.
resnet50 successfully downloaded and saved.
densenet161 successfully downloaded and saved.


In [17]:
models_d = load_models(models_d, "./models")



googlenet successfully loaded.
vit_b_32 successfully loaded.
vgg13 successfully loaded.
resnet18 successfully loaded.
resnet50 successfully loaded.
densenet161 successfully loaded.


In [81]:
from transfer_learning import *


#num_ftrs = model_ft.fc.in_features
#model_ft.fc = nn.Linear(num_ftrs, len(class_names))

for (name, model) in models_d.items():
    print(name)

    model_ft = model
    model_ft = model_ft.to(device)
    
    criterion = nn.CrossEntropyLoss()
    
    # Observe that all parameters are being optimized
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
    
    # Decay LR by a factor of 0.1 every 6 epochs
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=6, gamma=0.1)
    
    model = train_model(dataloaders, model, criterion, optimizer_ft, exp_lr_scheduler,
                        num_epochs=10)
        

googlenet
Epoch 0/23
----------
train Loss: 0.1542 Acc: 0.9383
val Loss: 0.7530 Acc: 0.7674

Epoch 1/23
----------
train Loss: 0.1932 Acc: 0.9220
val Loss: 0.7157 Acc: 0.7446

Epoch 2/23
----------
train Loss: 0.1451 Acc: 0.9433
val Loss: 0.7692 Acc: 0.7529

Epoch 3/23
----------
train Loss: 0.1479 Acc: 0.9418
val Loss: 0.7521 Acc: 0.7696

Epoch 4/23
----------
train Loss: 0.1231 Acc: 0.9498
val Loss: 0.8948 Acc: 0.7590

Epoch 5/23
----------
train Loss: 0.1235 Acc: 0.9531
val Loss: 0.8274 Acc: 0.7713

Epoch 6/23
----------
train Loss: 0.0786 Acc: 0.9696
val Loss: 0.8155 Acc: 0.7724

Epoch 7/23
----------
train Loss: 0.0618 Acc: 0.9764
val Loss: 0.8147 Acc: 0.7707

Epoch 8/23
----------
train Loss: 0.0492 Acc: 0.9817
val Loss: 0.8310 Acc: 0.7746

Epoch 9/23
----------
train Loss: 0.0470 Acc: 0.9842
val Loss: 0.9105 Acc: 0.7735

Epoch 10/23
----------
train Loss: 0.0382 Acc: 0.9865
val Loss: 0.8866 Acc: 0.7757

Epoch 11/23
----------
train Loss: 0.0423 Acc: 0.9834
val Loss: 0.8956 Acc: 

In [6]:
destination = "./models_trained"

for (name, model) in models_d.items():
    torch.save(model.state_dict(), f"{destination}/{name}.pth")

AttributeError: 'NoneType' object has no attribute 'state_dict'

In [None]:
from sklearn.metrics import confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# input example
y_true = torch.tensor([1, 0, 1, 2, 0, 1, 2, 2])
y_pred = torch.tensor([1, 0, 1, 2, 0, 2, 1, 2])

# tensor to numpy arrays
y_true_np = y_true.numpy()
y_pred_np = y_pred.numpy()

# calculate confusion matrix
conf_matrix = confusion_matrix(y_true_np, y_pred_np)

# Config grfic
plt.figure(figsize=(8, 6))
sns.set(font_scale=1.2)  

# heat map
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False,
            xticklabels=['Clase 0', 'Clase 1', 'Clase 2'],
            yticklabels=['Clase 0', 'Clase 1', 'Clase 2'])

# Add tags and title
plt.xlabel("Predicciones")
plt.ylabel("Etiquetas Verdaderas")
plt.title("Matriz de Confusión")

# show grafic
plt.show()

In [5]:
models_d = load_models(models_d, "./models_trained")



googlenet successfully loaded.
vit_b_32 successfully loaded.
vgg13 successfully loaded.
resnet18 successfully loaded.
resnet50 successfully loaded.
densenet161 successfully loaded.


In [6]:
from transfer_learning import *

print("hi")

hi


In [7]:

for (name, model) in models_d.items():
    print(f"Model: {name}")
    test_model(model, dataloaders["test"])

Model: googlenet
Accuracy of Atelectasis: 76.61290322580645%
Accuracy of Effusion: 79.54790096878364%
Model: vit_b_32
Accuracy of Atelectasis: 61.75115207373272%
Accuracy of Effusion: 65.76964477933261%
Model: vgg13
Accuracy of Atelectasis: 80.76036866359448%
Accuracy of Effusion: 76.53390742734123%
Model: resnet18
Accuracy of Atelectasis: 80.29953917050692%
Accuracy of Effusion: 76.64155005382132%
Model: resnet50
Accuracy of Atelectasis: 80.99078341013825%
Accuracy of Effusion: 77.93326157158235%
Model: densenet161
Accuracy of Atelectasis: 75.11520737327189%
Accuracy of Effusion: 83.745963401507%
