In [None]:
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import torch
import torchvision
from torchvision import transforms
import torchmetrics
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from tqdm.notebook import tqdm
import numpy as np
import matplotlib.pyplot as plt


# In[2]:


import pytorch_lightning as pl


# In[3]:


def load_file(path):
    """Load the NumPy array stored at ``path`` and return it cast to float32."""
    array = np.load(path)
    return array.astype(np.float32)


# In[4]:


# Training pipeline: tensor conversion, normalization, then random augmentation.
train_transforms = transforms.Compose(
    [
        # Convert the numpy array to a tensor
        transforms.ToTensor(),
        # Mean and std taken from the preprocessing notebook
        transforms.Normalize(0.49, 0.248),
        # Data augmentation: small random rotation, shift and zoom ...
        transforms.RandomAffine(degrees=(-5, 5), translate=(0, 0.05), scale=(0.9, 1.1)),
        # ... followed by a random crop that is resized to 224x224
        transforms.RandomResizedCrop((224, 224), scale=(0.35, 1)),
    ]
)

# Validation pipeline: tensor conversion and normalization only (no augmentation).
val_transforms = transforms.Compose(
    [
        # Convert the numpy array to a tensor
        transforms.ToTensor(),
        # Mean and std taken from the preprocessing notebook
        transforms.Normalize([0.49], [0.248]),
    ]
)




# In[5]:


# Training set: .npy files read through `load_file`, with the augmenting transforms applied.
train_dataset = torchvision.datasets.DatasetFolder(
    root="C:/Users/Owner/Downloads/Udemy/Pytorch/04-Pneumonia-Classification/04-Pneumonia-Classification/Processed/train/",
    loader=load_file,
    extensions="npy",
    transform=train_transforms,
)

# Validation set: same loader, but only the non-augmenting transforms.
val_dataset = torchvision.datasets.DatasetFolder(
    root="C:/Users/Owner/Downloads/Udemy/Pytorch/04-Pneumonia-Classification/04-Pneumonia-Classification/Processed/val/",
    loader=load_file,
    extensions="npy",
    transform=val_transforms,
)



# In[6]:


# Display the training dataset summary
train_dataset


# In[7]:


# Display the validation dataset summary
val_dataset


# In[8]:


# Preview a 2x2 grid of randomly chosen training samples (after train_transforms) with their labels.
fig, axis = plt.subplots(2, 2, figsize=(9, 9))
for i in range(2):
    for j in range(2):
        # Draw the index from the actual dataset size rather than a hard-coded bound,
        # so every sample is reachable and smaller datasets cannot raise IndexError.
        random_index = np.random.randint(0, len(train_dataset))
        x_ray, label = train_dataset[random_index]
        # x_ray[0] selects the first channel for display
        axis[i][j].imshow(x_ray[0], cmap="bone")
        axis[i][j].set_title(f"Label:{label}")



# In[9]:


batch_size = 64
# Number of DataLoader worker processes; 0 loads the data in the main process.
# This variable is now the single source of truth for both loaders (previously it
# was declared as 4 but both loaders hard-coded 0).
# NOTE(review): 0 was presumably chosen because worker processes are unreliable
# when the loader function is defined inside a notebook on Windows — confirm before raising it.
num_workers = 0

# Shuffle only the training data; keep validation order fixed.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False)

print(f"There are {len(train_dataset)} train images and {len(val_dataset)} val images")


# The classes are imbalanced: There are more images without signs of pneumonia than with pneumonia.
# There are multiple ways to deal with imbalanced datasets:

    #Weighted Loss
    #Oversampling
    #Doing nothing :)

# In this example, we will simply do nothing, as this often yields the best results.
# But feel free to play around with a weighted loss. A template to define a customized weighted loss function is provided below.

#Oversampling will be shown in a later lecture.


# In[10]:


# Unique label values and how often each occurs, for the training and the validation set.
np.unique(train_dataset.targets, return_counts=True), np.unique(val_dataset.targets, return_counts=True)


# In[11]:


# Instantiate an unmodified ResNet-18; as the last expression of the cell its structure is displayed.
torchvision.models.resnet18()


# In[12]:


# Important: PyTorch Lightning training module

class PneumoniaModel(pl.LightningModule):
    """ResNet-18 based binary classifier for single-channel images.

    The network outputs one logit per image. Training uses
    ``BCEWithLogitsLoss`` and tracks binary accuracy for the train and
    validation splits.

    Args:
        weight: Value passed to ``BCEWithLogitsLoss`` as ``pos_weight``
            (weight of the positive class in the loss). Defaults to 1.
    """

    def __init__(self, weight=1):
        super().__init__()

        self.model = torchvision.models.resnet18()
        # change conv1 from 3 to 1 input channels
        self.model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        # change out_feature of the last fully connected layer (called fc in resnet18) from 1000 to 1
        self.model.fc = torch.nn.Linear(in_features=512, out_features=1)

        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-4)
        # Build pos_weight as a float tensor so it matches the dtype of the logits
        # (an int default such as weight=1 would otherwise produce an integer tensor).
        self.loss_fn = torch.nn.BCEWithLogitsLoss(pos_weight=torch.tensor([weight], dtype=torch.float32))

        # simple accuracy computation
        self.train_acc = torchmetrics.Accuracy(task='binary')
        self.val_acc = torchmetrics.Accuracy(task='binary')

    def forward(self, data):
        """Return the raw logits of the wrapped ResNet for ``data``."""
        return self.model(data)

    def training_step(self, batch, batch_idx):
        """Compute and log loss and batch accuracy for one training batch; return the loss."""
        x_ray, label = batch
        label = label.float()  # Convert label to float (just needed for loss computation)
        pred = self(x_ray)[:, 0]  # Drop the channel axis so prediction and label have the same shape
        loss = self.loss_fn(pred, label)  # Compute the loss

        # Log loss and batch accuracy (calling the metric also accumulates its epoch state)
        self.log("Train Loss", loss)
        self.log("Step Train Acc", self.train_acc(torch.sigmoid(pred), label.int()))
        return loss

    def on_train_epoch_end(self, outs=None):
        """Log the accuracy accumulated over the whole training epoch.

        ``outs`` is unused; it defaults to ``None`` because Lightning invokes
        this hook without arguments.
        """
        self.log("Train Acc", self.train_acc.compute())
        # Start the next epoch from a clean state; otherwise accuracies would
        # accumulate across epochs.
        self.train_acc.reset()

    def validation_step(self, batch, batch_idx):
        """Compute and log loss and batch accuracy for one validation batch; return the loss."""
        # Same steps as in the training_step
        x_ray, label = batch
        label = label.float()
        pred = self(x_ray)[:, 0]  # make sure prediction and label have same shape

        loss = self.loss_fn(pred, label)

        # Log validation metrics
        self.log("Val Loss", loss)
        self.log("Step Val Acc", self.val_acc(torch.sigmoid(pred), label.int()))
        return loss

    def on_validation_epoch_end(self):
        """Log the accuracy accumulated over the whole validation run as "Val Acc".

        This must carry the exact Lightning hook name to be called; "Val Acc"
        is the metric name a checkpoint callback can monitor.
        """
        self.log("Val Acc", self.val_acc.compute())
        # Reset so batches from earlier validation runs (including the sanity
        # check) do not leak into the next result.
        self.val_acc.reset()

    def configure_optimizers(self):
        """Return the optimizer(s) used for training."""
        # Lightning accepts a single optimizer or a list; the list form is kept here.
        return [self.optimizer]


# In[13]:


model = PneumoniaModel()  # Instantiate the model with its default arguments


# In[14]:


# Checkpoint callback: monitors the logged "Val Acc" metric in 'max' mode with save_top_k=10
checkpoint_callback = ModelCheckpoint(monitor='Val Acc', mode='max', save_top_k=10)


# In[15]:


# Build the Trainer once, with TensorBoard logging and the checkpoint callback attached.
# (A bare Trainer created here earlier was immediately overwritten without being used.)
trainer = pl.Trainer(
    accelerator='gpu',
    logger=TensorBoardLogger(save_dir="C:/Users/Owner/Downloads/Udemy/Pytorch/04-Pneumonia-Classification/04-Pneumonia-Classification/logs"),
    log_every_n_steps=1,
    callbacks=[checkpoint_callback],
    max_epochs=35
)



# In[ ]:


# Train on train_loader and validate on val_loader
trainer.fit(model, train_loader, val_loader)


# In[18]:





# In[ ]:





def custom_validation_epoch_end(trainer, model, outs):
    """Log the accumulated validation accuracy of ``model`` under the name "Val Acc".

    ``trainer`` and ``outs`` are accepted but not used.
    """
    val_accuracy = model.val_acc.compute()
    model.log("Val Acc", val_accuracy)


# Run on the first GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Use strict=False, otherwise we would want to match the pos_weight which is not necessary.
# map_location keeps the CPU fallback usable for checkpoints that were saved on a GPU.
model = PneumoniaModel.load_from_checkpoint("weights/weights_1.ckpt", map_location=device, strict=False)
model.eval()
model.to(device);

preds = []   # one probability tensor of shape (1,) per validation sample
labels = []  # the matching ground-truth labels

with torch.no_grad():
    for data, label in tqdm(val_dataset):
        # Add a batch axis of size 1 and move the sample to the model's device
        data = data.to(device).float().unsqueeze(0)
        # model(data)[0] is the single logit of this sample; sigmoid turns it into a probability
        pred = torch.sigmoid(model(data)[0].cpu())
        preds.append(pred)
        labels.append(label)
# Concatenate the per-sample tensors into one 1-D tensor of probabilities
preds = torch.cat(preds)
labels = torch.tensor(labels).int()




There are 24000 train images and 2684 val images


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type              | Params
------------------------------------------------
0 | model     | ResNet            | 11.2 M
1 | loss_fn   | BCEWithLogitsLoss | 0     
2 | train_acc | BinaryAccuracy    | 0     
3 | val_acc   | BinaryAccuracy    | 0     
------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.683    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

