In [25]:
# # Install PyTorch Lightning (if not already installed)
# !pip install pytorch-lightning
# !pip install torchmetrics
# !pip install h5py
# !pip install tensorboard

In [26]:
# from google.colab import drive
# drive.mount("/content/drive")

In [27]:
# Import necessary libraries
import torch
import pytorch_lightning as pl
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader,Dataset
from torchmetrics import Accuracy
import torch.nn as nn
import torch.nn.functional as F
import h5py
import pandas as pd
import numpy as np
import os
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

In [28]:

# !ls drive/MyDrive/Dans_lab/Data/secondUnzip/images_rendered_try2/
!ls /panfs/jay/groups/27/kersten/gejji003/Shadows/trained_models/round1

checkpoint  logger


In [29]:
# os.path.exists("drive/MyDrive/Dans_lab/Data/trained_models/round1")

In [30]:
# print(torch.cuda.is_available())

In [31]:
# Specifying all variables in a single place.

# Root directory passed to TensorBoardLogger as `save_dir` further down.
logger_dir = "/panfs/jay/groups/27/kersten/gejji003/Shadows/trained_models/round1/logger/"
# Directory passed to ModelCheckpoint as `dirpath` further down.
checkpt_dirpath = "/panfs/jay/groups/27/kersten/gejji003/Shadows/trained_models/round1/checkpoint/"
class MyDataManager():
    """Load the train/val/test CSV index files that describe the rendered images.

    Each CSV is read with ``pd.read_csv`` and is expected to provide the columns
    named by ``column_x`` (image path, relative to ``base_path``) and
    ``column_y`` (label).

    Args:
        base_path: Directory holding ``trainData.csv``, ``valData.csv`` and
            ``testData.csv``. It is also the prefix prepended to every image
            path by :meth:`split_xy`. A trailing path separator is added when
            missing, so the default value behaves exactly as before.

    Raises:
        FileNotFoundError: If any of the three CSV files is missing.
    """

    def __init__(self, base_path="/panfs/jay/groups/27/kersten/gejji003/Shadows/data/images_rendered_try2/"):
        # Names of the CSV columns holding the image path and the class label.
        self.column_x = 'path_to_img'
        self.column_y = 'label'

        # Paths are built by plain string concatenation below, so make sure the
        # prefix always ends with a separator.
        self.base_path = os.path.join(base_path, "")
        self.file_path_train = self.base_path + "trainData.csv"
        self.file_path_val = self.base_path + "valData.csv"
        self.file_path_test = self.base_path + "testData.csv"

        # Names of the HDF5 datasets read from each image file.
        self.color_channel = "colors"
        self.shadow_channel = "cast_shadow"

        # Prefix prepended to the relative image paths stored in the CSVs.
        self.default_string = self.base_path

        # Validate every split up front so a missing file is reported with a
        # clear message before any of them is parsed.
        csv_paths = (self.file_path_train, self.file_path_val, self.file_path_test)
        missing = [path for path in csv_paths if not os.path.exists(path)]
        if missing:
            raise FileNotFoundError(
                "The following csv files do not exist: " + ", ".join(missing)
            )

        self.train_df = pd.read_csv(self.file_path_train)
        self.val_df = pd.read_csv(self.file_path_val)
        self.test_df = pd.read_csv(self.file_path_test)

    def split_xy(self, df):
        """Split a split-DataFrame into image paths and labels.

        Args:
            df: DataFrame containing the ``column_x`` and ``column_y`` columns.

        Returns:
            A ``(paths, labels)`` tuple of pandas Series. String entries of the
            path column are prefixed with ``default_string``; non-string entries
            (e.g. NaN) are passed through unchanged.
        """
        prefix = self.default_string
        paths = df[self.column_x].apply(
            lambda rel: prefix + rel if isinstance(rel, str) else rel
        )
        return paths, df[self.column_y]

class MyTansforms():
    """Builds the torchvision transform pipelines for the shadow images."""

    def __init__(self):
        # No state is needed up front; each pipeline is created when its
        # builder method is called.
        pass

    def shadow_training_transform(self):
        """Create the training pipeline, store it on ``self.traintransforms`` and return it."""
        channel_stats = (0.5, 0.5, 0.5)
        steps = [
            transforms.GaussianBlur(kernel_size=5),
            transforms.RandomResizedCrop(244),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=0.2, contrast=0.2),
            transforms.Normalize(channel_stats, channel_stats),
        ]
        self.traintransforms = transforms.Compose(steps)
        return self.traintransforms

    def shadow_test_transform(self,):
        """Create the evaluation pipeline (normalisation only), store it on ``self.testtransforms`` and return it."""
        channel_stats = (0.5, 0.5, 0.5)
        steps = [
            transforms.Normalize(channel_stats, channel_stats),
        ]
        self.testtransforms = transforms.Compose(steps)
        return self.testtransforms


In [32]:
class MyDataLoading(Dataset):
    """Map-style dataset that reads one HDF5 file per sample.

    Gives fine-grained control over how each image is preprocessed. For every
    index the file at ``x[idx]`` is opened with h5py, the dataset named
    ``color_channel`` is read, converted to a float32 tensor and permuted with
    ``[2, 0, 1]``. The matching label is returned as a ``torch.long`` scalar.

    Args:
        x: Sequence (list or pandas Series) of HDF5 file paths.
        y: Sequence (list or pandas Series) of integer labels, aligned with ``x``.
        color_channel: Name of the HDF5 dataset holding the colour image.
        shadow_channel: Name of the HDF5 dataset holding the cast-shadow map.
            Currently only reported in error messages; stacking it onto the
            colour tensor is disabled (see the TODO in ``__getitem__``).
        mode: Loading mode. Only ``"train"`` is implemented.
        batch_size: Stored on the instance; not used by the dataset itself.
        transforms: Stored as ``self.transform`` / ``self.transforms``.
            NOTE(review): the pipeline is never applied in ``__getitem__``. This
            is left unchanged on purpose so that inputs stay consistent with
            checkpoints that were trained without it.
        fallback_shape: Shape of the all-zero placeholder returned when a file
            cannot be read. Defaults to 3 channels to match the colour-only
            samples; the spatial size 512x512 is carried over from the original
            placeholder -- TODO confirm it matches the rendered images.
    """

    # Modes for which __getitem__ knows how to build a sample.
    _SUPPORTED_MODES = ("train",)

    def __init__(self, x,y,color_channel,shadow_channel,mode,batch_size = 16, transforms=None, fallback_shape=(3, 512, 512)):
        self.data_paths = x
        self.label = y
        self.mode = mode
        # Both spellings are kept because the class has always exposed both.
        self.transform = transforms
        self.transforms = transforms
        # Honour the argument (it used to be overwritten with a hard-coded 16).
        self.batch_size = batch_size
        self.color = color_channel
        self.shadow = shadow_channel
        self.fallback_shape = tuple(fallback_shape)

    def __len__(self):
        """Number of samples, i.e. number of file paths."""
        return len(self.data_paths)

    @staticmethod
    def _item_at(values, idx):
        """Return the ``idx``-th element of ``values`` by position.

        pandas Series are indexed through ``.iloc`` so that a non-default index
        (e.g. after filtering rows) cannot cause a lookup by label.
        """
        positional = getattr(values, "iloc", None)
        return positional[idx] if positional is not None else values[idx]

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image_tensor, label_tensor)``.

        Raises:
            ValueError: If ``self.mode`` is not a supported mode (previously
                this surfaced as an ``UnboundLocalError`` on ``return``).
        """
        if self.mode not in self._SUPPORTED_MODES:
            raise ValueError(
                "unsupported mode %r; expected one of %r" % (self.mode, self._SUPPORTED_MODES)
            )

        image_path = self._item_at(self.data_paths, idx)
        try:
            with h5py.File(image_path, "r") as f:
                colorTensor = torch.tensor(np.array(f[self.color]), dtype=torch.float32)
            colorTensor = torch.permute(colorTensor, [2, 0, 1])
            # TODO: stacking the shadow map as an extra channel is disabled:
            #   shadowTensor = torch.tensor(np.array(f[self.shadow]), dtype=torch.float32).unsqueeze(-1)
            #   shadowTensor = torch.permute(shadowTensor, [2, 0, 1])
            #   x = torch.cat((colorTensor, shadowTensor), dim=0)
            x = colorTensor
            y = torch.tensor(self._item_at(self.label, idx), dtype=torch.long)
        except Exception as err:
            # Best-effort: keep the epoch running when a single file is broken.
            # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
            # propagate, and the cause is printed so failures are diagnosable.
            print("error in reading train file : ", image_path, self.color, self.shadow, repr(err))
            # The placeholder must have the same channel count as real samples,
            # otherwise the batch cannot be stacked by the DataLoader.
            x = torch.zeros(self.fallback_shape, dtype=torch.float32)
            # NOTE(review): a random label is attached to the placeholder, as in
            # the original code; this adds label noise whenever a read fails.
            y = torch.tensor(np.random.randint(0, 2), dtype=torch.long)

        return x, y

In [33]:
class MyDataset(pl.LightningDataModule):
    """Data module that builds the train/val/test datasets and their DataLoaders.

    The object is handed to ``trainer.fit(..., datamodule=...)``, so it derives
    from ``pl.LightningDataModule`` (it used to derive from ``LightningModule``,
    which is the base class for models, not for data).

    Args:
        batch_size: Batch size used by all three DataLoaders.
        num_workers: Worker processes used by all three DataLoaders.
    """

    def __init__(self, batch_size=64, num_workers=16):
        super().__init__()

        # Reads the three CSV index files on construction.
        self.data_manager = MyDataManager()
        self.color_channel = self.data_manager.color_channel
        self.shadow_channel = self.data_manager.shadow_channel
        self.num_workers = num_workers
        self.batch_size = batch_size

        self.transforms = MyTansforms()
        self.transforms_train = self.transforms.shadow_training_transform()
        # Validation and test data get the evaluation pipeline rather than the
        # random training augmentations.
        self.transforms_eval = self.transforms.shadow_test_transform()

    def _make_loader(self, data, shuffle):
        """Wrap ``data`` in a DataLoader using this module's batch size and worker count."""
        return DataLoader(data, batch_size=self.batch_size, shuffle=shuffle, num_workers=self.num_workers)

    def _make_split(self, df, split_transforms):
        """Return ``(paths, labels, dataset)`` for one split DataFrame."""
        paths, labels = self.data_manager.split_xy(df)
        data = MyDataLoading(
            paths,
            labels,
            color_channel=self.color_channel,
            shadow_channel=self.shadow_channel,
            # "train" is the only loading mode MyDataLoading implements; it
            # selects the file-reading branch and is used for every split.
            mode="train",
            transforms=split_transforms,
        )
        return paths, labels, data

    def train_dataloader(self):
        """Shuffled loader over the training split (requires ``setup`` to have run)."""
        return self._make_loader(self.train_data, shuffle=True)

    def val_dataloader(self):
        """Unshuffled loader over the validation split (requires ``setup`` to have run)."""
        return self._make_loader(self.val_data, shuffle=False)

    def test_dataloader(self):
        """Unshuffled loader over the test split (requires ``setup`` to have run)."""
        return self._make_loader(self.test_data, shuffle=False)

    def setup(self, stage: str = None) -> None:
        """Build the three datasets. ``stage`` is accepted for the Lightning hook signature and ignored."""
        self.trainX, self.trainY, self.train_data = self._make_split(
            self.data_manager.train_df, self.transforms_train
        )
        self.valX, self.valY, self.val_data = self._make_split(
            self.data_manager.val_df, self.transforms_eval
        )
        self.testX, self.testY, self.test_data = self._make_split(
            self.data_manager.test_df, self.transforms_eval
        )



In [34]:
# Define a LightningModule
# Define a LightningModule
class MYEfficientNetModel(pl.LightningModule):
    """EfficientNet-B0 classifier trained with cross-entropy loss.

    Args:
        num_classes: Number of output logits.
        input_channels: Number of channels of the input images. When it differs
            from the stem convolution of the pretrained network, the stem is
            replaced by a freshly initialised convolution of the same geometry.

    Both arguments are stored through ``save_hyperparameters`` so that
    ``load_from_checkpoint`` can rebuild the module.
    """

    def __init__(self, num_classes, input_channels=3):
        super().__init__() # call the constructor of parents class
        self.save_hyperparameters()
        self.num_classes = num_classes
        # NOTE(review): newer torchvision releases prefer `weights=...` over
        # `pretrained=True`; kept as-is to stay compatible with the installed
        # version -- confirm before upgrading.
        self.model = models.efficientnet_b0(pretrained=True)

        # Only swap the stem when the channel count really differs; replacing it
        # unconditionally threw away the pretrained first-layer filters even for
        # ordinary 3-channel input. Parameter names and shapes are unchanged, so
        # existing checkpoints still load.
        stem_conv = self.model.features[0][0]
        if input_channels != stem_conv.in_channels:
            self.model.features[0][0] = nn.Conv2d(
                input_channels,
                stem_conv.out_channels,
                kernel_size=stem_conv.kernel_size,
                stride=stem_conv.stride,
                padding=stem_conv.padding,
                bias=False,
            )

        # Read the feature width from the pretrained head instead of hard-coding
        # 1280. The head is still replaced by a single Linear layer (as before),
        # which keeps the checkpoint key `model.classifier.weight` unchanged.
        in_features = self.model.classifier[-1].in_features
        self.model.classifier = nn.Linear(in_features, self.num_classes)

    def forward(self, x):
        """Return the raw class logits for a batch of images."""
        return self.model(x)

    def _batch_loss(self, batch):
        """Cross-entropy between the logits for ``batch[0]`` and the integer labels ``batch[1]``."""
        x, y = batch
        return F.cross_entropy(self(x), y)

    def training_step(self, batch, batch_idx):
        """Compute and log ``train_loss`` (per step and per epoch); return it for backprop."""
        loss = self._batch_loss(batch)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the epoch-aggregated ``val_loss`` monitored by the callbacks."""
        loss = self._batch_loss(batch)
        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True)

    def configure_optimizers(self):
        """Adam over all parameters with a fixed learning rate of 1e-3."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)




In [35]:
# myModel = models.efficientnet_b0(pretrained=True)
# print(myModel.features[0][0])

In [36]:
# !ls /content/drive/MyDrive/Dans_lab/Data/tempData/images_rendered_try2

In [37]:

# Build the data module. Constructing it creates a MyDataManager, which reads
# the train/val/test CSV files immediately.
dataset = MyDataset()



In [38]:
# Two output classes; three input channels because only the colour tensor is fed to the network.
model = MYEfficientNetModel(num_classes = 2, input_channels=3)



In [39]:
# TensorBoard logger rooted at logger_dir; runs are grouped under the "my_experiment" name.
logger = TensorBoardLogger(save_dir=logger_dir, name="my_experiment")

In [40]:
# Stop training once val_loss (lower is better) has gone 20 checks without improving.
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=20, mode='min')

# Keep the three checkpoints with the lowest val_loss in checkpt_dirpath;
# the file name records the epoch and the validation loss.
model_checkpoint_callback = ModelCheckpoint(
    dirpath=checkpt_dirpath,
    filename='model-{epoch:02d}-{val_loss:.2f}',
    save_top_k=3,
    monitor='val_loss',
    mode='min',
)

# Lightning Trainer: GPU, at most 100 epochs, metrics logged on every step.
trainer = pl.Trainer(
    max_epochs=100,
    accelerator="gpu",
    logger=logger,
    log_every_n_steps=1,
    callbacks=[model_checkpoint_callback, early_stopping_callback],
)



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [41]:
# Train the model; the train/val batches come from the MyDataset instance
# passed as the datamodule.
# model = EfficientNetModel(num_classes=len(dataset.classes))
trainer.fit(model, datamodule = dataset)

  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type         | Params
---------------------------------------
0 | model | EfficientNet | 4.0 M 
---------------------------------------
4.0 M     Trainable params
0         Non-trainable params
4.0 M     Total params
16.040    Total estimated model params size (MB)
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 0: 100%|██████████| 62/62 [00:25<00:00,  2.43it/s, v_num=6, train_loss_step=0.718]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|          | 0/16 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/16 [00:00<?, ?it/s][A
Validation DataLoader 0:   6%|▋         | 1/16 [00:00<00:00, 79.31it/s][A
Validation DataLoader 0:  12%|█▎        | 2/16 [00:00<00:00, 14.91it/s][A
Validation DataLoader 0:  19%|█▉        | 3/16 [00:00<00:01, 11.82it/s][A
Validation DataLoader 0:  25%|██▌       | 4/16 [00:00<00:01, 10.63it/s][A
Validation DataLoader 0:  31%|███▏      | 5/16 [00:00<00:01,  9.96it/s][A
Validation DataLoader 0:  38%|███▊      | 6/16 [00:00<00:01,  9.60it/s][A
Validation DataLoader 0:  44%|████▍     | 7/16 [00:00<00:00,  9.36it/s][A
Validation DataLoader 0:  50%|█████     | 8/16 [00:00<00:00,  9.20it/s][A
Validation DataLoader 0:  56%|█████▋    | 9/16 [00:00<00:00,  9.11it/s][A
Validation DataLoader 0:  62%|██████▎   | 10/16 [00:01<00:00,  9.05it/s][A
Valid

Validation DataLoader 0: 100%|██████████| 16/16 [00:01<00:00,  8.75it/s][A
Epoch 11: 100%|██████████| 62/62 [00:25<00:00,  2.41it/s, v_num=6, train_loss_step=0.0864, val_loss=0.456, train_loss_epoch=0.0993] 
Validation: 0it [00:00, ?it/s][A
Validation:   0%|          | 0/16 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/16 [00:00<?, ?it/s][A
Validation DataLoader 0:   6%|▋         | 1/16 [00:00<00:00, 83.31it/s][A
Validation DataLoader 0:  12%|█▎        | 2/16 [00:00<00:00, 14.90it/s][A
Validation DataLoader 0:  19%|█▉        | 3/16 [00:00<00:01, 11.75it/s][A
Validation DataLoader 0:  25%|██▌       | 4/16 [00:00<00:01, 10.64it/s][A
Validation DataLoader 0:  31%|███▏      | 5/16 [00:00<00:01, 10.10it/s][A
Validation DataLoader 0:  38%|███▊      | 6/16 [00:00<00:01,  9.79it/s][A
Validation DataLoader 0:  44%|████▍     | 7/16 [00:00<00:00,  9.55it/s][A
Validation DataLoader 0:  50%|█████     | 8/16 [00:00<00:00,  9.38it/s][A
Validation DataLoader 0:  56%|█████▋

Validation DataLoader 0:  88%|████████▊ | 14/16 [00:01<00:00,  8.64it/s][A
Validation DataLoader 0:  94%|█████████▍| 15/16 [00:01<00:00,  8.62it/s][A
Validation DataLoader 0: 100%|██████████| 16/16 [00:01<00:00,  8.75it/s][A
Epoch 22: 100%|██████████| 62/62 [00:26<00:00,  2.34it/s, v_num=6, train_loss_step=0.0274, val_loss=0.708, train_loss_epoch=0.0324] 
Validation: 0it [00:00, ?it/s][A
Validation:   0%|          | 0/16 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/16 [00:00<?, ?it/s][A
Validation DataLoader 0:   6%|▋         | 1/16 [00:00<00:00, 75.26it/s][A
Validation DataLoader 0:  12%|█▎        | 2/16 [00:00<00:00, 14.44it/s][A
Validation DataLoader 0:  19%|█▉        | 3/16 [00:00<00:01, 11.33it/s][A
Validation DataLoader 0:  25%|██▌       | 4/16 [00:00<00:01, 10.25it/s][A
Validation DataLoader 0:  31%|███▏      | 5/16 [00:00<00:01,  9.72it/s][A
Validation DataLoader 0:  38%|███▊      | 6/16 [00:00<00:01,  9.40it/s][A
Validation DataLoader 0:  44%|████

In [20]:
# Launch TensorBoard inline on port 6006.
# NOTE(review): --logdir points at version_5 under /home/kersten/..., whereas the
# saved training progress bar reports v_num=6 and logger_dir is under /panfs/... --
# confirm which run directory is intended.
# NOTE(review): the saved output reports that no `tensorboard` executable was found
# on PATH, so this cell did not work in the environment it was last run in.
%reload_ext tensorboard
%tensorboard --logdir /home/kersten/gejji003/Shadows/trained_models/round1/logger/my_experiment/version_5 --port=6006# Specify the directory where your logs are stored

ERROR: Could not find `tensorboard`. Please ensure that your PATH
contains an executable `tensorboard` program, or explicitly specify
the path to a TensorBoard binary by setting the `TENSORBOARD_BINARY`
environment variable.

In [None]:
# Same extension reload as in the previous cell; this cell has no execution count (never run).
%reload_ext tensorboard

# Predict and GradCam

In [21]:
def predict(myModelPath, myBatchSize, myNumWorkers):
    """Evaluate a saved checkpoint on the test split and print the metrics.

    The test dataset is taken from the notebook-level ``dataset`` data module
    (``dataset.setup`` is called first if the test split has not been built).
    Predictions are the arg-max over the class logits; the classification
    report and the confusion matrix are printed.

    Args:
        myModelPath: Path of the ``.ckpt`` file to load into MYEfficientNetModel.
        myBatchSize: Batch size of the evaluation DataLoader.
        myNumWorkers: Number of DataLoader worker processes.

    Returns:
        None. The metrics are printed only.
    """
    # Imported here because the notebook only runs `import tqdm` (the module,
    # which is not callable) in a later cell and never imports the metrics.
    from tqdm import tqdm
    from sklearn.metrics import classification_report, confusion_matrix

    # Fall back to the CPU instead of failing when no GPU is visible.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = MYEfficientNetModel.load_from_checkpoint(myModelPath, map_location=device)
    model.eval()
    model.to(device)

    # The datasets only exist after setup() has run (trainer.fit does this).
    if not hasattr(dataset, "test_data"):
        dataset.setup("test")
    # Build the loader here so that myBatchSize / myNumWorkers are honoured.
    test_loader = DataLoader(
        dataset.test_data,
        batch_size=myBatchSize,
        shuffle=False,
        num_workers=myNumWorkers,
    )

    y_all = []
    pred_all = []
    with torch.no_grad():
        for X, y in tqdm(test_loader, total=len(test_loader)):
            logits = model(X.to(device).contiguous())
            # The network emits one logit per class, so the prediction is the
            # arg-max over dim 1. The earlier `logit > 0.08` threshold is
            # ambiguous on a multi-element tensor and raised at runtime.
            pred_all.append(logits.argmax(dim=1).cpu())
            y_all.append(y.int().cpu())

    if not y_all:
        print("test loader produced no batches; nothing to evaluate")
        return

    # Flatten the per-batch tensors into 1-D label arrays for the metrics.
    y_true = torch.cat(y_all).numpy()
    y_pred = torch.cat(pred_all).numpy()

    myClassRep = classification_report(y_true, y_pred)
    print(myClassRep)
    myConfMat = confusion_matrix(y_true, y_pred)
    print(myConfMat)

In [24]:
# Checkpoint to evaluate and DataLoader settings for the evaluation run.
# NOTE(review): this path is under /home/kersten/... while checkpt_dirpath is under
# /panfs/... -- confirm both refer to the same directory.
# NOTE(review): the saved output shows a TypeError about a missing 'myCSVPath'
# argument, which the predict() defined in this notebook does not take --
# presumably left over from an older definition in the kernel; re-run after a restart.
myModelPath = "/home/kersten/gejji003/Shadows/trained_models/round1/checkpoint/model-epoch=11-val_loss=0.34.ckpt"
myBatchSize = 1
myNumWorkers = 16
predict(myModelPath = myModelPath, myBatchSize = myBatchSize, myNumWorkers = myNumWorkers)

TypeError: predict() missing 1 required positional argument: 'myCSVPath'

In [8]:
import tqdm
from captum.attr import GuidedGradCam
# import captum.attr