In [None]:
import os

# Expose only GPU index 1 to CUDA for this kernel. Keep this cell ahead of the
# torch import below so the variable is already set when CUDA is first used.
os.environ.update(CUDA_VISIBLE_DEVICES="1")

In [12]:
import torch
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torchvision.models as models
from astra.torch.models import DenseNetClassifier,DenseNet121_Weights  
from astra.torch.utils import train_fn
from astra.torch.metrics import accuracy_score, f1_score, precision_score, recall_score

# Mini-batch size shared by the train/test DataLoaders and by predict_class
# in the cross-validation cell further down.
batch_size = 256

In [2]:
# Read the serialized tensor bundle from disk.
# NOTE(review): torch.load unpickles the file, so only point it at trusted data.
loaded_data = torch.load("/home/rishabh.mondal/Brick-Kilns-project/albk_rishabh/tensor_data/test_data.pt")

# Pull the three entries (sample ids, image tensor, label tensor) out in one go.
index1, images1, labels1 = (loaded_data[key] for key in ("index", "images", "labels"))

In [3]:
# Sanity check: all three objects should agree on the number of samples.
print(*(arr.shape for arr in (index1, images1, labels1)))

(10025,) torch.Size([10025, 3, 224, 224]) torch.Size([10025])


In [4]:
# Element types of the ids, images and labels, shown as the cell's result.
tuple(arr.dtype for arr in (index1, images1, labels1))

(dtype('<U17'), torch.uint8, torch.uint8)

### Stratified cross-validation: splitting the data into train and test sets

There is no need to run the next cell; instead, load the pre-computed folds from `/home/jaiswalsuraj/suraj_work/Brick-Kilns-Project/data/albk_v2_data/fold_data_delhi.pt`.

In [5]:
from collections import Counter

import torch

# Stratified 4-fold splitter; the fixed seed keeps fold membership repeatable.
seed = 42  # Use your desired random seed
splitter = StratifiedKFold(n_splits=4, shuffle=True, random_state=seed)

# Rescale pixel values by 1/255, then standardise every channel using the mean
# and std taken over the sample, height and width axes of the whole image set.
# NOTE(review): these statistics include the images that later land in each
# test fold -- confirm that is acceptable for this evaluation.
images1 = images1 / 255
channel_mean = images1.mean(dim=(0, 2, 3), keepdim=True)
channel_std = images1.std(dim=(0, 2, 3), keepdim=True)
images1 = (images1 - channel_mean) / channel_std

fold_data = []  # One dict per fold, appended in fold order
for fold, (train_idx, test_idx) in enumerate(splitter.split(images1, labels1)):
    X_train, y_train = images1[train_idx], labels1[train_idx]
    X_test, y_test = images1[test_idx], labels1[test_idx]

    # Class frequencies on each side of the split, to check the stratification.
    train_counter = Counter(y_train.numpy())
    test_counter = Counter(y_test.numpy())
    print(train_counter)
    print(test_counter)
    print(f"Fold {fold + 1} - Train: {train_counter}, Test: {test_counter}")

    # Folds are stored 1-based.
    fold_data.append(
        dict(
            fold=fold + 1,
            X_train=X_train,
            X_test=X_test,
            y_train=y_train,
            y_test=y_test,
            train_counter=train_counter,
            test_counter=test_counter,
        )
    )


Counter({0: 6737, 1: 781})
Counter({0: 2246, 1: 261})
Fold 1 - Train: Counter({0: 6737, 1: 781}), Test: Counter({0: 2246, 1: 261})
Counter({0: 6737, 1: 782})
Counter({0: 2246, 1: 260})
Fold 2 - Train: Counter({0: 6737, 1: 782}), Test: Counter({0: 2246, 1: 260})
Counter({0: 6737, 1: 782})
Counter({0: 2246, 1: 260})
Fold 3 - Train: Counter({0: 6737, 1: 782}), Test: Counter({0: 2246, 1: 260})
Counter({0: 6738, 1: 781})
Counter({0: 2245, 1: 261})
Fold 4 - Train: Counter({0: 6738, 1: 781}), Test: Counter({0: 2245, 1: 261})


In [8]:
# Save the list of fold data using torch.save
# torch.save(fold_data, '/home/jaiswalsuraj/suraj_work/Brick-Kilns-Project/data/albk_v2_data/fold_data_delhi.pt')

In [13]:
# Load the pre-computed fold data (a torch-serialised ".pt" file).
# NOTE(review): torch.load unpickles the file, so only load it from a trusted source.
path = '/home/jaiswalsuraj/suraj_work/Brick-Kilns-Project/data/albk_v2_data/fold_data_delhi.pt'
fold_data = torch.load(path)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Per-fold metric values; averaged once every fold has been evaluated.
accuracy_list = []
precision_list = []
recall_list = []
f1_list = []

for fold_info in fold_data:
    fold = fold_info['fold']
    print("Fold: ", fold)
    X_train = fold_info['X_train']
    y_train = fold_info['y_train']
    X_test = fold_info['X_test']
    y_test = fold_info['y_test']
    train_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)

    # Shuffle only the training batches; test order must stay aligned with y_test.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Debug output: dtype of the training labels and of the test images.
    print("trainloader datatype: ", train_loader.dataset.tensors[1].dtype)
    print("testloader datatype: ", test_loader.dataset.tensors[0].dtype)

    # A fresh model per fold so that no weights carry over between folds.
    train_model = DenseNetClassifier(
        models.densenet121, DenseNet121_Weights, n_classes=2, activation=nn.ReLU(), dropout=0.1
    ).to(device)

    iter_losses, epoch_losses = train_fn(
        train_model,
        nn.CrossEntropyLoss(),
        dataloader=train_loader,
        lr=3e-4,
        epochs=100,
        verbose=True,
        wandb_log=False,
    )

    # Evaluate the model on the held-out part of this fold.
    with torch.no_grad():
        pred_classes = train_model.predict_class(
            dataloader=test_loader, batch_size=batch_size, verbose=True
        ).to(device)

    # Predictions and labels must be on the same device for the metric functions.
    test_labels = y_test.to(device)

    # Compute each metric exactly once per fold (predictions first, labels
    # second) and reuse the value for both the printout and the running lists.
    accuracy = accuracy_score(pred_classes, test_labels)
    precision = precision_score(pred_classes, test_labels)
    recall = recall_score(pred_classes, test_labels)
    f1 = f1_score(pred_classes, test_labels)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\n")

    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)

# Calculate and print the mean of metrics across all folds
print("Mean Accuracy: ", sum(accuracy_list) / len(accuracy_list))
print("Mean Precision: ", sum(precision_list) / len(precision_list))
print("Mean Recall: ", sum(recall_list) / len(recall_list))
print("Mean F1: ", sum(f1_list) / len(f1_list))


Fold:  1
trainloader datatype:  torch.uint8
testloader datatype:  torch.float32


Loss: 0.00000115: 100%|██████████| 100/100 [1:03:30<00:00, 38.11s/it]
100%|██████████| 10/10 [00:01<00:00,  7.27it/s]


Accuracy: 0.9801
Precision: 0.9073
Recall: 0.9004
F1 Score: 0.9038


Fold:  2
trainloader datatype:  torch.uint8
testloader datatype:  torch.float32


Loss: 0.00000255: 100%|██████████| 100/100 [26:25<00:00, 15.86s/it] 
100%|██████████| 10/10 [00:01<00:00,  5.51it/s]


Accuracy: 0.9765
Precision: 0.9205
Recall: 0.8462
F1 Score: 0.8818


Fold:  3
trainloader datatype:  torch.uint8
testloader datatype:  torch.float32


Loss: 0.00000097: 100%|██████████| 100/100 [35:37<00:00, 21.37s/it] 
100%|██████████| 10/10 [00:01<00:00,  8.15it/s]


Accuracy: 0.9824
Precision: 0.9186
Recall: 0.9115
F1 Score: 0.9151


Fold:  4
trainloader datatype:  torch.uint8
testloader datatype:  torch.float32


Loss: 0.00000064: 100%|██████████| 100/100 [28:06<00:00, 16.86s/it]
100%|██████████| 10/10 [00:02<00:00,  4.63it/s]


Accuracy: 0.9808
Precision: 0.9209
Recall: 0.8927
F1 Score: 0.9066


Mean Accuracy:  tensor(0.9800, device='cuda:0')
Mean Precision:  tensor(0.9168, device='cuda:0')
Mean Recall:  tensor(0.8877, device='cuda:0')
Mean F1:  tensor(0.9018, device='cuda:0')
