In [36]:
import torch
from torch.utils.data import Dataset, DataLoader, Subset
import pandas as pd
from torchvision import transforms
from PIL import Image
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import fbeta_score
from sklearn.model_selection import StratifiedKFold
import numpy as np

In [37]:
df = pd.read_csv("data/planet\planet/train_classes.csv") # Adjust as needed
df
all_tags = set()
for tags in df['tags'].str.split():
    all_tags.update(tags)
print(all_tags)
tag_to_idx = {tag: idx for idx, tag in enumerate(sorted(all_tags))}
idx_to_tag = {idx: tag for tag, idx in tag_to_idx.items()}
print(tag_to_idx)
print(len(tag_to_idx))

{'road', 'selective_logging', 'bare_ground', 'slash_burn', 'blooming', 'partly_cloudy', 'habitation', 'agriculture', 'conventional_mine', 'blow_down', 'haze', 'clear', 'water', 'cloudy', 'artisinal_mine', 'cultivation', 'primary'}
{'agriculture': 0, 'artisinal_mine': 1, 'bare_ground': 2, 'blooming': 3, 'blow_down': 4, 'clear': 5, 'cloudy': 6, 'conventional_mine': 7, 'cultivation': 8, 'habitation': 9, 'haze': 10, 'partly_cloudy': 11, 'primary': 12, 'road': 13, 'selective_logging': 14, 'slash_burn': 15, 'water': 16}
17


In [38]:
# ResNet & DenseNet
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# EfficientNet_b1
# transform = transforms.Compose([
#     transforms.Resize(256),
#     transforms.CenterCrop(240),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
# ])

In [39]:
class MultiLabelImageDataset(Dataset):
    def __init__(self, df, img_dir, transform=None):
        # if type(csv_file) != str:
        #     self.df = csv_file
        # else:
        #     self.df = pd.read_csv(csv_file)
        self.df = df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = self.df.iloc[idx, 0]
        if "aug" in img_name:
            img_path = os.path.join("data/augmented_images/", f"{img_name}.jpg")
            #img_path = os.path.join(self.img_dir, f"{img_name}")
        else:
            img_path = os.path.join(self.img_dir, f"{img_name}.jpg")
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)
        
        tags = self.df.iloc[idx, 1].split()
        
        labels = torch.zeros(len(tag_to_idx))
        for tag in tags:
            labels[tag_to_idx[tag]] = 1
        
        return image, labels

In [40]:
import torchvision.models as models
from torch import nn

num_classes = 17

def ResNetClassifier(num_classes):
    # load a pre-trained model
    model_ft = models.resnet50(weights='DEFAULT')
    num_ftrs = model_ft.fc.in_features
    
    # freeze all the parameters in the network except the final layer
    # for param in model_ft.parameters():
    #     param.requires_grad = False
    
    # replace the last fully connected layer
    model_ft.fc = nn.Linear(num_ftrs, num_classes)
    return model_ft

def EfficientNetClassifier(num_classes):
    # load a pre-trained model
    model_ft = models.efficientnet_b1(weights='DEFAULT')
    # num_ftrs = model_ft.classifier.in_features
    
    # freeze all the parameters in the network except the final layer
    # for param in model_ft.parameters():
    #     param.requires_grad = False
    
    # replace the last fully connected layer
    model_ft.classifier = nn.Linear(1280, num_classes)
    return model_ft

In [41]:
# Create validation group before anything happens
train, val1 = train_test_split(df, test_size=0.1)
train, val2 = train_test_split(df, test_size=0.1)
train, val3 = train_test_split(df, test_size=0.1)
train, val4 = train_test_split(df, test_size=0.1)
train, val5 = train_test_split(df, test_size=0.1)

In [42]:
df_to_load = train
dataset = MultiLabelImageDataset(df_to_load, img_dir="data/planet\planet/train-jpg", transform=transform)
train_idx, test_idx = train_test_split(list(range(len(dataset))), test_size=0.1, random_state=42)
# Since valiation split already done above with original data so no need

# # Only quick testing
#train_idx_small = train_idx[:1000]  # First 1000 training samples
#test_idx_small = test_idx[:200]  # First 200 testing samples

#train_idx_small = train_idx[:32787] 
#test_idx_small = test_idx[:3644] 

#train_dataset = Subset(dataset, train_idx_small)
#test_dataset = Subset(dataset, test_idx_small)
train_dataset = Subset(dataset, list(range(len(df_to_load))))
val1_dataset = MultiLabelImageDataset(val1, img_dir="data/planet\planet/train-jpg", transform=transform)
val1_dataset = Subset(val1_dataset, list(range(len(val1))))
val2_dataset = MultiLabelImageDataset(val2, img_dir="data/planet\planet/train-jpg", transform=transform)
val2_dataset = Subset(val2_dataset, list(range(len(val2))))
val3_dataset = MultiLabelImageDataset(val3, img_dir="data/planet\planet/train-jpg", transform=transform)
val3_dataset = Subset(val3_dataset, list(range(len(val3))))
val4_dataset = MultiLabelImageDataset(val4, img_dir="data/planet\planet/train-jpg", transform=transform)
val4_dataset = Subset(val4_dataset, list(range(len(val4))))
val5_dataset = MultiLabelImageDataset(val5, img_dir="data/planet\planet/train-jpg", transform=transform)
val5_dataset = Subset(val5_dataset, list(range(len(val5))))


#train_dataset = Subset(dataset, train_idx)
#test_dataset = Subset(dataset, test_idx)

print(len(train_idx))
print(len(test_idx))



batch_size = 8

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
#test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
val1_dataloader = DataLoader(val1_dataset, batch_size=batch_size, shuffle=False)
val2_dataloader = DataLoader(val2_dataset, batch_size=batch_size, shuffle=False)
val3_dataloader = DataLoader(val3_dataset, batch_size=batch_size, shuffle=False)
val4_dataloader = DataLoader(val4_dataset, batch_size=batch_size, shuffle=False)
val5_dataloader = DataLoader(val5_dataset, batch_size=batch_size, shuffle=False)

32787
3644


In [43]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


In [44]:
class EnsembleModel(nn.Module):
    def __init__(self, num_classes, ensemble_type='weighted'):
        super(EnsembleModel, self).__init__()
        
        # initialize individual models
        self.resnet = ResNetClassifier(num_classes)
        self.effnet = ResNetClassifier(num_classes)
        self.resnet2 = ResNetClassifier(num_classes)

        self.resnet.load_state_dict(torch.load("LP_oversampled_ResNet50_0.5epochs_1e-4_ADAM.pth", weights_only=True))
        self.effnet.load_state_dict(torch.load("ML_oversampled_augmented_ResNet50_5epochs_1e-4_ADAM.pth", weights_only=True))
        self.resnet2.load_state_dict(torch.load("LP_oversampled_ResNet50_0.5epochs_1e-4_ADAM.pth", weights_only=True))
        
        # ensemble type
        self.ensemble_type = ensemble_type
        
        # weighted averaging - initialised to 1/3 each
        if ensemble_type == 'weighted':
            self.weights = nn.Parameter(torch.ones(3) / 3)

        # parameters for shepard's rule
        self.a = 1.0
        self.b = 1.0
    
    def forward(self, x):
        # get predictions from each model
        resnet_out = self.resnet(x)
        effnet_out = self.effnet(x)
        resnet2_out = self.resnet2(x)
        
        # ensemble strategies
        if self.ensemble_type == 'voting':
            # soft voting - average of predictions
            return (resnet_out + effnet_out + resnet2_out) / 3
        
        elif self.ensemble_type == 'weighted':
            # weighted average of predictions
            # normalize weights to sum to 1
            normalized_weights = nn.functional.softmax(self.weights, dim=0)
            
            weighted_out = (
                normalized_weights[0] * resnet_out + 
                normalized_weights[1] * effnet_out + 
                normalized_weights[2] * resnet2_out
            )
            return weighted_out

        elif self.ensemble_type == 'dudani':
            # dudani's rule weights
            distances = torch.stack([
                -torch.max(torch.sigmoid(resnet_out), dim=1)[0],
                -torch.max(torch.sigmoid(effnet_out), dim=1)[0],
                -torch.max(torch.sigmoid(resnet2_out), dim=1)[0]
            ], dim=1) 
            
            d1, _ = torch.min(distances, dim=1, keepdim=True)
            dq, _ = torch.max(distances, dim=1, keepdim=True)
            
            diff = dq - d1
            diff[diff == 0] = 1e-10
            
            dudani_weights = (dq - distances) / diff
            dudani_weights = dudani_weights / dudani_weights.sum(dim=1, keepdim=True)

            weighted_out = (
                dudani_weights[:, 0].unsqueeze(1) * resnet_out +
                dudani_weights[:, 1].unsqueeze(1) * effnet_out +
                dudani_weights[:, 2].unsqueeze(1) * resnet2_out
            )
            return weighted_out

        elif self.ensemble_type == 'shepard':
            distances = torch.stack([
                -torch.max(torch.sigmoid(resnet_out), dim=1)[0],
                -torch.max(torch.sigmoid(effnet_out), dim=1)[0],
                -torch.max(torch.sigmoid(resnet2_out), dim=1)[0]
            ], dim=1)

            shepard_weights = torch.exp(-self.a * torch.abs(distances) ** self.b)
            shepard_weights = shepard_weights / shepard_weights.sum(dim=1, keepdim=True)

            weighted_out = (
                shepard_weights[:, 0].unsqueeze(1) * resnet_out +
                shepard_weights[:, 1].unsqueeze(1) * effnet_out +
                shepard_weights[:, 2].unsqueeze(1) * resnet2_out
            )
            return weighted_out
        
        else:
            return (resnet_out + effnet_out + resnet2_out) / 3

In [45]:
#test = ResNetClassifier(17)
test = EnsembleModel(17,'weighted')
test.to(device)
test.load_state_dict(torch.load("LP_OS_ML_OS_ResNet50.pth"))
test.eval()

  test.load_state_dict(torch.load("LP_OS_ML_OS_ResNet50.pth"))


EnsembleModel(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
     

In [46]:
def custom_val_loop(dataloader, model, threshold):
    model.eval()
    num_batches = len(dataloader)
    f2 = 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)

            # calculate f2 score
            pred_tags = torch.sigmoid(pred).cpu().numpy() > threshold #0.24
            true_tags = y.cpu().numpy()
            #f2 += fbeta_score(true_tags, pred_tags, beta=2, average='micro')
            print(fbeta_score(true_tags, pred_tags, beta=2, average='micro'))

    f2 /= num_batches
    return f2

In [32]:
threshold_list = [i/100 for i in range(10, 41)]

for threshold in threshold_list:
    print(threshold, custom_val_loop(val1_dataloader, test, threshold))
    #print(threshold, custom_val_loop(val1_dataloader, test, threshold))
    # print(threshold, custom_val_loop(val2_dataloader, test, threshold))
    # print(threshold, custom_val_loop(val3_dataloader, test, threshold))
    # print(threshold, custom_val_loop(val4_dataloader, test, threshold))
    # print(threshold, custom_val_loop(val5_dataloader, test, threshold))
    

0.9900990099009901
0.9895833333333334
0.9813084112149532
0.9917355371900827
1.0
1.0
1.0
1.0


KeyboardInterrupt: 

In [47]:
def fbeta_score_by_class(pred_tags, true_tags, beta, average):
    """
    Calculate the F2 score for each class.
    pred_tags: numpy array of shape (batch_size, num_classes)
    true_tags: numpy array of shape (batch_size, num_classes)

    Returns:
    f2_list: numpy array of shape (num_classes,)
    """
    pred_tags = pred_tags.T
    true_tags = true_tags.T

    f2_list = np.zeros(pred_tags.shape[0])

    for i in range(pred_tags.shape[0]):
        pred_class = pred_tags[i]  # shape (batch_size,)
        true_class = true_tags[i]  # shape (batch_size,)
        f2_list[i] = fbeta_score(true_class, pred_class, beta=beta, average=average)

    return f2_list

In [49]:
def test_threshold_indiv_class(dataloader, model, threshold_list, num_classes):
  model.eval()
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  f2_list = np.zeros((len(threshold_list),num_classes))
  best_threshold_per_class = np.zeros(num_classes)

  with torch.no_grad():
    for X, y in dataloader:
      X, y = X.to(device), y.to(device)
      pred = model(X)
      true_tags = y.cpu().numpy()

      pred = torch.sigmoid(pred).cpu().numpy()
      #print(pred.shape)

      for i in range(len(threshold_list)):
        pred_tags = pred > threshold_list[i]
        #print(pred_tags.shape)
        f2_list[i] += fbeta_score_by_class(pred_tags, true_tags, beta=2, average="micro")

  f2_list /= num_batches

  f2_list = f2_list.T   # f2_list: shape of (num_classes x len(threshold_list))

  for i in range(f2_list.shape[0]):
    idx = np.argmax(f2_list[i])
    print(f"Best threshold for class {i}: {threshold_list[idx]}, f2: {f2_list[i][idx]}")
    print("----------------------------------------------")
    best_threshold_per_class[i] = threshold_list[idx]
  print(f2_list)

  return f2_list, best_threshold_per_class

In [None]:
threshold_list = [0.5,0.45,0.4,0.35,0.3,0.25,0.2,0.15,0.1,0.05]
threshold_list = [i/100 for i in range(1, 41)]
num_classes = 17
f2_list_1, best_threshold_per_class_1 = test_threshold_indiv_class(val1_dataloader, test, threshold_list, num_classes)
f2_list_2, best_threshold_per_class_2 = test_threshold_indiv_class(val2_dataloader, test, threshold_list, num_classes)
f2_list_3, best_threshold_per_class_3 = test_threshold_indiv_class(val3_dataloader, test, threshold_list, num_classes)
f2_list_4, best_threshold_per_class_4 = test_threshold_indiv_class(val4_dataloader, test, threshold_list, num_classes)
f2_list_5, best_threshold_per_class_5 = test_threshold_indiv_class(val5_dataloader, test, threshold_list, num_classes)

Best threshold for class 0: 0.39, f2: 0.9888833992094862
----------------------------------------------
Best threshold for class 1: 0.06, f2: 0.9995059288537549
----------------------------------------------
Best threshold for class 2: 0.11, f2: 0.9992588932806324
----------------------------------------------
Best threshold for class 3: 0.14, f2: 0.9990118577075099
----------------------------------------------
Best threshold for class 4: 0.02, f2: 0.9997529644268774
----------------------------------------------
Best threshold for class 5: 0.39, f2: 0.9933300395256917
----------------------------------------------
Best threshold for class 6: 0.31, f2: 0.9940711462450593
----------------------------------------------
Best threshold for class 7: 0.01, f2: 1.0
----------------------------------------------
Best threshold for class 8: 0.3, f2: 0.9923418972332015
----------------------------------------------
Best threshold for class 9: 0.39, f2: 0.995306324110672
------------------------

In [57]:
print(best_threshold_per_class_1)
print(best_threshold_per_class_2)
print(best_threshold_per_class_3)
print(best_threshold_per_class_4)
print(best_threshold_per_class_5)

stacked = np.stack([best_threshold_per_class_1, best_threshold_per_class_2, best_threshold_per_class_3, best_threshold_per_class_4, best_threshold_per_class_5])
masked = np.ma.masked_array(stacked, mask=(stacked==0.01))
average = np.mean(masked, axis=0)
print(average)

[0.39 0.06 0.11 0.14 0.02 0.39 0.31 0.01 0.3  0.39 0.35 0.24 0.37 0.33
 0.33 0.05 0.37]
[0.37 0.04 0.25 0.2  0.01 0.4  0.28 0.01 0.38 0.16 0.32 0.34 0.4  0.25
 0.2  0.06 0.38]
[0.37 0.05 0.33 0.09 0.01 0.39 0.27 0.03 0.38 0.15 0.39 0.34 0.4  0.26
 0.05 0.02 0.26]
[0.38 0.06 0.35 0.36 0.05 0.36 0.28 0.14 0.35 0.26 0.38 0.27 0.4  0.25
 0.04 0.03 0.32]
[0.39 0.16 0.27 0.07 0.02 0.39 0.31 0.29 0.31 0.22 0.38 0.4  0.38 0.21
 0.08 0.03 0.39]
[0.38 0.074 0.262 0.17200000000000001 0.030000000000000002 0.386
 0.29000000000000004 0.15333333333333332 0.34400000000000003
 0.23600000000000004 0.364 0.31800000000000006 0.38999999999999996 0.26
 0.14 0.038 0.34400000000000003]
