In [1]:
import torch
from torch.utils.data import Dataset, DataLoader, Subset
import matplotlib.pyplot as plt
from torchvision import transforms
import torchvision
import pandas as pd
from PIL import Image
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import fbeta_score
from itertools import cycle
import torch.nn.functional as F

In [2]:
all_models = torchvision.models.list_models()
classification_models = torchvision.models.list_models(module=torchvision.models)
# print(f"all models: \n {all_models}")
print(f"classif models: \n {classification_models}")

classif models: 
 ['alexnet', 'convnext_base', 'convnext_large', 'convnext_small', 'convnext_tiny', 'densenet121', 'densenet161', 'densenet169', 'densenet201', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'efficientnet_v2_l', 'efficientnet_v2_m', 'efficientnet_v2_s', 'googlenet', 'inception_v3', 'maxvit_t', 'mnasnet0_5', 'mnasnet0_75', 'mnasnet1_0', 'mnasnet1_3', 'mobilenet_v2', 'mobilenet_v3_large', 'mobilenet_v3_small', 'regnet_x_16gf', 'regnet_x_1_6gf', 'regnet_x_32gf', 'regnet_x_3_2gf', 'regnet_x_400mf', 'regnet_x_800mf', 'regnet_x_8gf', 'regnet_y_128gf', 'regnet_y_16gf', 'regnet_y_1_6gf', 'regnet_y_32gf', 'regnet_y_3_2gf', 'regnet_y_400mf', 'regnet_y_800mf', 'regnet_y_8gf', 'resnet101', 'resnet152', 'resnet18', 'resnet34', 'resnet50', 'resnext101_32x8d', 'resnext101_64x4d', 'resnext50_32x4d', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_

In [3]:
transforms = {
    # ResNet & DenseNet
    'ResNet': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),

    # EfficientNet_b1
    'EfficientNet': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(240),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),

    # RegNet
    'RegNet': transforms.Compose([
        transforms.Resize(232),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),

    # ConvNext
    'ConvNext': transforms.Compose([
        transforms.Resize(236),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
}

In [4]:
df = pd.read_csv("data/train_classes.csv")
df

Unnamed: 0,image_name,tags
0,train_0,haze primary
1,train_1,agriculture clear primary water
2,train_2,clear primary
3,train_3,clear primary
4,train_4,agriculture clear habitation primary road
...,...,...
40474,train_40474,clear primary
40475,train_40475,cloudy
40476,train_40476,agriculture clear primary
40477,train_40477,agriculture clear primary road


In [5]:
all_tags = set()
for tags in df['tags'].str.split():
    all_tags.update(tags)

In [6]:
tag_to_idx = {tag: idx for idx, tag in enumerate(sorted(all_tags))}
idx_to_tag = {idx: tag for tag, idx in tag_to_idx.items()}
print(tag_to_idx)
print(len(tag_to_idx))

{'agriculture': 0, 'artisinal_mine': 1, 'bare_ground': 2, 'blooming': 3, 'blow_down': 4, 'clear': 5, 'cloudy': 6, 'conventional_mine': 7, 'cultivation': 8, 'habitation': 9, 'haze': 10, 'partly_cloudy': 11, 'primary': 12, 'road': 13, 'selective_logging': 14, 'slash_burn': 15, 'water': 16}
17


In [7]:
class MultiLabelImageDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None):
        self.df = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = self.df.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, f"{img_name}.jpg")
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        
        tags = self.df.iloc[idx, 1].split()
        labels = torch.zeros(len(tag_to_idx))
        for tag in tags:
            labels[tag_to_idx[tag]] = 1
        
        return image, labels

In [8]:
# def visualize_sample(dataset, idx):
#     image, labels = dataset[idx]
    
#     # convert the image tensor to a PIL Image for display
#     if isinstance(image, torch.Tensor):
#         image = transforms.ToPILImage()(image)
    
#     # plot the image
#     plt.figure(figsize=(10, 6))
#     plt.imshow(image)
#     plt.axis('off')
    
#     # get the labels
#     present_labels = [idx_to_tag[i] for i, label in enumerate(labels) if label == 1]
    
#     # set the title with the labels
#     plt.title(f"Labels: {', '.join(present_labels)}")
#     plt.show()
    
#     print(f"Image labels: {', '.join(present_labels)}")

In [9]:
resnet_dataset = MultiLabelImageDataset(csv_file="data/train_classes.csv", img_dir="data/train-jpg", transform=transforms['ResNet'])
effnet_dataset = MultiLabelImageDataset(csv_file="data/train_classes.csv", img_dir="data/train-jpg", transform=transforms['EfficientNet'])
regnet_dataset = MultiLabelImageDataset(csv_file="data/train_classes.csv", img_dir="data/train-jpg", transform=transforms['RegNet'])

In [10]:
# visualize_sample(dataset, 4)

In [11]:
def split_dataset(dataset):
    train_idx, test_idx = train_test_split(
        list(range(len(dataset))), 
        test_size=0.1, 
        random_state=42
    )
    train_dataset = Subset(dataset, train_idx)
    test_dataset = Subset(dataset, test_idx)
    return train_dataset, test_dataset

resnet_train, resnet_test = split_dataset(resnet_dataset)
effnet_train, effnet_test = split_dataset(effnet_dataset)
regnet_train, regnet_test = split_dataset(regnet_dataset)

In [12]:
torch.cuda.empty_cache()

In [13]:
batch_size = 32

resnet_train_loader = DataLoader(resnet_train, batch_size=batch_size, shuffle=True)
resnet_test_loader = DataLoader(resnet_test, batch_size=batch_size, shuffle=False)

effnet_train_loader = DataLoader(effnet_train, batch_size=batch_size, shuffle=True)
effnet_test_loader = DataLoader(effnet_test, batch_size=batch_size, shuffle=False)

regnet_train_loader = DataLoader(regnet_train, batch_size=batch_size, shuffle=True)
regnet_test_loader = DataLoader(regnet_test, batch_size=batch_size, shuffle=False)

In [14]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


In [15]:
import torchvision.models as models
from torch import nn

num_classes = 17

def ResNetClassifier(num_classes):  
    # load a pre-trained model
    model_ft = models.resnet50(weights='DEFAULT')
    num_ftrs = model_ft.fc.in_features
    
    # freeze all the parameters in the network except the final layer
    # for param in model_ft.parameters():
    #     param.requires_grad = False
    
    # replace the last fully connected layer
    model_ft.fc = nn.Linear(num_ftrs, num_classes)
    return model_ft

def DenseNetClassifier(num_classes):
    # load a pre-trained model
    model_ft = models.densenet121(weights='DEFAULT')
    num_ftrs = model_ft.classifier.in_features
    
    # freeze all the parameters in the network except the final layer
    # for param in model_ft.parameters():
    #     param.requires_grad = False
    
    # replace the last fully connected layer
    model_ft.classifier = nn.Linear(num_ftrs, num_classes)
    return model_ft

def EfficientNetClassifier(num_classes):
    # load a pre-trained model
    model_ft = models.efficientnet_b1(weights='DEFAULT')
    # num_ftrs = model_ft.classifier.in_features
    
    # freeze all the parameters in the network except the final layer
    # for param in model_ft.parameters():
    #     param.requires_grad = False
    
    # replace the last fully connected layer
    model_ft.classifier = nn.Linear(1280, num_classes)
    return model_ft

def RegNetClassifier(num_classes):
    # load a pre-trained model
    model_ft = models.regnet_y_8gf(weights='DEFAULT')
    print(model_ft)
    num_ftrs = model_ft.fc.in_features
    
    # freeze all the parameters in the network except the final layer
    # for param in model_ft.parameters():
    #     param.requires_grad = False
    
    # replace the last fully connected layer
    model_ft.fc = nn.Linear(num_ftrs, num_classes)
    return model_ft

def ConvNextClassifier(num_classes):
    # load a pre-trained model
    model_ft = models.convnext_tiny(weights='DEFAULT')
    num_ftrs = model_ft.classifier[2].in_features
    
    # freeze all the parameters in the network except the final layer
    # for param in model_ft.parameters():
    #     param.requires_grad = False
    
    # replace the last fully connected layer
    model_ft.classifier = nn.Sequential(
        nn.Flatten(), 
        nn.Linear(num_ftrs, num_classes)
        )
    return model_ft

In [16]:
class EnsembleModel(nn.Module):
    def __init__(self, num_classes, ensemble_type='weighted'):
        super(EnsembleModel, self).__init__()
        
        # initialize individual models
        self.resnet = ResNetClassifier(num_classes)
        self.effnet = EfficientNetClassifier(num_classes)
        self.regnet = RegNetClassifier(num_classes)
        
        # ensemble type
        self.ensemble_type = ensemble_type
        
        # weighted averaging
        if ensemble_type == 'weighted':
            self.weights = nn.Parameter(torch.ones(3) / 3)

        # parameters for shepard's rule
        self.a = 1.0
        self.b = 1.0
    
    def forward(self, x):
        # get predictions from each model
        resnet_out = self.resnet(x)
        effnet_out = self.effnet(x)
        regnet_out = self.regnet(x)
        
        # ensemble strategies
        if self.ensemble_type == 'voting':
            # soft voting - average of predictions
            return (resnet_out + effnet_out + regnet_out) / 3
        
        elif self.ensemble_type == 'weighted':
            # weighted average of predictions
            # normalize weights to sum to 1
            normalized_weights = nn.functional.softmax(self.weights, dim=0)
            
            weighted_out = (
                normalized_weights[0] * resnet_out + 
                normalized_weights[1] * effnet_out + 
                normalized_weights[2] * regnet_out
            )
            return weighted_out

        elif self.ensemble_type == 'shepard':
            distances = torch.stack([
                -torch.max(torch.sigmoid(resnet_out), dim=1)[0],
                -torch.max(torch.sigmoid(effnet_out), dim=1)[0],
                -torch.max(torch.sigmoid(regnet_out), dim=1)[0]
            ], dim=1)

            shepard_weights = torch.exp(-self.a * torch.abs(distances) ** self.b)
            shepard_weights = shepard_weights / shepard_weights.sum(dim=1, keepdim=True)

            weighted_out = (
                shepard_weights[:, 0].unsqueeze(1) * resnet_out +
                shepard_weights[:, 1].unsqueeze(1) * effnet_out +
                shepard_weights[:, 2].unsqueeze(1) * regnet_out
            )
            return weighted_out
        
        else:
            return (resnet_out + effnet_out + regnet_out) / 3

In [17]:
# model = ResNetClassifier(num_classes)
# model = DenseNetClassifier(num_classes)
# model = EfficientNetClassifier(num_classes)
# model = RegNetClassifier(num_classes)
# model = ConvNextClassifier(num_classes)

# model.to(device)
# model

In [18]:
# def train_loop(dataloader, model, loss_fn, optimizer):
#     size = len(dataloader.dataset)
#     model.train()
#     for batch, (X, y) in enumerate(dataloader):
#         X, y = X.to(device), y.to(device)
#         pred = model(X)
#         loss = loss_fn(pred, y)

#         loss.backward()
#         optimizer.step()
#         optimizer.zero_grad()

#         if batch % 64 == 0:
#             loss, current = loss.item(), batch * batch_size + len(X)
#             print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


# def test_loop(dataloader, model, loss_fn):
#     model.eval()
#     size = len(dataloader.dataset)
#     num_batches = len(dataloader)
#     test_loss, f2 = 0, 0

#     with torch.no_grad():
#         for X, y in dataloader:
#             X, y = X.to(device), y.to(device)
#             pred = model(X)
#             test_loss += loss_fn(pred, y).item()

#             # calculate f2 score
#             pred_tags = torch.sigmoid(pred).cpu().numpy() > 0.24
#             true_tags = y.cpu().numpy()
#             f2 += fbeta_score(true_tags, pred_tags, beta=2, average='micro')

#     test_loss /= num_batches
#     f2 /= num_batches
    
#     print(f"Test Error: \n f2 score: {f2:.5f}, avg loss: {test_loss:>8f} \n")
#     return f2, test_loss


In [19]:
print(device)

cuda


In [20]:
def train_ensemble_model(
    resnet_train_loader, 
    effnet_train_loader,
    regnet_train_loader,
    resnet_test_loader, 
    effnet_test_loader,
    regnet_test_loader,
    num_classes, 
    epochs, 
    learning_rate, 
    threshold,
    ensemble_type) :

    model = EnsembleModel(num_classes, ensemble_type).to(device)
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.75)
    
    # training loop
    all_loss = []
    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        
        dataloaders = [resnet_train_loader, effnet_train_loader, regnet_train_loader]
        max_loader_len = max(len(loader) for loader in dataloaders)
        
        model.train()
        total_loss = 0
        for batch_idx in range(max_loader_len):
            # cycle through dataloaders
            X_resnet, y_resnet = next(cycle(resnet_train_loader))
            X_effnet, y_effnet = next(cycle(effnet_train_loader))
            X_regnet, y_regnet = next(cycle(regnet_train_loader))
            
            X_resnet = X_resnet.to(device)
            X_effnet = X_effnet.to(device)
            X_regnet = X_regnet.to(device)
            
            y_resnet = y_resnet.to(device)
            y_effnet = y_effnet.to(device)
            y_regnet = y_regnet.to(device)
            
            # get predictions
            resnet_out = model.resnet(X_resnet)
            effnet_out = model.effnet(X_effnet)
            regnet_out = model.regnet(X_regnet)
            
            # compute losses
            loss_resnet = loss_fn(resnet_out, y_resnet)
            loss_effnet = loss_fn(effnet_out, y_effnet)
            loss_regnet = loss_fn(regnet_out, y_regnet)
            
            # total loss
            loss = (loss_resnet + loss_effnet + loss_regnet) / 3
            
            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            
            if batch_idx % 64 == 0:
                print(f"loss: {loss.item():>7f}")
        
        # validation loop
        model.eval()
        test_loss, f2 = 0, 0
        num_batches = min(len(resnet_test_loader), len(effnet_test_loader), len(regnet_test_loader))
        
        with torch.no_grad():
            for (X_resnet, y_resnet), (X_effnet, y_effnet), (X_regnet, y_regnet) in zip(
                resnet_test_loader, effnet_test_loader, regnet_test_loader
            ):
                X_resnet = X_resnet.to(device)
                X_effnet = X_effnet.to(device)
                X_regnet = X_regnet.to(device)
                
                y_resnet = y_resnet.to(device)
                y_effnet = y_effnet.to(device)
                y_regnet = y_regnet.to(device)
                
                # get model predictions
                pred_resnet = model.resnet(X_resnet)
                pred_effnet = model.effnet(X_effnet)
                pred_regnet = model.regnet(X_regnet)
                
                # ensemble prediction
                if model.ensemble_type == 'weighted':
                    normalized_weights = F.softmax(model.weights, dim=0)
                    pred = (
                        normalized_weights[0] * pred_resnet + 
                        normalized_weights[1] * pred_effnet + 
                        normalized_weights[2] * pred_regnet
                    )

                elif model.ensemble_type == 'shepard':
                    # shepard's rule dynamic weights
                    distances = torch.stack([
                        -torch.max(torch.sigmoid(pred_resnet), dim=1)[0],
                        -torch.max(torch.sigmoid(pred_effnet), dim=1)[0],
                        -torch.max(torch.sigmoid(pred_regnet), dim=1)[0]
                    ], dim=1)  # Shape: [batch_size, num_models]
                    
                    # shepard weights
                    shepard_weights = torch.exp(-model.a * torch.abs(distances) ** model.b)
                    shepard_weights = shepard_weights / shepard_weights.sum(dim=1, keepdim=True)
                    
                    # ensemble prediction
                    pred = (
                        shepard_weights[:, 0].unsqueeze(1) * pred_resnet + 
                        shepard_weights[:, 1].unsqueeze(1) * pred_effnet + 
                        shepard_weights[:, 2].unsqueeze(1) * pred_regnet
                    )

                else:
                    pred = (pred_resnet + pred_effnet + pred_regnet) / 3
                
                # compute test loss
                test_loss += loss_fn(pred, y_resnet).item()
                
                # calculate f2 score
                pred_tags = torch.sigmoid(pred).cpu().numpy() > threshold
                true_tags = y_resnet.cpu().numpy()
                f2 += fbeta_score(true_tags, pred_tags, beta=2, average='micro')
        
        test_loss /= num_batches
        f2 /= num_batches
        
        print(f"Test Error: \n f2 score: {f2:.5f}, avg loss: {test_loss:>8f} \n")
        all_loss.append(test_loss)

        scheduler.step()
        print(scheduler.get_last_lr())
    
    return model, all_loss

In [21]:
learning_rate = 0.0001
epochs = 5
threshold = 0.17

In [22]:
ensemble_model, loss_history = train_ensemble_model(
    resnet_train_loader,
    effnet_train_loader,
    regnet_train_loader,
    resnet_test_loader,
    effnet_test_loader,
    regnet_test_loader,
    num_classes=num_classes,
    epochs=epochs,
    learning_rate=learning_rate,
    threshold=threshold,
    ensemble_type='weighted'
)

RegNet(
  (stem): SimpleStemIN(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (trunk_output): Sequential(
    (block1): AnyStage(
      (block1-0): ResBottleneckBlock(
        (proj): Conv2dNormActivation(
          (0): Conv2d(32, 224, kernel_size=(1, 1), stride=(2, 2), bias=False)
          (1): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (f): BottleneckTransform(
          (a): Conv2dNormActivation(
            (0): Conv2d(32, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): ReLU(inplace=True)
          )
          (b): Conv2dNormActivation(
            (0): Conv2d(224, 224, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=4, bias=False)
      

KeyboardInterrupt: 

In [None]:
# loss_fn = nn.BCEWithLogitsLoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# all_loss = []
# for t in range(epochs):
#     print(f"Epoch {t+1}\n-------------------------------")
#     train_loop(train_dataloader, model, loss_fn, optimizer)
#     f2, test_loss = test_loop(test_dataloader, model, loss_fn)
#     all_loss.append(test_loss)
# print("Done!")

In [None]:
print(all_loss)

epochs_list = list(range(1, len(all_loss) + 1))
# print(len(all_loss))
# print(epochs)

plt.plot(epochs_list, all_loss, marker='o', color='b', label='Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Loss over Epochs')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
def predict_image(model, image_path, transform, idx_to_tag):
    model.eval()
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0)
    image = image.to(device)

    with torch.no_grad():
        outputs = model(image)
        probabilities = torch.sigmoid(outputs)
        predicted = probabilities > 0.24
        predicted_labels = [idx_to_tag[i] for i, pred in enumerate(predicted[0]) if pred]

    return predicted_labels, probabilities[0]

In [None]:
image_path = "data/test-jpg/test_5689.jpg"
predicted_labels, probabilities = predict_image(model, image_path, transform, idx_to_tag)

print("Predicted labels:", predicted_labels)
print("Probabilities:")
for i, prob in enumerate(probabilities):
    if prob > 0.24:
        print(f"{idx_to_tag[i]}: {prob.item():.4f}")