In [None]:
# Author: Bonaventure F. P. Dossou - bonaventure.dossou@mila.quebec (bonaventuredossou.github.io)
# Data transformation, Models Configurations and Training (more details on Tuberculosis_Solution.md)
# Check License under LICENSE.md
from __future__ import print_function 
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, transforms, models
from torchvision.models import resnet18, resnet50, resnet152, regnet_y_128gf, efficientnet_v2_m, efficientnet_v2_l, convnext_base, convnext_large, wide_resnet101_2, vgg19_bn, regnet_x_32gf, swin_b, swin_v2_b, maxvit_t, vit_h_14
import matplotlib.pyplot as plt
import time
import os
from tqdm import tqdm
import copy
# Point the TORCH_HOME environment variable at the project directory.
# NOTE(review): presumably so torch/torchvision keep their downloaded files there — confirm.
os.environ['TORCH_HOME'] = os.path.join('/','home','ngsci','project')

In [None]:
# Delete the `.ipynb_checkpoints` folder sitting inside the class-0 training directory.
# NOTE(review): presumably needed so the folder is not mistaken for image data — confirm.
!rm -r '../tb/train/0/.ipynb_checkpoints'
# Disabled variants for the other class/split folders. NOTE(review): the `*` patterns are
# inside single quotes, so the shell would not expand them if these lines were re-enabled.
# !rm -r '/home/ngsci/project/tb/train/1/*ipynb_checkpoints*'

# !rm -r '/home/ngsci/project/tb/val/0/*ipynb_checkpoints*'
# !rm -r '/home/ngsci/project/tb/val/1/*ipynb_checkpoints*'

In [None]:
# Experiment configuration.
# `batch_size` and `num_epochs` are also interpolated into the checkpoint and
# prediction file names used further down, so they must match the training run.
data_dir = os.path.join('/', 'home', 'ngsci', 'project', 'tb')
num_classes, feature_extract = 2, False
batch_size, num_epochs = 32, 10  # batch size of 32 gives best current result

In [None]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when ``feature_extracting`` is truthy.

    When the flag is falsy the model is left untouched. Nothing is returned;
    the parameters are modified in place.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [None]:
# Evaluation
import glob
import pandas as pd

# Read the hold-out manifest and collect the path of every image it lists.
holdout_manifest = '/home/ngsci/datasets/tb-wellgen-smear/supplementary/contest/tb-holdout-manifest.csv'
test_set = pd.read_csv(holdout_manifest)
test_images_paths = test_set['file_path'].tolist()
print('Test Images: {}'.format(len(test_images_paths)))

In [None]:
from PIL import Image
from tqdm import tqdm
import csv
import pandas as pd

def run_inference_image(path, model, model_name, device):
    """Score one image and return its id together with the class-1 probability.

    Args:
        path: Path of the image file to score.
        model: Network to evaluate; it is switched to eval mode here.
        model_name: Not used by this function; kept so existing callers keep working.
        device: torch device the input tensor is moved to before the forward pass.

    Returns:
        A tuple ``(image_id, class_one_proba)``. ``image_id`` is the file name up to
        its first dot; ``class_one_proba`` is a Python float taken from index 1 of the
        softmax over the model output.
    """
    model.eval()
    image_id = os.path.basename(path).split('.')[0]

    # Resize to 224x224 (the following CenterCrop of the same size leaves it unchanged),
    # convert to a tensor and normalise each of the three channels.
    transform_data = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.CenterCrop((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # The context manager closes the file handle once the pixels have been read.
    # Converting to RGB guarantees the three channels that Normalize expects, even
    # if the file on disk is grayscale or carries an alpha channel.
    with Image.open(path) as image:
        img_t = transform_data(image.convert('RGB'))

    # Add the batch dimension the model expects and move the input to the target device.
    img_t = img_t.float().unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(img_t)

    # Drop the batch dimension and turn the two outputs into probabilities.
    prediction = output.squeeze(0).softmax(0)
    class_one_proba = prediction[1].item()
    return image_id, class_one_proba

def run_inference(paths, model, model_name, device):
    """Score every image in ``paths`` and return ``{image_id: class-1 probability}``.

    Each path is handed to ``run_inference_image`` while a tqdm bar reports progress.
    If two paths produce the same image id, the later one wins.
    """
    progress = tqdm(range(len(paths)), desc ="Evaluation Progress")
    scored = [run_inference_image(paths[position], model, model_name, device)
              for position in progress]
    # Each entry is an (image_id, probability) pair, which dict() consumes directly.
    return dict(scored)

def _swap_classification_layer(model, layer_path, out_features):
    """Replace the linear layer located at ``layer_path`` with a new ``out_features``-way one.

    ``layer_path`` is a tuple of steps from ``model`` down to the layer: a string step
    is an attribute name, an integer step is an index into a container module.
    """
    owner = model
    for step in layer_path[:-1]:
        owner = owner[step] if isinstance(step, int) else getattr(owner, step)

    leaf = layer_path[-1]
    current = owner[leaf] if isinstance(leaf, int) else getattr(owner, leaf)
    replacement = nn.Linear(current.in_features, out_features)
    if isinstance(leaf, int):
        owner[leaf] = replacement
    else:
        setattr(owner, leaf, replacement)


def build_model(model_name):
    """Rebuild a fine-tuned network and load its checkpoint from ``finetuned_weights/``.

    The architecture is created without pretrained weights, its final linear layer is
    replaced by a 2-output one, and the saved ``state_dict`` is loaded into it. The
    checkpoint file name is derived from ``model_name``, the module-level ``batch_size``
    and ``num_epochs``, and the learning rate associated with the model.

    Args:
        model_name: One of "resnet50", "wide_resnet101", "vgg", "efficientnet",
            "convnext", "regnet", "swin", "maxvit" or "vit".

    Returns:
        The model with the checkpoint weights loaded. Tensors are loaded onto the CPU;
        the caller moves the model to the device it wants.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    model_lr_map = {'resnet18': 1e-5, 'resnet50': 1e-4, 'resnet152': 1e-5,
                    'efficientnet': 4e-4, 'convnext': 1e-5, "wide_resnet101": 1e-4,
                    "vgg": 1e-4, "regnet": 1e-5, "swin": 1e-5, "maxvit": 1e-4, "vit": 1e-4}

    # model name -> (constructor, checkpoint file prefix, path to the final linear layer).
    # Constructors are wrapped in lambdas so that only the requested one is evaluated.
    default_prefix = 'no_aug_tb_0'
    architectures = {
        "resnet50": (lambda: resnet50(weights=None), default_prefix, ('fc',)),
        "wide_resnet101": (lambda: wide_resnet101_2(weights=None), 'sgd_no_aug_tb_0', ('fc',)),
        "vgg": (lambda: vgg19_bn(weights=None), default_prefix, ('classifier', 6)),
        "efficientnet": (lambda: efficientnet_v2_l(weights=None), default_prefix, ('classifier', 1)),
        "convnext": (lambda: convnext_large(weights=None), default_prefix, ('classifier', 2)),
        "regnet": (lambda: regnet_y_128gf(weights=None), default_prefix, ('fc',)),
        "swin": (lambda: swin_v2_b(weights=None), default_prefix, ('head',)),
        "maxvit": (lambda: maxvit_t(weights=None), default_prefix, ('classifier', 5)),
        "vit": (lambda: vit_h_14(weights=None), default_prefix, ('heads', 'head')),
    }

    # Fail loudly instead of returning None, which would only surface later as an
    # unrelated AttributeError in the caller.
    if model_name not in architectures:
        raise ValueError('Unknown model name {!r}; expected one of {}'.format(
            model_name, sorted(architectures)))

    make_backbone, checkpoint_prefix, head_path = architectures[model_name]
    lr = model_lr_map[model_name]

    model_ft = make_backbone()
    _swap_classification_layer(model_ft, head_path, 2)

    checkpoint_path = 'finetuned_weights/{}_{}_{}_{}_{}.pt'.format(
        checkpoint_prefix, model_name, batch_size, num_epochs, lr)
    # map_location='cpu' lets checkpoints saved from a GPU be restored on any machine.
    # NOTE: torch.load unpickles the file, so only point it at trusted checkpoints.
    checkpoints = torch.load(checkpoint_path, map_location='cpu')
    model_ft.load_state_dict(checkpoints['state_dict'])
    return model_ft

def save_predictions(pred_dict, name_model):
    """Write one model's predictions to a CSV file under ``predictions/``.

    Args:
        pred_dict: Mapping of image id to class-1 probability.
        name_model: Model name; it becomes part of the file name together with the
            module-level ``batch_size`` and ``num_epochs``.

    The file has the columns ``image_id`` and ``proba_class_one`` and no index column.
    """
    # to_csv does not create missing folders; make sure the target exists so a
    # finished inference pass is not lost at the very last step.
    os.makedirs('predictions', exist_ok=True)

    frame = pd.DataFrame({
        'image_id': list(pred_dict.keys()),
        'proba_class_one': list(pred_dict.values()),
    })
    frame.to_csv('predictions/predictions_0_{}_{}_{}.csv'.format(name_model, batch_size, num_epochs), index=False)

# Architectures whose fine-tuned checkpoints are scored on the hold-out images.
models_names = ["resnet50", "wide_resnet101", "efficientnet", "convnext", "vgg", "regnet", "swin", "maxvit"]

# The target device is the same for every model, so it is resolved once.
# Inference runs on this single device: run_inference feeds the model one image
# at a time, so there is nothing to split across several GPUs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

loaded_model = None
for model_ in models_names:
    print('Predicting for {}'.format(model_))

    # Drop the reference to the previous network before the next one is built, so
    # two large models never have to be held in memory at the same time.
    loaded_model = None

    loaded_model = build_model(model_).to(device)
    predictions_dict = run_inference(test_images_paths, loaded_model, model_, device)
    save_predictions(predictions_dict, model_)

In [8]:
from scipy.stats import gmean, tmean

def _read_model_predictions(model_name):
    """Load the prediction CSV written for ``model_name`` (columns image_id, proba_class_one)."""
    csv_path = 'predictions/predictions_0_{}_{}_{}.csv'.format(model_name, batch_size, num_epochs)
    # Read the ids as text so they compare exactly across files and keep any leading zeros.
    return pd.read_csv(csv_path, dtype={'image_id': str})


resnet50_0 = _read_model_predictions("resnet50")
efficientnet_0 = _read_model_predictions("efficientnet")
convnext_0 = _read_model_predictions("convnext")
wide_resnet_0 = _read_model_predictions("wide_resnet101")
vgg_0 = _read_model_predictions("vgg")
regnet_0 = _read_model_predictions("regnet")
swin_0 = _read_model_predictions("swin")
maxvit_0 = _read_model_predictions("maxvit")

# The resnet50 file provides the reference list of image ids.
image_ids = resnet50_0.image_id.tolist()

# The per-model columns are combined by position further down, so every file must
# list exactly the same images in exactly the same order. Stop here if one does not,
# rather than silently averaging probabilities that belong to different images.
for _model_name, _frame in [("efficientnet", efficientnet_0), ("convnext", convnext_0),
                            ("wide_resnet101", wide_resnet_0), ("vgg", vgg_0),
                            ("regnet", regnet_0), ("swin", swin_0), ("maxvit", maxvit_0)]:
    if _frame.image_id.tolist() != image_ids:
        raise ValueError(
            'Predictions for {} do not list the same image ids, in the same order, '
            'as the resnet50 predictions'.format(_model_name))

rest50_preds_0 = resnet50_0.proba_class_one.tolist()
efficientnet_preds_0 = efficientnet_0.proba_class_one.tolist()
convnext_preds_0 = convnext_0.proba_class_one.tolist()
wide_resnet_preds_0 = wide_resnet_0.proba_class_one.tolist()
vgg_preds_0 = vgg_0.proba_class_one.tolist()
regnet_preds_0 = regnet_0.proba_class_one.tolist()
swin_preds_0 = swin_0.proba_class_one.tolist()
maxvit_preds_0 = maxvit_0.proba_class_one.tolist()

def arith_geo_mean(list_of_proba):
    """Combine several probabilities into a single ensemble score.

    The score is the geometric mean of the values multiplied by their arithmetic
    mean, i.e. a product of two means rather than a mean itself.
    """
    return gmean(list_of_proba) * tmean(list_of_proba)

# One probability column per model; zip(*) then yields, for each image, the eight
# probabilities in this model order, which are combined into a single score.
per_model_columns = (rest50_preds_0, efficientnet_preds_0, convnext_preds_0, wide_resnet_preds_0,
                     vgg_preds_0, regnet_preds_0, swin_preds_0, maxvit_preds_0)
geo_arith_means = [arith_geo_mean(list(per_image)) for per_image in zip(*per_model_columns)]

# The ensemble file is written without header and without index column.
final_frame = pd.DataFrame({'image_id': image_ids, 'proba_class_one': geo_arith_means})
final_frame.to_csv('predictions/no_aug_deep_ensemble_{}_{}_with_AdamW.csv'.format(batch_size, num_epochs), index=False, header=False)
print(final_frame.head())

   image_id  proba_class_one
0  0002da84     5.214671e-07
1  0002e02e     7.578788e-07
2  00042206     2.279913e-06
3  00072c47     4.399681e-08
4  0008332c     8.502232e-09


In [None]:
import ngsci
# Derive the file name and the description from the same configuration values, so the
# description can never disagree with the file that is actually submitted.
submission_name = 'no_aug_deep_ensemble_{}_{}_with_AdamW'.format(batch_size, num_epochs)
submission_file = 'predictions/{}.csv'.format(submission_name)
ngsci.submit_contest_entry(submission_file,
                           description=submission_name)