In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
import torchvision
from sklearn.model_selection import train_test_split
from torchvision import transforms, datasets
from torchinfo import summary
from torch.utils.tensorboard import SummaryWriter
import torch.nn.functional as F
import timm
import os
import glob
from PIL import Image
import numpy as np

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

In [3]:
# Use the preprocessing pipeline that ships with torchvision's
# EfficientNetV2-L IMAGENET1K_V1 weights entry.
_effnet_weights_enum = torchvision.models.EfficientNet_V2_L_Weights
weights = _effnet_weights_enum.IMAGENET1K_V1
auto_transforms = weights.transforms()

# Bare expression: echo the pipeline's settings as the cell output.
auto_transforms

ImageClassification(
    crop_size=[480]
    resize_size=[480]
    mean=[0.5, 0.5, 0.5]
    std=[0.5, 0.5, 0.5]
    interpolation=InterpolationMode.BICUBIC
)

In [4]:
# Preprocessing pipelines, one entry per input format used by the ensemble.
# The keys are looked up verbatim by the inference cell, so their spelling
# (including 'EffNet_v2_l_adjsuted') must stay exactly as written here.

# Shorter side resized to 518 with bicubic interpolation, 518 centre crop,
# then normalisation with mean [0.485, 0.456, 0.406] / std [0.229, 0.224, 0.225].
_vit_h14_steps = [
    transforms.Resize(518, interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.CenterCrop(518),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]

# Direct resize to 480x480 (no crop), followed by the same normalisation.
_effnet_adjusted_steps = [
    transforms.Resize((480, 480)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]

data_transforms = {
    'EffNet_v2_l': auto_transforms,
    'ViT_h14': transforms.Compose(_vit_h14_steps),
    'EffNet_v2_l_adjsuted': transforms.Compose(_effnet_adjusted_steps),
}

In [5]:
# Class ids 0..99 arranged in the order their decimal strings sort
# ("0", "1", "10", "11", ..., "19", "2", ...). Position i of this list is the
# label reported for model output index i.
# NOTE(review): presumably mirrors the class-folder ordering used at training
# time -- confirm.
labels = sorted(range(100), key=str)

# Test images, ordered by the integer that precedes the first '.' in each
# file name.
file_paths = glob.glob('data/test/*.jpg')
file_paths.sort(key=lambda path: int(os.path.basename(path).split('.')[0]))

In [6]:
# Ensemble member 1: timm EfficientNetV2-L. pretrained=False skips the
# ImageNet download; the fine-tuned checkpoint loaded below provides the weights.
model1 = timm.create_model('tf_efficientnetv2_l', pretrained=False)

# Replace the stock classifier with a dropout + 100-way linear head so the
# module layout matches the checkpoint's keys.
model1.classifier = nn.Sequential(
    nn.Dropout(p=0.25, inplace=False),
    nn.Linear(1280, 100),
)
model1.to(device)

# os.path.join avoids the invalid '\E' escape of the hand-written Windows path
# and resolves on every OS. map_location=device lets a checkpoint saved on a
# GPU load when running on the CPU fallback.
# The load_state_dict result is the cell's last expression, so the key-matching
# report is displayed.
model1.load_state_dict(
    torch.load(
        os.path.join('ensemble_models', 'Ethan200_epoch_219_best_loss.pth'),
        map_location=device,
    )
)

<All keys matched successfully>

In [7]:
# Ensemble member 2: SWAG ViT-H/14 fetched through torch.hub.
model2 = torch.hub.load("facebookresearch/swag", model="vit_h14_in1k")

# Swap in a 100-way linear head so the module layout matches the checkpoint.
model2.head = torch.nn.Linear(1280, 100)
model2.to(device)

# map_location=device lets a checkpoint saved on a GPU load when running on
# the CPU fallback.
model2.load_state_dict(
    torch.load(
        "ensemble_models/ViT_Huge_FB_epoch_21_best_loss.pth",
        map_location=device,
    )
)

Using cache found in C:\Users\Jason/.cache\torch\hub\facebookresearch_swag_main


In [8]:
# Ensemble member 3: a second SWAG ViT-H/14, loaded from a different
# fine-tuned checkpoint than model2.
model3 = torch.hub.load("facebookresearch/swag", model="vit_h14_in1k")

# Swap in a 100-way linear head so the module layout matches the checkpoint.
model3.head = torch.nn.Linear(1280, 100)
model3.to(device)

# os.path.join avoids the invalid '\V' escape of the hand-written Windows path
# and resolves on every OS. map_location=device lets a checkpoint saved on a
# GPU load when running on the CPU fallback.
model3.load_state_dict(
    torch.load(
        os.path.join(
            'ensemble_models',
            'ViT_Huge_FB_2Unfrozen_RandomErasing_epoch_18_best_acc.pth',
        ),
        map_location=device,
    )
)

Using cache found in C:\Users\Jason/.cache\torch\hub\facebookresearch_swag_main


In [9]:
# Ensemble member 4: a second timm EfficientNetV2-L, loaded from a different
# fine-tuned checkpoint than model1.
model4 = timm.create_model('tf_efficientnetv2_l', pretrained=False)

# Replace the stock classifier with a dropout + 100-way linear head so the
# module layout matches the checkpoint's keys.
model4.classifier = nn.Sequential(
    nn.Dropout(p=0.25, inplace=False),
    nn.Linear(1280, 100),
)
model4.to(device)

# os.path.join avoids the invalid '\B' escape of the hand-written Windows path
# and resolves on every OS. map_location=device lets a checkpoint saved on a
# GPU load when running on the CPU fallback.
# The load_state_dict result is the cell's last expression, so the key-matching
# report is displayed.
model4.load_state_dict(
    torch.load(
        os.path.join('ensemble_models', 'Best_EffNet_L_epoch_258_best_acc.pth'),
        map_location=device,
    )
)

<All keys matched successfully>

In [10]:
# Dataset directory name.
# NOTE(review): no cell visible in this notebook reads `data_dir` -- confirm
# whether it is still needed.
data_dir = "split_data"

In [11]:
# Test images in ascending order of the integer that precedes the first '.'
# in each file name; the inference loop iterates over this list.
image_paths = sorted(
    glob.glob('data/test/*.jpg'),
    key=lambda p: int(os.path.split(p)[1].partition('.')[0]),
)

# Class ids 0..99 in string-sorted order ("0", "1", "10", ...); entry i is the
# label reported for model output index i.
labels = [int(text) for text in sorted(str(idx) for idx in range(100))]

In [12]:
# Ensemble inference: run the four models on every test image, record each
# model's own top-1 label, and combine their softmax outputs with fixed weights.
predictions = []    # top-1 label of the weighted ensemble score, one per image
model1_preds = []   # top-1 label of each individual model, one per image
model2_preds = []
model3_preds = []
model4_preds = []
outputs = []        # weighted ensemble score tensors (one row each), one per image

# Switching to eval mode is loop-invariant, so do it once up front.
for _model in (model1, model2, model3, model4):
    _model.eval()


def _top_label(scores):
    """Return the `labels` entry for the arg-max column of a single-row score tensor."""
    return labels[torch.max(scores, 1)[1].item()]


# Count from 1 so the progress message reports how many images are done.
for n, image_path in enumerate(image_paths, start=1):
    # Load the image as RGB; the context manager closes the file handle.
    with Image.open(image_path) as img:
        image = img.convert('RGB')

    # One preprocessed copy per input format, each with a leading batch dimension.
    image_effnet = data_transforms['EffNet_v2_l'](image).unsqueeze(0).to(device)  # model1
    image_vit = data_transforms['ViT_h14'](image).unsqueeze(0).to(device)  # model2 and model3
    image_effnet_adjusted = data_transforms['EffNet_v2_l_adjsuted'](image).unsqueeze(0).to(device)  # model4

    with torch.no_grad():
        # Raw outputs of each base model.
        preds_model1 = model1(image_effnet)
        preds_model2 = model2(image_vit)
        preds_model3 = model3(image_vit)
        preds_model4 = model4(image_effnet_adjusted)

        # Per-model top-1 labels, kept for the disagreement checks further down.
        model1_preds.append(_top_label(preds_model1))
        model2_preds.append(_top_label(preds_model2))
        model3_preds.append(_top_label(preds_model3))
        model4_preds.append(_top_label(preds_model4))

        # Convert each model's outputs to probabilities before mixing them.
        preds_model1 = torch.nn.functional.softmax(preds_model1, dim=1)
        preds_model2 = torch.nn.functional.softmax(preds_model2, dim=1)
        preds_model3 = torch.nn.functional.softmax(preds_model3, dim=1)
        preds_model4 = torch.nn.functional.softmax(preds_model4, dim=1)

        # Weighted combination of all four models.
        # NOTE(review): the weights sum to 3.34, so dividing by 4 gives scores
        # that sum to 0.835 per image, not 1. The arg-max is unaffected, but the
        # later adjustment cell consumes these scores as-is -- confirm this
        # scale is intended.
        averaged_preds = (0.82 * preds_model1 + 0.85 * preds_model2 + 0.83 * preds_model3 + 0.84 * preds_model4) / 4

    outputs.append(averaged_preds)

    # Ensemble top-1 label for this image.
    predictions.append(_top_label(averaged_preds))

    print(f"Processed {n} images")

Processed 0 images
Processed 1 images
Processed 2 images
Processed 3 images
Processed 4 images
Processed 5 images
Processed 6 images
Processed 7 images
Processed 8 images
Processed 9 images
Processed 10 images
Processed 11 images
Processed 12 images
Processed 13 images
Processed 14 images
Processed 15 images
Processed 16 images
Processed 17 images
Processed 18 images
Processed 19 images
Processed 20 images
Processed 21 images
Processed 22 images
Processed 23 images
Processed 24 images
Processed 25 images
Processed 26 images
Processed 27 images
Processed 28 images
Processed 29 images
Processed 30 images
Processed 31 images
Processed 32 images
Processed 33 images
Processed 34 images
Processed 35 images
Processed 36 images
Processed 37 images
Processed 38 images
Processed 39 images
Processed 40 images
Processed 41 images
Processed 42 images
Processed 43 images
Processed 44 images
Processed 45 images
Processed 46 images
Processed 47 images
Processed 48 images
Processed 49 images
Processed 

In [16]:
# Display the ensemble's labels for the first ten test images.
predictions[:10]

[96, 25, 43, 24, 6, 85, 66, 89, 18, 93]

In [17]:
# For each base model, count the images whose ensemble label differs from that
# model's own top-1 label. The result is a 4-tuple in model1..model4 order.
tuple(
    sum(1 for i, ensemble_label in enumerate(predictions) if ensemble_label != per_model[i])
    for per_model in (model1_preds, model2_preds, model3_preds, model4_preds)
)

(222, 87, 107, 110)

In [18]:
# Number of images on which model2 and model3 chose different top-1 labels.
sum(1 for i in range(len(predictions)) if model2_preds[i] != model3_preds[i])

113

In [36]:
# Post-process the ensemble scores with the project helper
# `dynamic_greedy_adjustment`.
# NOTE(review): the helper's behaviour and the meaning of the positional
# arguments 100 and 10 are not visible in this notebook -- verify against
# tools/prediction.
from tools.prediction import dynamic_greedy_adjustment

# Stack the per-image score rows into one array on the host.
outputs_np = torch.cat(outputs, dim=0).cpu().numpy()

_adjusted_indices = dynamic_greedy_adjustment(
    outputs_np,
    100,
    10,
    base_penalty_factor=0.05,
    initial_penalty_increase=0.001,
    max_iterations=75,
    stability_threshold=25,
)

# The helper's results are used as positions into `labels`; map them to labels.
adjusted_predictions = [labels[idx] for idx in _adjusted_indices]

In [37]:
# Number of images whose label changed between the raw ensemble prediction
# and the adjusted prediction.
sum(1 for i, adjusted in enumerate(adjusted_predictions) if adjusted != predictions[i])

38

In [38]:
# Export the adjusted predictions as a two-column CSV (ID = image file name,
# Label = predicted label).
import csv

output_csv_path = 'predictions.csv'

# Pair every label with the path it was computed from: the inference loop
# iterated over `image_paths`, so use that same list and fail loudly if the
# two sequences do not line up.
if len(adjusted_predictions) != len(image_paths):
    raise ValueError(
        f"Got {len(adjusted_predictions)} predictions for {len(image_paths)} images"
    )

# Mode 'w' truncates an existing file, so no explicit delete is needed.
# newline='' leaves line-ending handling to the csv module.
with open(output_csv_path, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(['ID', 'Label'])
    csvwriter.writerows(
        [os.path.basename(path), pred]
        for path, pred in zip(image_paths, adjusted_predictions)
    )

print(f"Predictions written to {output_csv_path}")

Predictions written to predictions.csv


: 