In [2]:
import torch
import torch.nn as nn
import torchvision.models as models
from torchcp.classification.score import APS, RAPS
from torchcp.classification.predictor import SplitPredictor
import torch.nn.functional as F
from tqdm import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cpu


In [None]:
#creating the model
def create_advanced_model(num_classes):
    model = models.resnet18(pretrained=True)
    in_features = model.fc.in_features
    
    model.fc = nn.Sequential(
        nn.Dropout(0.3),
        nn.Linear(in_features, 512),
        nn.BatchNorm1d(512),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(512, num_classes)
    )
    return model

In [None]:
# Loading the trained and saved model
model = create_advanced_model(num_classes=3)
model.load_state_dict(torch.load("best_covid_model.pth"))
model.eval()
model = model.to(device)
print("Model loaded successfully!")



Model loaded successfully!


In [None]:
# Copying the same script from the training file to avoid errors during import
import os
from glob import glob
from pathlib import Path
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms

BASE_PATH = r"C:\Users\User\Desktop\Dissertation\Code\dataset2\COVID-19_Radiography_Dataset"
COVID_PATH = r"C:\Users\User\Desktop\Dissertation\Code\dataset2\COVID-19_Radiography_Dataset\COVID\images"
NORMAL_PATH= r"C:\Users\User\Desktop\Dissertation\Code\dataset2\COVID-19_Radiography_Dataset\Normal\images"
VIRAL_PNEUMONIA_PATH=r"C:\Users\User\Desktop\Dissertation\Code\dataset2\COVID-19_Radiography_Dataset\Viral Pneumonia\images"


def load_image_paths():
    covid_images = glob(os.path.join(COVID_PATH, "*.png")) 
    normal_images = glob(os.path.join(NORMAL_PATH, "*.png"))
    viral_pneumonia_images = glob(os.path.join(VIRAL_PNEUMONIA_PATH, "*.png"))
    return covid_images, normal_images, viral_pneumonia_images

def get_eval_transform():
    return transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.Grayscale(num_output_channels=3),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

class CovidDataset(Dataset):
    def __init__(self, df, transform, class_roots):
        self.df = df.reset_index(drop=True)
        self.transform = transform
        self.class_roots = class_roots

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        label = int(row['label'])
        img_name = row['Image Index']
        
        img_path = os.path.join(self.class_roots[label], img_name)
        img = Image.open(img_path).convert('RGB')
        img = self.transform(img)
        
        return img, torch.tensor(label, dtype=torch.long)

def paths_to_df(paths, labels):
    return pd.DataFrame({
        'Image Index': [Path(p).name for p in paths],
        'label': labels
    })

covid_paths, normal_paths, viral_pneumonia_paths = load_image_paths()

normal_labels = [0] * len(normal_paths)
covid_labels = [1] * len(covid_paths)
viral_pneumonia_labels = [2] * len(viral_pneumonia_paths)

all_images = normal_paths + covid_paths + viral_pneumonia_paths
all_labels = normal_labels + covid_labels + viral_pneumonia_labels


X_temp, X_test, y_temp, y_test = train_test_split(
    all_images, all_labels, test_size=0.25, random_state=42, stratify=all_labels)

X_train, X_cal, y_train, y_cal = train_test_split(
    X_temp, y_temp, test_size=0.333, random_state=42, stratify=y_temp)

eval_transform = get_eval_transform()



class_roots = {
    0: os.path.join(BASE_PATH, "Normal", "images"),
    1: os.path.join(BASE_PATH, "COVID", "images"),
    2: os.path.join(BASE_PATH, "Viral Pneumonia", "images")
}

# Create datasets and loaders
cal_df = paths_to_df(X_cal, y_cal)
test_df = paths_to_df(X_test, y_test)

cal_dataset = CovidDataset(cal_df, eval_transform, class_roots)
test_dataset = CovidDataset(test_df, eval_transform, class_roots)

cal_loader = DataLoader(cal_dataset, batch_size=32, shuffle=False, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)


DataLoaders recreated successfully!


In [10]:
def get_predictions(model, dataloader):
    model.eval()
    all_logits = []
    all_labels = []
    
    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc="Getting predictions"):
            images = images.to(device)
            outputs = model(images)
            all_logits.append(outputs.cpu())
            all_labels.append(labels)
    
    return torch.cat(all_logits), torch.cat(all_labels)

In [None]:
#Extracting and saving the predicitions
cal_logits, cal_labels = get_predictions(model, cal_loader)
test_logits, test_labels = get_predictions(model, test_loader)


torch.save({
    'cal_logits': cal_logits, 'cal_labels': cal_labels,
    'test_logits': test_logits, 'test_labels': test_labels
}, 'covid_model_logits.pt')

Getting predictions: 100%|██████████| 119/119 [08:50<00:00,  4.46s/it]
Getting predictions: 100%|██████████| 119/119 [08:22<00:00,  4.23s/it]


In [None]:
# Loading the saved logits
data = torch.load('covid_model_logits.pt')
cal_logits = data['cal_logits']
cal_labels = data['cal_labels']
test_logits = data['test_logits']
test_labels = data['test_labels']

print(f"Loaded predictions - Cal: {cal_logits.shape}, Test: {test_logits.shape}")

Loaded predictions - Cal: torch.Size([3785, 3]), Test: torch.Size([3789, 3])


In [None]:
# Conformal prediction with APS
from torchcp.classification.score import APS
from torchcp.classification.predictor import SplitPredictor

Setting up APS conformal predictor...


In [None]:
# Create APS predictor
aps_score = APS(score_type="softmax", randomized=True)
aps_predictor = SplitPredictor(score_function=aps_score)

# Calculate threshold with calibration logits
aps_predictor.calculate_threshold(cal_logits, cal_labels, alpha=0.1)

# Generate prediction sets for test data
aps_prediction_sets = aps_predictor.predict_with_logits(test_logits)

# Show results
print("=== APS Results ===")
print(f"Average set size: {aps_prediction_sets.sum(dim=1).float().mean():.2f}")
print(f"Coverage: {aps_prediction_sets[range(len(test_labels)), test_labels].float().mean():.3f}")

=== APS Results ===
Average set size: 0.95
Coverage: 0.899


In [None]:
raps_score = RAPS(score_type="softmax", randomized=True, penalty=5, kreg=1)
raps_predictor = SplitPredictor(score_function=raps_score)


raps_predictor.calculate_threshold(cal_logits, cal_labels, alpha=0.1)


raps_prediction_sets = raps_predictor.predict_with_logits(test_logits)


print("=== RAPS Results ===")
print(f"Average set size: {raps_prediction_sets.sum(dim=1).float().mean():.2f}")
print(f"Coverage: {raps_prediction_sets[range(len(test_labels)), test_labels].float().mean():.3f}")



=== RAPS Results ===
Average set size: 0.93
Coverage: 0.906


In [None]:
# Try different RAPS parameters for even better results
print("=== RAPS Parameter Tuning ===")
print("Penalty | kreg | Set Size | Coverage")
print("-" * 40)

for penalty in [0.01, 0.05, 0.1, 0.2,1,5]:
   for kreg in [0, 1, 2]:
       raps_score = RAPS(score_type="softmax", randomized=True, penalty=penalty, kreg=kreg)
       raps_predictor = SplitPredictor(score_function=raps_score)
       
       # Calculating the threshold and predict
       raps_predictor.calculate_threshold(cal_logits, cal_labels, alpha=0.1)
       prediction_sets = raps_predictor.predict_with_logits(test_logits)
       
       # Metric calculation
       avg_set_size = prediction_sets.sum(dim=1).float().mean().item()
       coverage = prediction_sets[range(len(test_labels)), test_labels].float().mean().item()
       
       print(f"  {penalty:4.2f}  |  {kreg}   |  {avg_set_size:.3f}   |  {coverage:.3f}")

=== RAPS Parameter Tuning ===
Penalty | kreg | Set Size | Coverage
----------------------------------------
  0.01  |  0   |  0.951   |  0.901
  0.01  |  1   |  0.941   |  0.895
  0.01  |  2   |  0.947   |  0.898
  0.05  |  0   |  0.954   |  0.912
  0.05  |  1   |  0.947   |  0.906
  0.05  |  2   |  0.951   |  0.904
  0.10  |  0   |  0.940   |  0.905
  0.10  |  1   |  0.934   |  0.899
  0.10  |  2   |  0.944   |  0.896
  0.20  |  0   |  0.928   |  0.898
  0.20  |  1   |  0.927   |  0.897
  0.20  |  2   |  0.946   |  0.897
  1.00  |  0   |  0.926   |  0.901
  1.00  |  1   |  0.933   |  0.907
  1.00  |  2   |  0.944   |  0.895
  5.00  |  0   |  0.921   |  0.897
  5.00  |  1   |  0.921   |  0.896
  5.00  |  2   |  0.952   |  0.899


In [None]:
# Comparing both the methods 
print("\n=== Comparison ===")
print(f"APS  - Set size: {aps_prediction_sets.sum(dim=1).float().mean():.2f}, Coverage: {aps_prediction_sets[range(len(test_labels)), test_labels].float().mean():.3f}")
#print(f"RAPS - Set size: {raps_prediction_sets.sum(dim=1).float().mean():.2f}, Coverage: {raps_prediction_sets[range(len(test_labels)), test_labels].float().mean():.3f}")


=== Comparison ===
APS  - Set size: 0.96, Coverage: 0.912
RAPS - Set size: 0.93, Coverage: 0.904
