In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

train_dir_1 = r"C:\Users\user\Desktop\Minor sem 6\COMBINED"

transform = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.ToTensor(),          
    transforms.Normalize((0.5,), (0.5,))  
])

test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)

test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

print("Classes:", test_dataset.classes)
print("Number of test samples:", len(test_dataset))

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def imshow(img, title):
    img = img / 2 + 0.5  # Unnormalize
    npimg = img.numpy()
    plt.figure(figsize=(8, 8))
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.title(title)
    plt.axis("off")
    plt.show()

dataiter = iter(test_loader)
images, labels = next(dataiter)

imshow(images[0], title=f"Class: {test_dataset.classes[labels[0]]}")


In [None]:
import torch 
import clip 
from PIL import Image 

device="cuda" if torch.cuda.is_available() else "cpu"
print(device)
model, preprocess=clip.load("ViT-B/32", device=device)

class_names=['electronic', 'glass', 'metal', 'organic', 'paper', 'plastic']
text_inputs=clip.tokenize([f"A photo of {c} waste" for c in class_names]).to(device)

print("Model loaded successfully")

In [None]:
import numpy as np

test_image_path = test_dataset.samples[0][0]  
image = preprocess(Image.open(test_image_path)).unsqueeze(0).to(device)

with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text_inputs)

image_features /= image_features.norm(dim=-1, keepdim=True)
text_features /= text_features.norm(dim=-1, keepdim=True)

similarity = (image_features @ text_features.T).squeeze(0).cpu().numpy()

predicted_class_idx = np.argmax(similarity)
predicted_class = class_names[predicted_class_idx]

imshow(preprocess(Image.open(test_image_path)), title=f"Predicted: {predicted_class}")


In [None]:
correct = 0
total = 0

with torch.no_grad():
    for image_path, label in test_dataset.samples:
        image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)

        image_features = model.encode_image(image)
        image_features /= image_features.norm(dim=-1, keepdim=True)
        text_features /= text_features.norm(dim=-1, keepdim=True)

        similarity = (image_features @ text_features.T).squeeze(0).cpu().numpy()
        predicted_class_idx = np.argmax(similarity)
    
        if predicted_class_idx == label:
            correct += 1
        total += 1

accuracy = (correct / total) * 100
print(f"Test Accuracy: {accuracy:.2f}%")


In [None]:
import os
import shutil

# Define source directories
raw_data_dir = r"C:\Users\user\Desktop\Minor sem 6\Laptop Components Image Dataset to Classify Different Components\Raw Data\Raw Data"
augmented_data_dir = r"C:\Users\user\Desktop\Minor sem 6\Laptop Components Image Dataset to Classify Different Components\Augmented Data\Augmented Data"

# Define the destination directory
combined_dir = r"C:\Users\user\Desktop\Minor sem 6\Laptop Components Combined"

# Ensure the destination directory exists
os.makedirs(combined_dir, exist_ok=True)

# Get class names from the raw data folder
class_names = os.listdir(raw_data_dir)

for class_name in class_names:
    # Define the source class directories
    raw_class_dir = os.path.join(raw_data_dir, class_name)
    augmented_class_dir = os.path.join(augmented_data_dir, class_name)
    
    # Define the destination class directory
    combined_class_dir = os.path.join(combined_dir, class_name)
    os.makedirs(combined_class_dir, exist_ok=True)
    
    # Move images from raw data
    if os.path.exists(raw_class_dir):
        for file in os.listdir(raw_class_dir):
            shutil.move(os.path.join(raw_class_dir, file), os.path.join(combined_class_dir, file))
    
    # Move images from augmented data
    if os.path.exists(augmented_class_dir):
        for file in os.listdir(augmented_class_dir):
            shutil.move(os.path.join(augmented_class_dir, file), os.path.join(combined_class_dir, file))

print("Merging complete!")


In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

test_dir = r"C:\Users\user\Desktop\Minor sem 6\Laptop Components Combined"

transform = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.ToTensor(),          
    transforms.Normalize((0.5,), (0.5,))  
])

test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)

test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

print("Classes:", test_dataset.classes)
print("Number of test samples:", len(test_dataset))

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def imshow(img, title):
    img = img / 2 + 0.5  # Unnormalize
    npimg = img.numpy()
    plt.figure(figsize=(8, 8))
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.title(title)
    plt.axis("off")
    plt.show()

dataiter = iter(test_loader)
images, labels = next(dataiter)

imshow(images[0], title=f"Class: {test_dataset.classes[labels[0]]}")

In [None]:
import torch 
import clip 
from PIL import Image 

device="cuda" if torch.cuda.is_available() else "cpu"
print(device)
model, preprocess=clip.load("ViT-B/32", device=device)

class_names=[
    "Battery", "USBPort", "WifiCard", "Speaker", "CMOSBattery", 
    "HardDiskDrive", "WebCam", "LCDScreen", "Bezel", "HeatSink", 
    "SSD", "DVDRom", "CPUFan", "TouchPad", "Processor", 
    "RAMCover", "BasePanel", "TopPanel", "LVDSCable", "Hinge", 
    "Keyboard", "Motherboard", "DCCable", "PowerSwitch", "RAM", "ScrewKit"
]
text_inputs=clip.tokenize([f"A photo of {c} waste" for c in class_names]).to(device)

print("Model loaded successfully")

In [None]:
import numpy as np

test_image_path = test_dataset.samples[0][0]  
image = preprocess(Image.open(test_image_path)).unsqueeze(0).to(device)

with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text_inputs)

image_features /= image_features.norm(dim=-1, keepdim=True)
text_features /= text_features.norm(dim=-1, keepdim=True)

similarity = (image_features @ text_features.T).squeeze(0).cpu().numpy()

predicted_class_idx = np.argmax(similarity)
predicted_class = class_names[predicted_class_idx]

imshow(preprocess(Image.open(test_image_path)), title=f"Predicted: {predicted_class}")

In [None]:
correct = 0
total = 0

with torch.no_grad():
    for image_path, label in test_dataset.samples:
        image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)

        image_features = model.encode_image(image)
        image_features /= image_features.norm(dim=-1, keepdim=True)
        text_features /= text_features.norm(dim=-1, keepdim=True)

        similarity = (image_features @ text_features.T).squeeze(0).cpu().numpy()
        predicted_class_idx = np.argmax(similarity)
    
        if predicted_class_idx == label:
            correct += 1
        total += 1

accuracy = (correct / total) * 100
print(f"Test Accuracy: {accuracy:.2f}%")

In [None]:
import os
import random
import shutil

source_train_dir = r"C:\Users\user\Desktop\Minor sem 6\Laptop Components Combined\Train"
target_test_seen_dir = r"C:\Users\user\Desktop\Minor sem 6\Laptop Components Combined\Test Seen"
split_ratio = 0.2  # move 20% to test_seen

os.makedirs(target_test_seen_dir, exist_ok=True)

for class_name in os.listdir(source_train_dir):
    class_dir = os.path.join(source_train_dir, class_name)
    if not os.path.isdir(class_dir):
        continue
    
    images = os.listdir(class_dir)
    num_to_move = int(len(images) * split_ratio)
    selected = random.sample(images, num_to_move)

    # Make class folder in test_seen
    target_class_dir = os.path.join(target_test_seen_dir, class_name)
    os.makedirs(target_class_dir, exist_ok=True)

    for img in selected:
        src_path = os.path.join(class_dir, img)
        dst_path = os.path.join(target_class_dir, img)
        shutil.move(src_path, dst_path)

print("Finished splitting seen class samples into test_seen.")

In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import clip
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.ImageFolder(root=r"C:\Users\user\Desktop\Minor sem 6\COMBINED\Train", transform=transform)
test_seen_dataset = datasets.ImageFolder(root=r"C:\Users\user\Desktop\Minor sem 6\COMBINED\Test Seen", transform=transform)
test_unseen_dataset = datasets.ImageFolder(root=r"C:\Users\user\Desktop\Minor sem 6\COMBINED\Test Unseen", transform=transform)

seen_classes = train_dataset.classes
unseen_classes = test_unseen_dataset.classes

print("Seen classes:", seen_classes)
print("Unseen classes:", unseen_classes)

model, preprocess = clip.load("ViT-B/32", device=device)

text_inputs_seen = clip.tokenize([f"A photo of {c} waste" for c in seen_classes]).to(device)
text_inputs_unseen = clip.tokenize([f"A photo of {c} waste" for c in unseen_classes]).to(device)

print("CLIP tokenizer ready.")


In [None]:
import torch
import numpy as np
from PIL import Image
from tqdm import tqdm
from torchvision import datasets

device = "cuda" if torch.cuda.is_available() else "cpu"

def get_text_features(class_list, model):
    text_inputs = clip.tokenize([f"A photo of {c} waste" for c in class_list]).to(device)
    with torch.no_grad():
        text_features = model.encode_text(text_inputs)
        text_features /= text_features.norm(dim=-1, keepdim=True)
    return text_inputs, text_features

def evaluate_clip(dataset, class_list, text_features, model, preprocess):
    correct = 0
    total = 0

    for image_path, label in tqdm(dataset.samples):
        image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)
        with torch.no_grad():
            image_features = model.encode_image(image)
            image_features /= image_features.norm(dim=-1, keepdim=True)

            similarity = (image_features @ text_features.T).squeeze(0).cpu().numpy()
            predicted_class_idx = np.argmax(similarity)

            if predicted_class_idx == label:
                correct += 1
            total += 1

    return (correct / total) * 100 if total > 0 else 0

text_inputs_seen, text_features_seen = get_text_features(seen_classes, model)
text_inputs_unseen, text_features_unseen = get_text_features(unseen_classes, model)

print("🔎 Evaluating on Seen classes (GZSL)...")
acc_seen = evaluate_clip(test_seen_dataset, seen_classes, text_features_seen, model, preprocess)

print("🔎 Evaluating on Unseen classes (GZSL)...")
all_classes = seen_classes + unseen_classes
_, text_features_all = get_text_features(all_classes, model)

acc_unseen = evaluate_clip(test_unseen_dataset, all_classes, text_features_all, model, preprocess)

print("🔎 Evaluating Closed-set ZSL (Unseen only)...")
zsl_accuracy = evaluate_clip(test_unseen_dataset, unseen_classes, text_features_unseen, model, preprocess)

harmonic_mean = (2 * acc_seen * acc_unseen) / (acc_seen + acc_unseen) if (acc_seen + acc_unseen) > 0 else 0

# Final print
print("\n📊 Final Evaluation Metrics")
print(f"Seen Class Accuracy (GZSL):      {acc_seen:.2f}%")
print(f"Unseen Class Accuracy (GZSL):    {acc_unseen:.2f}%")
print(f"Harmonic Mean (GZSL):            {harmonic_mean:.2f}%")
print(f"Closed-set ZSL Accuracy:         {zsl_accuracy:.2f}%")

In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import clip
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.ImageFolder(root=r"C:\Users\user\Desktop\Minor sem 6\Laptop Components Combined\Train", transform=transform)
test_seen_dataset = datasets.ImageFolder(root=r"C:\Users\user\Desktop\Minor sem 6\Laptop Components Combined\Test Seen", transform=transform)
test_unseen_dataset = datasets.ImageFolder(root=r"C:\Users\user\Desktop\Minor sem 6\Laptop Components Combined\Test Unseen", transform=transform)

seen_classes = train_dataset.classes
unseen_classes = test_unseen_dataset.classes

print("Seen classes:", seen_classes)
print("Unseen classes:", unseen_classes)

model, preprocess = clip.load("ViT-B/32", device=device)

text_inputs_seen = clip.tokenize([f"A photo of {c} waste" for c in seen_classes]).to(device)
text_inputs_unseen = clip.tokenize([f"A photo of {c} waste" for c in unseen_classes]).to(device)

print("CLIP tokenizer ready.")

In [None]:
import torch
import numpy as np
from PIL import Image
from tqdm import tqdm
from torchvision import datasets

device = "cuda" if torch.cuda.is_available() else "cpu"

def get_text_features(class_list, model):
    text_inputs = clip.tokenize([f"A photo of {c} waste" for c in class_list]).to(device)
    with torch.no_grad():
        text_features = model.encode_text(text_inputs)
        text_features /= text_features.norm(dim=-1, keepdim=True)
    return text_inputs, text_features

def evaluate_clip(dataset, class_list, text_features, model, preprocess):
    correct = 0
    total = 0

    for image_path, label in tqdm(dataset.samples):
        image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)
        with torch.no_grad():
            image_features = model.encode_image(image)
            image_features /= image_features.norm(dim=-1, keepdim=True)

            similarity = (image_features @ text_features.T).squeeze(0).cpu().numpy()
            predicted_class_idx = np.argmax(similarity)

            if predicted_class_idx == label:
                correct += 1
            total += 1

    return (correct / total) * 100 if total > 0 else 0

text_inputs_seen, text_features_seen = get_text_features(seen_classes, model)
text_inputs_unseen, text_features_unseen = get_text_features(unseen_classes, model)

print("🔎 Evaluating on Seen classes (GZSL)...")
acc_seen = evaluate_clip(test_seen_dataset, seen_classes, text_features_seen, model, preprocess)

print("🔎 Evaluating on Unseen classes (GZSL)...")
all_classes = seen_classes + unseen_classes
_, text_features_all = get_text_features(all_classes, model)

acc_unseen = evaluate_clip(test_unseen_dataset, all_classes, text_features_all, model, preprocess)

print("🔎 Evaluating Closed-set ZSL (Unseen only)...")
zsl_accuracy = evaluate_clip(test_unseen_dataset, unseen_classes, text_features_unseen, model, preprocess)

harmonic_mean = (2 * acc_seen * acc_unseen) / (acc_seen + acc_unseen) if (acc_seen + acc_unseen) > 0 else 0

# Final print
print("\n📊 Final Evaluation Metrics")
print(f"Seen Class Accuracy (GZSL):      {acc_seen:.2f}%")
print(f"Unseen Class Accuracy (GZSL):    {acc_unseen:.2f}%")
print(f"Harmonic Mean (GZSL):            {harmonic_mean:.2f}%")
print(f"Closed-set ZSL Accuracy:         {zsl_accuracy:.2f}%")

In [1]:
import clip
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

model, preprocess = clip.load("ViT-B/32", device=device)

print("CLIP model loaded successfully!")

Using device: cuda
CLIP model loaded successfully!


In [None]:
train_root = r"C:\Users\user\Desktop\Minor sem 6\E-Waste Dataset.v44-fix-annotations-of-some-bar-phones-incorrectly-labelled-as-smartphones.multiclass\Training"
test_seen_root = r"C:\Users\user\Desktop\Minor sem 6\E-Waste Dataset.v44-fix-annotations-of-some-bar-phones-incorrectly-labelled-as-smartphones.multiclass\Test seen"
test_unseen_root = r"C:\Users\user\Desktop\Minor sem 6\E-Waste Dataset.v44-fix-annotations-of-some-bar-phones-incorrectly-labelled-as-smartphones.multiclass\Test Unseen"

In [3]:
from torchvision import datasets, transforms
import clip
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_root = r"C:\Users\user\Desktop\Minor sem 6\E-Waste Dataset.v44-fix-annotations-of-some-bar-phones-incorrectly-labelled-as-smartphones.multiclass\Training"
test_seen_root = r"C:\Users\user\Desktop\Minor sem 6\E-Waste Dataset.v44-fix-annotations-of-some-bar-phones-incorrectly-labelled-as-smartphones.multiclass\Test seen"
test_unseen_root = r"C:\Users\user\Desktop\Minor sem 6\E-Waste Dataset.v44-fix-annotations-of-some-bar-phones-incorrectly-labelled-as-smartphones.multiclass\Test Unseen"

# Load datasets without any filtering
train_dataset = datasets.ImageFolder(root=train_root, transform=transform)
test_seen_dataset = datasets.ImageFolder(root=test_seen_root, transform=transform)
test_unseen_dataset = datasets.ImageFolder(root=test_unseen_root, transform=transform)

print("Train classes:", train_dataset.classes)
print("Test Seen classes:", test_seen_dataset.classes)
print("Test Unseen classes:", test_unseen_dataset.classes)

# Load CLIP model and preprocessing function
model, preprocess = clip.load("ViT-B/32", device=device)

# Tokenize class names with prompt template
text_inputs_seen = clip.tokenize([f"A photo of {c} waste" for c in train_dataset.classes]).to(device)
text_inputs_unseen = clip.tokenize([f"A photo of {c} waste" for c in test_unseen_dataset.classes]).to(device)

print("CLIP tokenizer ready.")


Using device: cuda
Train classes: ['Air-Conditioner', 'Bar-Phone', 'Battery', 'Blood-Pressure-Monitor', 'Boiler', 'CRT-Monitor', 'CRT-TV', 'Calculator', 'Christmas-Lights', 'Clothes-Iron', 'Coffee-Machine', 'Compact-Fluorescent-Lamps', 'Computer-Keyboard', 'Computer-Mouse', 'Cooled-Dispenser', 'Cooling-Display', 'Dehumidifier', 'Desktop-PC', 'Digital-Oscilloscope', 'Dishwasher', 'Drone', 'Electric-Bicycle', 'Electric-Guitar', 'Electrocardiograph-Machine', 'Electronic-Keyboard', 'Exhaust-Fan', 'Flashlight', 'Flat-Panel-Monitor', 'Flat-Panel-TV', 'Floor-Fan', 'Freezer', 'Glucose-Meter', 'HDD', 'Headphone', 'LED-Bulb', 'Laptop', 'Microwave', 'Music-Player', 'Neon-Sign', 'Non-Cooled-Dispenser', 'Oven', 'PCB', 'Patient-Monitoring-System', 'Photovoltaic-Panel', 'PlayStation-5', 'Printer', 'Projector', 'Range-Hood', 'Refrigerator', 'Rotary-Mower', 'Router', 'SSD', 'Server', 'Smartphone', 'Smoke-Detector', 'Straight-Tube-Fluorescent-Lamp', 'Street-Lamp', 'TV-Remote-Control', 'Table-Lamp', 'Tab

In [None]:
!pip install git+https://github.com/openai/CLIP.git

In [4]:
import torch
import numpy as np
from PIL import Image
from tqdm import tqdm
from torchvision import datasets

device = "cuda" if torch.cuda.is_available() else "cpu"

def get_text_features(class_list, model):
    text_inputs = clip.tokenize([f"A photo of {c} waste" for c in class_list]).to(device)
    with torch.no_grad():
        text_features = model.encode_text(text_inputs)
        text_features /= text_features.norm(dim=-1, keepdim=True)
    return text_inputs, text_features

def evaluate_clip(dataset, class_list, text_features, model, preprocess):
    correct = 0
    total = 0

    for image_path, label in tqdm(dataset.samples):
        image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)
        with torch.no_grad():
            image_features = model.encode_image(image)
            image_features /= image_features.norm(dim=-1, keepdim=True)

            similarity = (image_features @ text_features.T).squeeze(0).cpu().numpy()
            predicted_class_idx = np.argmax(similarity)

            if predicted_class_idx == label:
                correct += 1
            total += 1

    return (correct / total) * 100 if total > 0 else 0

text_inputs_seen, text_features_seen = get_text_features(seen_classes, model)
text_inputs_unseen, text_features_unseen = get_text_features(unseen_classes, model)

print("🔎 Evaluating on Seen classes (GZSL)...")
acc_seen = evaluate_clip(test_seen_dataset, seen_classes, text_features_seen, model, preprocess)

print("🔎 Evaluating on Unseen classes (GZSL)...")
all_classes = seen_classes + unseen_classes
_, text_features_all = get_text_features(all_classes, model)

acc_unseen = evaluate_clip(test_unseen_dataset, all_classes, text_features_all, model, preprocess)

print("🔎 Evaluating Closed-set ZSL (Unseen only)...")
zsl_accuracy = evaluate_clip(test_unseen_dataset, unseen_classes, text_features_unseen, model, preprocess)

harmonic_mean = (2 * acc_seen * acc_unseen) / (acc_seen + acc_unseen) if (acc_seen + acc_unseen) > 0 else 0

# Final print
print("\n📊 Final Evaluation Metrics")
print(f"Seen Class Accuracy (GZSL):      {acc_seen:.2f}%")
print(f"Unseen Class Accuracy (GZSL):    {acc_unseen:.2f}%")
print(f"Harmonic Mean (GZSL):            {harmonic_mean:.2f}%")
print(f"Closed-set ZSL Accuracy:         {zsl_accuracy:.2f}%")

NameError: name 'seen_classes' is not defined

In [5]:
import torch
import numpy as np
from PIL import Image
from tqdm import tqdm
from torchvision import datasets, transforms
import clip

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# Define your dataset paths
train_root = r"C:\Users\user\Desktop\Minor sem 6\E-Waste Dataset.v44-fix-annotations-of-some-bar-phones-incorrectly-labelled-as-smartphones.multiclass\Training"
test_seen_root = r"C:\Users\user\Desktop\Minor sem 6\E-Waste Dataset.v44-fix-annotations-of-some-bar-phones-incorrectly-labelled-as-smartphones.multiclass\Test seen"
test_unseen_root = r"C:\Users\user\Desktop\Minor sem 6\E-Waste Dataset.v44-fix-annotations-of-some-bar-phones-incorrectly-labelled-as-smartphones.multiclass\Test Unseen"

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load datasets
train_dataset = datasets.ImageFolder(root=train_root, transform=transform)
test_seen_dataset = datasets.ImageFolder(root=test_seen_root, transform=transform)
test_unseen_dataset = datasets.ImageFolder(root=test_unseen_root, transform=transform)

# Extract classes
seen_classes = train_dataset.classes
unseen_classes = test_unseen_dataset.classes

print("Seen classes:", seen_classes)
print("Unseen classes:", unseen_classes)

# Load CLIP model and preprocess
model, preprocess = clip.load("ViT-B/32", device=device)

def get_text_features(class_list, model):
    text_inputs = clip.tokenize([f"A photo of {c} waste" for c in class_list]).to(device)
    with torch.no_grad():
        text_features = model.encode_text(text_inputs)
        text_features /= text_features.norm(dim=-1, keepdim=True)
    return text_inputs, text_features

def evaluate_clip(dataset, class_list, text_features, model, preprocess):
    correct = 0
    total = 0

    for image_path, label in tqdm(dataset.samples):
        image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)
        with torch.no_grad():
            image_features = model.encode_image(image)
            image_features /= image_features.norm(dim=-1, keepdim=True)

            similarity = (image_features @ text_features.T).squeeze(0).cpu().numpy()
            predicted_class_idx = np.argmax(similarity)

            if predicted_class_idx == label:
                correct += 1
            total += 1

    return (correct / total) * 100 if total > 0 else 0

# Get text features
text_inputs_seen, text_features_seen = get_text_features(seen_classes, model)
text_inputs_unseen, text_features_unseen = get_text_features(unseen_classes, model)

print("🔎 Evaluating on Seen classes (GZSL)...")
acc_seen = evaluate_clip(test_seen_dataset, seen_classes, text_features_seen, model, preprocess)

print("🔎 Evaluating on Unseen classes (GZSL)...")
all_classes = seen_classes + unseen_classes
_, text_features_all = get_text_features(all_classes, model)

acc_unseen = evaluate_clip(test_unseen_dataset, all_classes, text_features_all, model, preprocess)

print("🔎 Evaluating Closed-set ZSL (Unseen only)...")
zsl_accuracy = evaluate_clip(test_unseen_dataset, unseen_classes, text_features_unseen, model, preprocess)

harmonic_mean = (2 * acc_seen * acc_unseen) / (acc_seen + acc_unseen) if (acc_seen + acc_unseen) > 0 else 0

print("\n📊 Final Evaluation Metrics")
print(f"Seen Class Accuracy (GZSL):      {acc_seen:.2f}%")
print(f"Unseen Class Accuracy (GZSL):    {acc_unseen:.2f}%")
print(f"Harmonic Mean (GZSL):            {harmonic_mean:.2f}%")
print(f"Closed-set ZSL Accuracy:         {zsl_accuracy:.2f}%")


Using device: cuda
Seen classes: ['Air-Conditioner', 'Bar-Phone', 'Battery', 'Blood-Pressure-Monitor', 'Boiler', 'CRT-Monitor', 'CRT-TV', 'Calculator', 'Christmas-Lights', 'Clothes-Iron', 'Coffee-Machine', 'Compact-Fluorescent-Lamps', 'Computer-Keyboard', 'Computer-Mouse', 'Cooled-Dispenser', 'Cooling-Display', 'Dehumidifier', 'Desktop-PC', 'Digital-Oscilloscope', 'Dishwasher', 'Drone', 'Electric-Bicycle', 'Electric-Guitar', 'Electrocardiograph-Machine', 'Electronic-Keyboard', 'Exhaust-Fan', 'Flashlight', 'Flat-Panel-Monitor', 'Flat-Panel-TV', 'Floor-Fan', 'Freezer', 'Glucose-Meter', 'HDD', 'Headphone', 'LED-Bulb', 'Laptop', 'Microwave', 'Music-Player', 'Neon-Sign', 'Non-Cooled-Dispenser', 'Oven', 'PCB', 'Patient-Monitoring-System', 'Photovoltaic-Panel', 'PlayStation-5', 'Printer', 'Projector', 'Range-Hood', 'Refrigerator', 'Rotary-Mower', 'Router', 'SSD', 'Server', 'Smartphone', 'Smoke-Detector', 'Straight-Tube-Fluorescent-Lamp', 'Street-Lamp', 'TV-Remote-Control', 'Table-Lamp', 'Tabl

  attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)


🔎 Evaluating on Seen classes (GZSL)...


100%|██████████████████████████████████████████████████████████████████████████████| 4214/4214 [02:35<00:00, 27.08it/s]


🔎 Evaluating on Unseen classes (GZSL)...


100%|██████████████████████████████████████████████████████████████████████████████| 1453/1453 [00:51<00:00, 28.15it/s]


🔎 Evaluating Closed-set ZSL (Unseen only)...


100%|██████████████████████████████████████████████████████████████████████████████| 1453/1453 [00:33<00:00, 43.36it/s]


📊 Final Evaluation Metrics
Seen Class Accuracy (GZSL):      5.27%
Unseen Class Accuracy (GZSL):    0.07%
Harmonic Mean (GZSL):            0.14%
Closed-set ZSL Accuracy:         73.92%



