In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import torch

from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms


In [3]:
from data_utils import load_ham10000_dataset, LESION_TYPE

In [4]:
import clip

In [5]:
device = "cuda" if torch.cuda.is_available() else 'cpu'
clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)

# CLIP Zero-Shot Classification

In [6]:
ham_train, ham_test = load_ham10000_dataset(transform=clip_preprocess)

print(f"Train size: {len(ham_train)}")
print(f"Test size: {len(ham_test)}")
print(ham_train)
print(ham_test)




Loading HAM10000 dataset...
Train size: 9013
Test size: 1002
<torch.utils.data.dataset.Subset object at 0x0000014481429D60>
<torch.utils.data.dataset.Subset object at 0x0000014481429FA0>


In [7]:
BATCH_SIZE = 128

In [8]:
def clip_zero_shot(data_set, classes):
    # https://colab.research.google.com/drive/1IqJfogZdC61dgE4BDQILCJS-zUiphD4y?authuser=2#scrollTo=EuZFg3ZlHOVD
    data_loader = DataLoader(data_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
    # Encode text features here
    text_inputs = torch.cat([clip.tokenize(f"a photo of a {c}, a type of skin lesion.") for c in classes]).to(device)
    with torch.no_grad():
        text_features = clip_model.encode_text(text_inputs)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    # Encode image features here
    correct = 0
    total = 0
    for image, label in tqdm(data_loader):
        image, label = image.to(device), label.to(device)
        with torch.no_grad():
            image_features = clip_model.encode_image(image)
        image_features /= image_features.norm(dim=-1, keepdim=True)
        similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
        _, pred = similarity.max(dim=-1)
        correct += (pred == label).sum().item()
        total += len(label)

    return correct / total

In [9]:
lesion_classes = LESION_TYPE.values() # This was probably only because the class labels were numbers, not strs

In [10]:
accuracy = clip_zero_shot(data_set=ham_train, classes=lesion_classes)
print(f"\nAccuracy = {100*accuracy:.3f}%")

  0%|          | 0/141 [00:00<?, ?it/s]


Accuracy = 21.258%


# CLIP Linear-Probe Classification

## Logistic Regression

In [11]:
from sklearn.linear_model import LogisticRegression

In [12]:
def get_features(data_set):
    all_features = []
    all_labels = []

    with torch.no_grad():
        for images, labels in tqdm(DataLoader(data_set, batch_size=BATCH_SIZE)):
            features = clip_model.encode_image(images.to(device))
            all_features.append(features)
            all_labels.append(labels)

    return torch.cat(all_features).cpu().numpy(), torch.cat(all_labels).cpu().numpy()

In [None]:
# Calculate the image features
train_features, train_labels = get_features(ham_train)
test_features, test_labels = get_features(ham_test)

In [41]:
# Perform logistic regression
classifier = LogisticRegression(random_state=0, C=0.316, max_iter=10000, verbose=1, n_jobs=-1)
classifier.fit(train_features, train_labels)

# Evaluate using the logistic regression classifier
predictions = classifier.predict(test_features)
accuracy = np.mean((test_labels == predictions).astype(float))
print(f"\nAccuracy = {100*accuracy:.3f}%")

  0%|          | 0/141 [00:00<?, ?it/s]

  0%|          | 0/16 [00:00<?, ?it/s]

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   33.4s finished



Accuracy = 80.739%


## SVM

In [27]:
from sklearn import svm

In [31]:
# Perform logistic regression
classifier = svm.SVC(random_state=0, C=0.316, max_iter=5000, verbose=1)
classifier.fit(train_features, train_labels)

# Evaluate using the logistic regression classifier
predictions = classifier.predict(test_features)
accuracy = np.mean((test_labels == predictions).astype(float))
print(f"\nAccuracy = {100*accuracy:.3f}%")

[LibSVM]
Accuracy = 73.752%


# K-Means Clusteriungfrom scipy import stats

In [24]:
from scipy import stats

In [25]:
def knn(x_train, y_train, x_test, y_test, K=5):
    # Needs code here
    test_pred = []
    for i in tqdm(range(len(x_test))):
        distance = np.linalg.norm(x_train - x_test[i], axis=-1)
        indices = np.argsort(distance)[:K]
        neighbors_labels = y_train[indices]
        test_pred.append(stats.mode(neighbors_labels).mode[0])

    correct = (test_pred == y_test).sum()
    total = len(y_test)

    return correct / total

In [26]:
accuracy = knn(train_features, train_labels, test_features, test_labels, K=1)
print(f"\nAccuracy = {100*accuracy:.3f}%")

  0%|          | 0/1002 [00:00<?, ?it/s]

  test_pred.append(stats.mode(neighbors_labels).mode[0])



Accuracy = 75.549%


In [32]:
from sklearn.cluster import KMeans

In [35]:
# Perform logistic regression
classifier = KMeans(n_clusters=7)
classifier.fit(train_features, train_labels)

# Evaluate using the logistic regression classifier
predictions = classifier.predict(test_features)
accuracy = np.mean((test_labels == predictions).astype(float))
print(f"\nAccuracy = {100*accuracy:.3f}%")




Accuracy = 11.477%


# Random Forest

In [37]:
from sklearn.ensemble import RandomForestClassifier

In [40]:
# Perform logistic regression
classifier = RandomForestClassifier(random_state=0, verbose=1, n_jobs=-1)
classifier.fit(train_features, train_labels)

# Evaluate using the logistic regression classifier
predictions = classifier.predict(test_features)
accuracy = np.mean((test_labels == predictions).astype(float))
print(f"\nAccuracy = {100*accuracy:.3f}%")

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.2s



Accuracy = 72.455%


[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    1.7s finished
[Parallel(n_jobs=24)]: Using backend ThreadingBackend with 24 concurrent workers.
[Parallel(n_jobs=24)]: Done   2 tasks      | elapsed:    0.0s
[Parallel(n_jobs=24)]: Done 100 out of 100 | elapsed:    0.0s finished


# ResNet 50 (Zero-Shot)

In [1]:
resnet50 = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_resnet50', pretrained=True)
resnet50.eval()

NameError: name 'torch' is not defined

In [2]:
resnet50.to(device)

NameError: name 'resnet50' is not defined

In [3]:
def evaluate(model, dataloader):
    num_correct = 0
    total = 0
    for images, labels in tqdm(dataloader):
        images.to(device)
        labels.to(device)
        outputs = model(images)
        num_correct += torch.sum(labels == outputs)
        total += len(labels)
    return num_correct / total

In [None]:
def train(model, dataloader, epochs=100):
    for epoch in tqdm(epochs):
        for images, labels in tqdm(dataloader):
            