# === 🔐 Upload Kaggle API Token ===

In [None]:
from google.colab import files
uploaded = files.upload()  # Prompt user to upload any file

# Optional: verify only kaggle.json is uploaded
import os
if "kaggle.json" in uploaded:
    os.environ["KAGGLE_CONFIG_DIR"] = "/content"
    with open("kaggle.json", "wb") as f:
        f.write(uploaded["kaggle.json"])
    print("✅ Kaggle API key uploaded securely.")
else:
    print("⚠️ Please upload 'kaggle.json'")

Saving kaggle.json to kaggle.json
✅ Kaggle API key uploaded securely.


In [None]:
# Lists files in current directory
import os
print(os.listdir())

['.config', 'kaggle.json', 'sample_data']


# === 📦 Download Datasets from Kaggle ===
Using subirbiswas19/skin-disease-dataset as an example for the analyses below

In [None]:
import kagglehub

# Download skin diseases dataset
path = kagglehub.dataset_download("ismailpromus/skin-diseases-image-dataset")
print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/skin-diseases-image-dataset


# === 📁 Explore the File Tree ===

In [None]:
import os

base_path = '/kaggle/input/skin-diseases-image-dataset'  # This is where your datasets were extracted

for dataset in os.listdir(base_path):
    print(f"\nDataset: {dataset}")
    dataset_path = os.path.join(base_path, dataset)
    for version in os.listdir(dataset_path):
        version_path = os.path.join(dataset_path, version)
        print(f"  → Version: {version}")
        for root, dirs, files in os.walk(version_path):
            print(f"    📂 {root}")
            if dirs:
                print(f"      ├─ Subfolders (classes?): {dirs}")
            if files:
                print(f"      ├─ Sample files: {files[:3]}")
            break  # Only show the top-level folders/files



Dataset: IMG_CLASSES
  → Version: 1. Eczema 1677
    📂 /kaggle/input/skin-diseases-image-dataset/IMG_CLASSES/1. Eczema 1677
      ├─ Sample files: ['t-factitial-dermatitis-1.jpg', 'v-eczema-areola-13.jpg', 't-eczema-subacute-66.jpg']
  → Version: 10. Warts Molluscum and other Viral Infections - 2103
    📂 /kaggle/input/skin-diseases-image-dataset/IMG_CLASSES/10. Warts Molluscum and other Viral Infections - 2103
      ├─ Sample files: ['v-herpes-zoster-168.jpg', '5_0.jpg', 't-herpes-type-1-recurrent-41.jpg']
  → Version: 4. Basal Cell Carcinoma (BCC) 3323
    📂 /kaggle/input/skin-diseases-image-dataset/IMG_CLASSES/4. Basal Cell Carcinoma (BCC) 3323
      ├─ Sample files: ['ISIC_0060274.jpg', 'ISIC_0058084.jpg', 'ISIC_0072871.jpg']
  → Version: 7. Psoriasis pictures Lichen Planus and related diseases - 2k
    📂 /kaggle/input/skin-diseases-image-dataset/IMG_CLASSES/7. Psoriasis pictures Lichen Planus and related diseases - 2k
      ├─ Sample files: ['17_21.jpg', '5_0.jpg', 't-Psoriasis-H

# === 🧠 MobileNetV2 Training Pipeline ===

In [None]:
import os
import torch
from torchvision import datasets, transforms, models
from torch import nn, optim
from torch.utils.data import DataLoader, random_split
import pickle

In [None]:
# Paths and training parameters
data_dir = "/kaggle/input/skin-diseases-image-dataset/IMG_CLASSES"
batch_size = 32
num_epochs = 5  # Lowered for faster testing/training in hackathon

# Transform: Resize + Normalize (better for pretrained models)
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Speeds up training while keeping image quality
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImageNet normalization
                         std=[0.229, 0.224, 0.225])
])

# Load dataset and split
dataset = datasets.ImageFolder(root=data_dir, transform=transform)
num_classes = len(dataset.classes)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_ds, batch_size=batch_size, num_workers=2)

# === Load Pretrained MobileNetV2 ===
model = models.mobilenet_v2(pretrained=True)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)

# === Define Loss and Optimizer ===
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
criterion = nn.CrossEntropyLoss() # Good for multiclass classification
optimizer = optim.Adam(model.parameters(), lr=0.0005) # Lower LR for stable training

# Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {total_loss:.4f}")

# === Save Model as .pkl for AI Inference Chatbot ===
with open("promus_skin-diseases_model.pkl", "wb") as f: # Rename .pkl file depending on dataset
    pickle.dump(model, f)

print("✅ Model saved as promus_skin-diseases_model.pkl") # Change output statement depending on .pkl dataset

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 110MB/s]


Epoch [1/5] - Loss: 605.5136
Epoch [2/5] - Loss: 471.6553
Epoch [3/5] - Loss: 408.2691
Epoch [4/5] - Loss: 365.2166
Epoch [5/5] - Loss: 315.6733
✅ Model saved as promus_skin-diseases_model.pkl


# Testing the ML Model!

In [2]:
import torch
import pickle

# Load model
with open("/promus-skin_diseases.pkl", "rb") as file:
    model = pickle.load(file)
model.eval()  # Set model to evaluation mode

from torchvision import transforms
from PIL import Image

transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Same as training
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [3]:
def predict_image(image_path, model, class_names):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    image = Image.open(image_path).convert("RGB")
    input_tensor = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(input_tensor)
        probs = torch.nn.functional.softmax(output[0], dim=0)

    top_probs, top_classes = torch.topk(probs, 3)
    results = [(class_names[i], float(p)*100) for i, p in zip(top_classes, top_probs)]

    return results

In [7]:
from google.colab import files
uploaded = files.upload()  # Pick a skin image file

image_path = next(iter(uploaded))
class_names = [
    'Eczema',
    'Melanoma',
    'Atopic Dermatitis',
    'Basal Cell Carcinoma',
    'Melanocytic Nevi',
    'Benign Keratosis-like Lesions',
    'Psoriasis',
    'Seborrheic Keratose and other Benign Tumours',
    'Vascular Tumors',
    'Tinea Ringworm Candidiasis and other Fungal Infections'
]

predictions = predict_image(image_path, model, class_names)

# Print results
for label, prob in predictions:
    print(f"{label}: {prob:.2f}%")

Saving mahnoor-hand.jpg to mahnoor-hand (1).jpg
Benign Keratosis-like Lesions: 99.87%
Atopic Dermatitis: 0.10%
Melanocytic Nevi: 0.03%
