<a href="https://colab.research.google.com/github/JoaoFelipe08/Hack6ix/blob/main/HT6-ML_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# === 🔐 Upload Kaggle API Token ===

In [None]:
import os
from google.colab import files

# Directory the Kaggle client is told to read its credentials from.
kaggle_config_dir = "/content"

uploaded = files.upload()  # Prompt user to upload any file

# Only accept an upload literally named kaggle.json; anything else is ignored.
if "kaggle.json" in uploaded:
    os.environ["KAGGLE_CONFIG_DIR"] = kaggle_config_dir
    # Write the token into the configured directory (not whatever the current
    # working directory happens to be) so KAGGLE_CONFIG_DIR and the file agree.
    token_path = os.path.join(kaggle_config_dir, "kaggle.json")
    with open(token_path, "wb") as f:
        f.write(uploaded["kaggle.json"])
    # Restrict the credential file to the owner (read/write only).
    os.chmod(token_path, 0o600)
    print("✅ Kaggle API key uploaded securely.")
else:
    print("⚠️ Please upload 'kaggle.json'")

Saving kaggle.json to kaggle.json
✅ Kaggle API key uploaded securely.


In [None]:
# Show the entries of the current working directory
import os

cwd_entries = os.listdir()
print(cwd_entries)

['.config', 'kaggle_API', 'sample_data']


# === 📦 Download Datasets from Kaggle ===
Three datasets are downloaded below; only `subirbiswas19/skin-disease-dataset` is used as the example for the analyses that follow.

In [None]:
import kagglehub

# Kaggle dataset handles to fetch, in download order.
dataset_handles = [
    "ismailpromus/skin-diseases-image-dataset",   # skin diseases dataset
    "fanconic/skin-cancer-malignant-vs-benign",   # skin cancer dataset
    "subirbiswas19/skin-disease-dataset",         # bacterial/fungal infections
]

# Keep every download location, keyed by handle, instead of overwriting a
# single variable and losing the earlier paths.
dataset_paths = {}
for handle in dataset_handles:
    dataset_paths[handle] = kagglehub.dataset_download(handle)
    print("Path to dataset files:", dataset_paths[handle])

# `path` keeps its previous meaning: the location of the last dataset downloaded.
path = dataset_paths[dataset_handles[-1]]

Path to dataset files: /kaggle/input/skin-disease-dataset


# === 📁 Explore the File Tree ===

In [None]:
import os

base_path = '/kaggle/input/skin-disease-dataset/skin-disease-datasaet/test_set'  # Root folder whose immediate subfolders are summarised


def print_dataset_tree(root_dir, max_samples=3):
    """Print a compact summary of every folder directly under ``root_dir``.

    For each subfolder the function prints its path, the names of any nested
    subfolders, and the number of files together with the first
    ``max_samples`` file names (sorted). Top-level entries that are not
    directories are reported and skipped rather than being listed into.

    Args:
        root_dir: Directory to inspect.
        max_samples: Maximum number of file names shown per folder.

    Raises:
        OSError: Propagated from ``os.listdir`` if ``root_dir`` cannot be read.
    """
    for entry in sorted(os.listdir(root_dir)):
        entry_path = os.path.join(root_dir, entry)
        if not os.path.isdir(entry_path):
            # A plain file here cannot be listed; report it and move on.
            print(f"\nFile (skipped): {entry}")
            continue

        print(f"\nDataset: {entry}")

        # Split the folder's direct children into subfolders and files.
        subfolders, file_names = [], []
        for child in sorted(os.listdir(entry_path)):
            if os.path.isdir(os.path.join(entry_path, child)):
                subfolders.append(child)
            else:
                file_names.append(child)

        print(f"    📂 {entry_path}")
        if subfolders:
            print(f"      ├─ Subfolders (classes?): {subfolders}")
        if file_names:
            # One summary line per folder instead of one line per image.
            print(f"      ├─ {len(file_names)} files, sample: {file_names[:max_samples]}")


if os.path.isdir(base_path):
    print_dataset_tree(base_path)
else:
    print(f"⚠️ Dataset folder not found: {base_path}")



Dataset: FU-nail-fungus
  → Version: _52_305.jpg
  → Version: _0_5445.jpg
  → Version: _33_237.jpg
  → Version: _5_8221.jpg
  → Version: _67_8050.jpg
  → Version: _56_4689.jpg
  → Version: _48_1583.jpg
  → Version: _12_2082.jpg
  → Version: _39_8004.jpg
  → Version: _19_7696.jpg
  → Version: _62_6442.jpg
  → Version: _39_1950.jpg
  → Version: _30_9855.jpg
  → Version: _26_6729.jpg
  → Version: _28_4461.jpg
  → Version: _70_1578.jpg
  → Version: _40_7849.jpg
  → Version: _53_2402.jpg
  → Version: _20_160.jpg
  → Version: _22_9889.jpg
  → Version: _67_646.jpg
  → Version: _10_1593.jpg
  → Version: _25_9641.jpg
  → Version: _31_2446.jpg
  → Version: _11_8230.jpg
  → Version: _6_2845.jpg
  → Version: _33_8330.jpg
  → Version: _44_3754.jpg
  → Version: _47_6313.jpg
  → Version: _57_1019.jpg
  → Version: _55_5110.jpg
  → Version: _15_9151.jpg
  → Version: _59_1389.jpg

Dataset: FU-ringworm
  → Version: 76_FU-ringworm (22).jpg
  → Version: 40_FU-ringworm (17).jpg
  → Version: 107_FU-ringworm

# === 🧠 MobileNetV2 Training Pipeline ===

In [None]:
import os
import torch
from torchvision import datasets, transforms, models
from torch import nn, optim
from torch.utils.data import DataLoader, random_split
import pickle

In [None]:
# Paths and training parameters
# NOTE(review): this points at the dataset's `test_set` folder — confirm that
# training on it (rather than a dedicated training folder) is intended.
data_dir = "/kaggle/input/skin-disease-dataset/skin-disease-datasaet/test_set"
batch_size = 32
num_epochs = 5  # Lowered for faster testing/training in hackathon
seed = 42  # Fixed seed so the train/val partition is the same on every run

# Seed the global RNG (used for DataLoader shuffling and layer initialisation).
torch.manual_seed(seed)

# Transform: Resize + Normalize (better for pretrained models)
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Speeds up training while keeping image quality
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImageNet normalization
                         std=[0.229, 0.224, 0.225])
])

# Load dataset (one subfolder per class) and split 80/20 into train/validation
dataset = datasets.ImageFolder(root=data_dir, transform=transform)
num_classes = len(dataset.classes)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
# A dedicated, seeded generator makes the split independent of any other
# random calls made earlier in the session.
split_generator = torch.Generator().manual_seed(seed)
train_ds, val_ds = random_split(dataset, [train_size, val_size], generator=split_generator)

# Only the training loader shuffles; validation order does not matter.
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_ds, batch_size=batch_size, num_workers=2)

# === Load Pretrained MobileNetV2 ===
# `pretrained=True` is deprecated in torchvision; the explicit weights enum
# below selects the same ImageNet checkpoint that flag used to load.
model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)
# Swap the final classifier layer so the output size matches this dataset.
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)

# === Define Loss and Optimizer ===
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
criterion = nn.CrossEntropyLoss() # Good for multiclass classification
optimizer = optim.Adam(model.parameters(), lr=0.0005) # Lower LR for stable training

# Training loop, with a validation pass after every epoch
for epoch in range(num_epochs):
    # --- Train ---
    model.train()
    train_loss_sum = 0.0
    train_seen = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion returns a batch mean; weight it by batch size so the
        # epoch figure is a true per-sample average (the last batch may be smaller).
        train_loss_sum += loss.item() * labels.size(0)
        train_seen += labels.size(0)

    # Report a mean rather than a sum so the number does not scale with the
    # number of batches; max(..., 1) guards against an empty loader.
    train_loss = train_loss_sum / max(train_seen, 1)

    # --- Validate ---
    model.eval()
    val_loss_sum = 0.0
    val_correct = 0
    val_seen = 0
    with torch.no_grad():  # No gradients needed while evaluating
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            val_loss_sum += criterion(outputs, labels).item() * labels.size(0)
            val_correct += (outputs.argmax(dim=1) == labels).sum().item()
            val_seen += labels.size(0)

    val_loss = val_loss_sum / max(val_seen, 1)
    val_acc = val_correct / max(val_seen, 1)

    print(
        f"Epoch [{epoch+1}/{num_epochs}] - Loss: {train_loss:.4f}"
        f" - Val Loss: {val_loss:.4f} - Val Acc: {val_acc:.2%}"
    )

# === Save Model as .pkl for AI Inference Chatbot ===
import copy

# Export a CPU copy in eval mode:
#  - a model pickled while on the GPU cannot be unpickled on a CPU-only machine;
#  - eval mode makes dropout/batch-norm layers use their inference behaviour.
# Working on a deep copy leaves the in-memory `model` on its current device.
export_model = copy.deepcopy(model).to("cpu").eval()

# NOTE: this pickles the whole module object, so loading it requires the same
# model class definitions to be importable; only unpickle files you trust.
with open("biswas-skin_diseases_model.pkl", "wb") as f: # Rename .pkl file depending on dataset
    pickle.dump(export_model, f)

print("✅ Model saved as biswas-skin_diseases_model.pkl") # Change output statement depending on .pkl dataset

Epoch [1/5] - Loss: 8.7282
Epoch [2/5] - Loss: 1.8213
Epoch [3/5] - Loss: 0.4404
Epoch [4/5] - Loss: 0.1180
Epoch [5/5] - Loss: 0.0518
✅ Model saved as biswas-skin_diseases_model.pkl
