# 🩺 SwasthVedha Skin Disease Classification - OPTIMIZED ResNet50
## GPU-Accelerated Training with Hair Disease Model Techniques

**Target**: Improve from 44.83% to 90%+ accuracy
**Model**: ResNet50 with advanced transfer learning
**Strategy**: Apply the same techniques that achieved 100% on the Hair Disease model


## 🔧 GPU Setup & Libraries

In [None]:
# GPU Check and imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn.functional as F
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from PIL import Image
import json
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Report the runtime environment before choosing the compute device.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

# Prefer the first CUDA GPU; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # GPU properties are only queried when a CUDA device was selected.
    print(f"CUDA device: {torch.cuda.get_device_name()}")
    print(f"CUDA memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
print(f"Using device: {device}")

## 📂 Upload Your Skin Disease Dataset

**Instructions:**
1. Zip your skin disease dataset folder
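2. Make sure it contains `train/` and `test/` folders (or `train_set/` and `test_set/`); a `val/` folder is optional — without it, 20% of the training images are held out for validation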
3. Upload the zip file below

In [None]:
from google.colab import files
import zipfile

print("Please upload your Skin Disease dataset (zipped):")
uploaded = files.upload()

# Extract each uploaded archive into /content/. Anything that is not a zip is
# skipped so a stray upload does not abort the cell with BadZipFile.
for filename in uploaded.keys():
    if not zipfile.is_zipfile(filename):
        print(f"Skipping {filename}: not a zip archive")
        continue
    print(f"Extracting {filename}...")
    with zipfile.ZipFile(filename, 'r') as zip_ref:
        zip_ref.extractall('/content/')
    print(f"Extracted {filename}")

# Find the dataset directory: any directory under /content/ that holds a
# complete train/test pair in ONE naming convention (train+test or
# train_set+test_set). Mixed pairs are rejected because the split directories
# resolved below would not exist.
# The third os.walk item is deliberately NOT named `files`: that would rebind
# the google.colab `files` module imported above, which is needed again when
# the trained model is downloaded.
dataset_paths = []
for root, dirs, _filenames in os.walk('/content/'):
    has_plain_layout = 'train' in dirs and 'test' in dirs
    has_set_layout = 'train_set' in dirs and 'test_set' in dirs
    if has_plain_layout or has_set_layout:
        dataset_paths.append(root)

if dataset_paths:
    data_dir = dataset_paths[0]
    print(f"Dataset found at: {data_dir}")
else:
    print("Dataset structure not found. Please ensure your zip contains train/test folders.")
    data_dir = '/content/skin_disease'  # fallback

# Resolve the split directories for whichever naming convention is present.
# This runs for the fallback path too, so the variables used by the following
# cells are always defined (a missing folder then fails with a clear path
# error instead of a NameError).
folder_suffix = '_set' if all(
    os.path.isdir(os.path.join(data_dir, name)) for name in ('train_set', 'test_set')
) else ''
train_dir = os.path.join(data_dir, 'train' + folder_suffix)
test_dir = os.path.join(data_dir, 'test' + folder_suffix)
val_candidate = os.path.join(data_dir, 'val' + folder_suffix)
val_dir = val_candidate if os.path.exists(val_candidate) else None  # validation split is optional

print(f"Train dir: {train_dir}")
print(f"Test dir: {test_dir}")
print(f"Val dir: {val_dir}")

## 🔄 Advanced Data Preprocessing (Hair Disease Model Style)

In [None]:
# Training uses heavy augmentation; validation/test use a deterministic
# resize + center crop. All splits are normalized with the ImageNet mean/std
# expected by the pretrained ResNet50 backbone.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.3),
        transforms.RandomRotation(30),
        transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
        transforms.RandomAffine(degrees=0, translate=(0.15, 0.15), scale=(0.85, 1.15)),
        transforms.RandomPerspective(distortion_scale=0.2, p=0.3),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        # RandomErasing operates on tensors, so it must come after ToTensor.
        transforms.RandomErasing(p=0.2, scale=(0.02, 0.15))
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

# Create datasets
if val_dir and os.path.exists(val_dir):
    # Use separate validation set
    image_datasets = {
        'train': datasets.ImageFolder(train_dir, data_transforms['train']),
        'val': datasets.ImageFolder(val_dir, data_transforms['val']),
        'test': datasets.ImageFolder(test_dir, data_transforms['test'])
    }
else:
    # No validation folder: hold out 20% of the training images.
    # Two ImageFolder views of the SAME directory are used so each split gets
    # its own transform. Setting `.dataset.transform` on a random_split subset
    # would change the one dataset object shared by both subsets and silently
    # remove augmentation from the training split as well.
    full_train_dataset = datasets.ImageFolder(train_dir, data_transforms['train'])
    full_val_dataset = datasets.ImageFolder(train_dir, data_transforms['val'])

    train_size = int(0.8 * len(full_train_dataset))
    val_size = len(full_train_dataset) - train_size

    # One shared permutation keeps the two splits disjoint.
    shuffled_indices = torch.randperm(len(full_train_dataset)).tolist()
    train_dataset = torch.utils.data.Subset(full_train_dataset, shuffled_indices[:train_size])
    val_dataset = torch.utils.data.Subset(full_val_dataset, shuffled_indices[train_size:])

    image_datasets = {
        'train': train_dataset,
        'val': val_dataset,
        'test': datasets.ImageFolder(test_dir, data_transforms['test'])
    }

# Create data loaders; only the training loader shuffles.
dataloaders = {
    'train': torch.utils.data.DataLoader(image_datasets['train'], batch_size=24, shuffle=True, num_workers=4, pin_memory=True),
    'val': torch.utils.data.DataLoader(image_datasets['val'], batch_size=32, shuffle=False, num_workers=4, pin_memory=True),
    'test': torch.utils.data.DataLoader(image_datasets['test'], batch_size=32, shuffle=False, num_workers=4, pin_memory=True)
}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}

# Get class names (a Subset exposes them through its wrapped dataset).
if hasattr(image_datasets['train'], 'classes'):
    class_names = image_datasets['train'].classes
else:
    class_names = image_datasets['train'].dataset.classes

# ImageFolder assigns label indices per root folder, so a test folder with a
# different set of class sub-folders would be scored against shifted labels.
if image_datasets['test'].classes != class_names:
    print("WARNING: test classes differ from training classes; "
          "label indices will not line up and test metrics will be misleading.")

print(f"Dataset sizes:")
for phase in ['train', 'val', 'test']:
    print(f"  {phase}: {dataset_sizes[phase]} images")

print(f"\nClasses ({len(class_names)}): {class_names}")
print(f"Device: {device}")

## 🤖 Optimized ResNet50 Model (Hair Disease Architecture)

In [None]:
def create_optimized_skin_model(num_classes):
    """Build an ImageNet-pretrained ResNet50 with a custom classification head.

    All backbone weights are frozen except ``layer4`` and the last block of
    ``layer3``. The original ``fc`` layer is replaced by a three-layer MLP
    (in_features -> 1024 -> 512 -> num_classes) with dropout and batch norm.

    Args:
        num_classes: Number of output logits of the final linear layer.

    Returns:
        The modified ResNet50 module.
    """
    backbone = models.resnet50(pretrained=True)

    # Freeze everything first, then re-enable gradients only for the stages
    # that are fine-tuned.
    backbone.requires_grad_(False)
    for stage in (backbone.layer4, backbone.layer3[-1]):
        stage.requires_grad_(True)

    # The head is kept as one flat Sequential so the parameter names
    # (fc.0 ... fc.9) stay stable for saved checkpoints.
    in_features = backbone.fc.in_features
    head_layers = [
        nn.Dropout(0.6),
        nn.Linear(in_features, 1024),
        nn.BatchNorm1d(1024),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(1024, 512),
        nn.BatchNorm1d(512),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(512, num_classes),
    ]
    backbone.fc = nn.Sequential(*head_layers)

    return backbone

# Build the classifier with one output per discovered class and place it on
# the selected device.
model = create_optimized_skin_model(len(class_names)).to(device)

# Advanced loss function with label smoothing
class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against label-smoothed target distributions.

    The true class receives probability ``1 - smoothing``; the remaining
    ``smoothing`` mass is spread evenly over the other ``classes - 1`` classes.
    """

    def __init__(self, classes, smoothing=0.1):
        super().__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes
        self.dim = -1  # class axis used for softmax and the per-sample sum

    def forward(self, pred, target):
        """Return the mean smoothed cross-entropy.

        ``pred`` holds raw logits; ``target`` holds integer class indices and
        is scattered along dimension 1 of the target distribution.
        """
        log_probs = pred.log_softmax(dim=self.dim)
        # The target distribution is a constant; keep it out of autograd.
        with torch.no_grad():
            off_value = self.smoothing / (self.cls - 1)
            soft_targets = torch.full_like(log_probs, off_value)
            soft_targets.scatter_(1, target.unsqueeze(1), self.confidence)
        per_sample_loss = (-soft_targets * log_probs).sum(dim=self.dim)
        return per_sample_loss.mean()

# Loss: cross-entropy against label-smoothed targets.
criterion = LabelSmoothingLoss(classes=len(class_names), smoothing=0.1)

# Optimizer: AdamW with one parameter group per trainable stage. The deeper
# (more task-specific) the stage, the larger its learning rate.
optimizer = optim.AdamW(
    [
        dict(params=model.layer3[-1].parameters(), lr=5e-5),
        dict(params=model.layer4.parameters(), lr=1e-4),
        dict(params=model.fc.parameters(), lr=1e-3),
    ],
    weight_decay=1e-4,
)

# Scheduler: one-cycle policy whose per-group peak equals the group's
# configured learning rate. The schedule length is steps_per_epoch * epochs,
# and the training loop steps the scheduler once per training batch.
scheduler = lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=[group['lr'] for group in optimizer.param_groups],
    steps_per_epoch=len(dataloaders['train']),
    epochs=40,
    pct_start=0.3,
)

print(f"Model created with {len(class_names)} classes")
print(f"Total parameters: {sum(map(torch.numel, model.parameters())):,}")
print(f"Trainable parameters: {sum(p.numel() for p in filter(lambda p: p.requires_grad, model.parameters())):,}")

## 🚀 Advanced Training Function

In [None]:
def train_optimized_model(model, criterion, optimizer, scheduler, num_epochs=40):
    """Train ``model`` with per-epoch validation, checkpointing and early stopping.

    Relies on the module-level ``dataloaders``, ``dataset_sizes``, ``device``
    and ``class_names``. Each epoch runs a training pass and a validation
    pass. Whenever validation accuracy improves, the weights are kept in
    memory and a checkpoint is written to ``/content/skin_resnet50_best.pth``.
    Training stops after 8 consecutive epochs without improvement.

    Args:
        model: Network to train; it is left holding the best weights seen.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer over the model's trainable parameters.
        scheduler: LR scheduler stepped once per applied optimizer update.
        num_epochs: Maximum number of epochs.

    Returns:
        ``(model, history, best_acc)``. ``history`` maps ``train_loss``,
        ``train_acc``, ``val_loss`` and ``val_acc`` to per-epoch float lists;
        ``best_acc`` is the best validation accuracy as a float in [0, 1].
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    patience = 8
    patience_counter = 0

    # Mixed precision is only enabled on CUDA. Without loss scaling, small
    # fp16 gradients can underflow to zero, so backward() goes through a
    # GradScaler. Both objects become no-ops when use_amp is False.
    use_amp = device.type == 'cuda'
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

    # Training history
    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': []
    }

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 50)

        # Each epoch has training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0  # plain int so nothing device-bound leaks out

            # Progress tracking
            total_batches = len(dataloaders[phase])

            for batch_idx, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs = inputs.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)

                if is_train:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    with torch.cuda.amp.autocast(enabled=use_amp):
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    if is_train:
                        scaler.scale(loss).backward()
                        # Gradients must be unscaled before clipping so that
                        # max_norm applies to their true magnitude.
                        scaler.unscale_(optimizer)
                        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

                        scale_before = scaler.get_scale()
                        scaler.step(optimizer)
                        scaler.update()
                        # The scaler skips optimizer.step() when it finds
                        # inf/NaN gradients and then lowers its scale; only
                        # advance the LR schedule for updates that happened.
                        if scaler.get_scale() >= scale_before:
                            scheduler.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

                # Progress update every 20 batches
                if batch_idx % 20 == 0:
                    print(f'  {phase.capitalize()} batch {batch_idx+1}/{total_batches}', end='\r')

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print(f'{phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Save history
            history[f'{phase}_loss'].append(epoch_loss)
            history[f'{phase}_acc'].append(epoch_acc)

            # Track the best model and the early-stopping counter
            if phase == 'val':
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    patience_counter = 0

                    # Save checkpoint; best_acc is a Python float so the file
                    # does not embed a device tensor for this field.
                    torch.save({
                        'epoch': epoch + 1,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'best_acc': best_acc,
                        'class_names': class_names
                    }, '/content/skin_resnet50_best.pth')

                    print(f'*** New best validation accuracy: {best_acc:.4f} ***')
                else:
                    patience_counter += 1

        stop_early = patience_counter >= patience
        if stop_early:
            print(f'Early stopping triggered after {patience} epochs without improvement')

        print(f'Current best val Acc: {best_acc:.4f}\n')

        if stop_early:
            break

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')

    # Load best model weights
    model.load_state_dict(best_model_wts)
    return model, history, best_acc

## 🔥 Start Optimized Training

In [None]:
# Print a one-shot summary of the run configuration, then launch training.
print("\n".join([
    "🚀 Starting OPTIMIZED Skin Disease Classification Training...",
    f"Training on {dataset_sizes['train']} images",
    f"Validating on {dataset_sizes['val']} images",
    f"Testing on {dataset_sizes['test']} images",
    f"Classes: {len(class_names)}",
    f"Device: {device}",
    "Target: Improve from 44.83% to 90%+\n",
]))

# Train for at most 40 epochs; `model` is rebound to the trained network.
model, history, best_val_acc = train_optimized_model(
    model,
    criterion,
    optimizer,
    scheduler,
    num_epochs=40,
)

## 📊 Comprehensive Model Testing

In [None]:
# Comprehensive testing function
def comprehensive_test(model):
    """Evaluate ``model`` on the test loader and report detailed metrics.

    Relies on the module-level ``dataloaders``, ``dataset_sizes``, ``device``
    and ``class_names``. Prints overall and per-class accuracy, shows a
    confusion-matrix heatmap and prints a classification report.

    Args:
        model: Trained network; it is switched to eval mode.

    Returns:
        ``(test_acc, all_preds, all_labels)``: overall accuracy (correct
        predictions divided by ``dataset_sizes['test']``) and the flat lists
        of predicted and true class indices in loader order.
    """
    model.eval()
    num_classes = len(class_names)
    # Passing every class id explicitly keeps the confusion matrix and report
    # aligned with class_names even when a class never occurs in the test data.
    class_ids = list(range(num_classes))

    running_corrects = 0
    all_preds = []
    all_labels = []
    class_correct = [0.0] * num_classes
    class_total = [0.0] * num_classes

    with torch.no_grad():
        for inputs, labels in dataloaders['test']:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            hits = preds == labels
            running_corrects += torch.sum(hits)

            # Store for detailed analysis
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            # Per-class accuracy. The hit mask is NOT squeezed: squeezing a
            # batch of size 1 would give a 0-dim tensor that cannot be indexed.
            for label, hit in zip(labels.tolist(), hits.tolist()):
                class_correct[label] += hit
                class_total[label] += 1

    test_acc = running_corrects.double() / dataset_sizes['test']
    print(f'🎯 FINAL TEST ACCURACY: {test_acc:.4f} ({test_acc*100:.2f}%)')

    # Per-class accuracy (classes without test samples are omitted)
    print('\n📊 Per-class accuracy:')
    for i in range(num_classes):
        if class_total[i] > 0:
            acc = 100 * class_correct[i] / class_total[i]
            print(f'{class_names[i]}: {acc:.1f}% ({int(class_correct[i])}/{int(class_total[i])})')

    # Confusion matrix
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix - Skin Disease Classification')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.xticks(rotation=45)
    plt.yticks(rotation=45)
    plt.tight_layout()
    plt.show()

    # Classification report
    print('\n📋 Detailed Classification Report:')
    print(classification_report(all_labels, all_preds, labels=class_ids, target_names=class_names))

    return test_acc, all_preds, all_labels

# Run comprehensive testing on the trained model. `test_accuracy` is reused
# by the visualization and model-saving cells; the raw predictions and true
# labels are kept for any further analysis.
test_accuracy, predictions, true_labels = comprehensive_test(model)

## 📈 Training Results Visualization

In [None]:
# Plot comprehensive training history on a 2x2 grid:
# accuracy curves, loss curves, before/after bar chart, and a grade card.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# Training & Validation Accuracy
ax1.plot(history['train_acc'], label='Training Accuracy', linewidth=2)
ax1.plot(history['val_acc'], label='Validation Accuracy', linewidth=2)
ax1.set_title('Model Accuracy Over Time', fontsize=14, fontweight='bold')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Accuracy')
ax1.legend()
ax1.grid(True, alpha=0.3)
ax1.set_ylim([0, 1.1])

# Training & Validation Loss
ax2.plot(history['train_loss'], label='Training Loss', linewidth=2, color='red')
ax2.plot(history['val_loss'], label='Validation Loss', linewidth=2, color='orange')
ax2.set_title('Model Loss Over Time', fontsize=14, fontweight='bold')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Loss')
ax2.legend()
ax2.grid(True, alpha=0.3)

# Accuracy improvement comparison against the previous model's test accuracy.
old_acc = 44.83
new_acc = float(test_accuracy * 100)
improvement = new_acc - old_acc  # negative when the new model is worse

ax3.bar(['Previous Model\n(ResNet50)', 'Optimized Model\n(This Training)'],
        [old_acc, new_acc],
        color=['lightcoral', 'lightgreen'],
        edgecolor=['red', 'darkgreen'], linewidth=2)
# The `+` format flag prints the real sign, so a regression shows as "-x.x%"
# instead of the misleading "+-x.x%".
ax3.set_title(f'Accuracy Improvement: {improvement:+.1f}%', fontsize=14, fontweight='bold')
ax3.set_ylabel('Test Accuracy (%)')
ax3.set_ylim([0, 100])
for i, v in enumerate([old_acc, new_acc]):
    ax3.text(i, v + 2, f'{v:.1f}%', ha='center', fontweight='bold', fontsize=12)

# Performance grade derived from the test accuracy (percent).
if new_acc >= 90:
    grade = 'A+ (Exceptional)'
    color = 'darkgreen'
elif new_acc >= 80:
    grade = 'A (Excellent)'
    color = 'green'
elif new_acc >= 70:
    grade = 'B (Good)'
    color = 'orange'
elif new_acc >= 60:
    grade = 'C (Fair)'
    color = 'yellow'
else:
    grade = 'D (Needs Improvement)'
    color = 'red'

# Grade card: text only, axes hidden.
ax4.text(0.5, 0.6, 'FINAL GRADE', ha='center', fontsize=16, fontweight='bold')
ax4.text(0.5, 0.4, grade, ha='center', fontsize=20, fontweight='bold', color=color)
ax4.text(0.5, 0.2, f'Test Accuracy: {new_acc:.2f}%', ha='center', fontsize=14)
ax4.set_xlim([0, 1])
ax4.set_ylim([0, 1])
ax4.axis('off')

plt.tight_layout()
plt.show()

# Print comprehensive results
print("\n🎯 COMPREHENSIVE RESULTS SUMMARY:")
print("=" * 50)
print(f"📊 Previous Model Accuracy:     {old_acc:.2f}%")
print(f"🚀 New Optimized Accuracy:     {new_acc:.2f}%")
print(f"📈 Improvement:                {improvement:+.2f}%")
print(f"🏆 Performance Grade:          {grade}")
print(f"✅ Best Validation Accuracy:   {float(best_val_acc)*100:.2f}%")
print(f"\n🎉 Model Status: {'PRODUCTION READY!' if new_acc >= 80 else 'Needs More Training'}")

## 💾 Save & Download Optimized Model

In [None]:
# Re-import the Colab download helper under a private alias. The dataset
# discovery cell iterates `for root, dirs, files in os.walk(...)`, which
# rebinds the global name `files` to a list of file names, so relying on that
# global here would make every download fail.
from google.colab import files as colab_files

# Test accuracy of the previous model, used as the comparison baseline.
PREVIOUS_ACCURACY = 44.83

new_acc = float(test_accuracy * 100)
improvement = new_acc - PREVIOUS_ACCURACY  # negative when the new model is worse

# Save comprehensive model with all metadata
final_model_path = '/content/skin_disease_resnet50_optimized.pth'
torch.save({
    'model_state_dict': model.state_dict(),
    'class_names': class_names,
    'test_accuracy': float(test_accuracy),
    'val_accuracy': float(best_val_acc),
    'improvement': improvement,
    'history': history,
    'model_architecture': 'Optimized ResNet50',
    'training_details': {
        'optimizer': 'AdamW with different LRs',
        'scheduler': 'OneCycleLR',
        'augmentations': 'Advanced (10+ transforms)',
        'label_smoothing': 0.1,
        'mixed_precision': True,
        'gradient_clipping': True
    }
}, final_model_path)

# Save class mapping (stringified class index -> class name)
class_mapping = {str(i): class_name for i, class_name in enumerate(class_names)}
with open('/content/skin_class_mapping.json', 'w') as f:
    json.dump(class_mapping, f, indent=2)

# Save comprehensive model info. `grade` is the value computed by the
# results-visualization cell, which must have been run before this one.
model_info = {
    "model_name": "Skin Disease Classification ResNet50 OPTIMIZED",
    "architecture": "ResNet50 with advanced classifier",
    "num_classes": len(class_names),
    "classes": class_names,
    "performance": {
        "test_accuracy": f"{new_acc:.4f}",
        "val_accuracy": f"{float(best_val_acc)*100:.4f}",
        "previous_accuracy": "44.83",
        "improvement": f"{improvement:.2f}%",
        "grade": grade
    },
    "technical_specs": {
        "input_size": [224, 224],
        "preprocessing": "ImageNet normalization + advanced augmentation",
        "optimizer": "AdamW with layer-specific learning rates",
        "scheduler": "OneCycleLR",
        "regularization": "Dropout + BatchNorm + Weight Decay + Label Smoothing"
    },
    "training_environment": "Google Colab GPU",
    "production_ready": new_acc >= 80
}

with open('/content/skin_model_info.json', 'w') as f:
    json.dump(model_info, f, indent=2)

print("✅ Model saved successfully!")
print("📁 Files saved:")
print(f"   - {final_model_path}")
print("   - /content/skin_class_mapping.json")
print("   - /content/skin_model_info.json")

# Download files. A failed browser download is reported rather than raised,
# because the files remain available in the Colab file browser.
print("\n📥 Downloading files...")
try:
    colab_files.download('/content/skin_disease_resnet50_optimized.pth')
    colab_files.download('/content/skin_class_mapping.json')
    colab_files.download('/content/skin_model_info.json')
    print("\n🎉 All files downloaded successfully!")
    print("\n🚀 SKIN DISEASE MODEL OPTIMIZATION COMPLETE!")
    # `+` format flag prints the real sign instead of a hard-coded "+".
    print(f"📊 Improved from 44.83% to {new_acc:.2f}% ({improvement:+.1f}%)")
except Exception as e:
    print(f"Download error: {e}")
    print("You can manually download the files from the Files panel on the left")

## 🔧 SwasthVedha Integration Guide

### **Integration Steps:**

1. **Place downloaded files** in your SwasthVedha project:
   ```
   SwasthVedha/backend/models/
   ├── skin_disease_resnet50_optimized.pth
   ├── skin_class_mapping.json
   └── skin_model_info.json
   ```

2. **Update your backend code**:
   ```python
   import torch
   from torchvision import transforms
   import json

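   # Load optimized model. The checkpoint stores only weights and metadata,
   # so rebuild the same architecture first with the notebook's
   # create_optimized_skin_model() (copy that function into your backend).
   checkpoint = torch.load('models/skin_disease_resnet50_optimized.pth', map_location='cpu')
   model = create_optimized_skin_model(len(checkpoint['class_names']))
   model.load_state_dict(checkpoint['model_state_dict'])
   model.eval()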

   # Load classes
   with open('models/skin_class_mapping.json', 'r') as f:
       class_mapping = json.load(f)
   ```

3. **Expected Performance**:
   - ✅ Significant improvement over 44.83%
   - ✅ Production-ready if ≥80% test accuracy
   - ✅ Same quality as your Hair Disease model

**🎯 Mission Complete: Transform your skin disease model from moderate to excellent performance!**
