In [None]:
# Dataset roots (relative paths); each is scanned for one sub-folder per class.
train_dir, test_dir = 'Train', 'Test'

def get_data_info(data_dir):
    """Index an image folder laid out as ``data_dir/<class_name>/<entry>``.

    Class indices are assigned from the alphabetically sorted names of the
    sub-directories of ``data_dir``. Two splits that contain the same class
    folders (e.g. train and test) therefore always agree on the index of each
    class, regardless of the arbitrary order ``os.listdir`` returns. Entries
    of ``data_dir`` that are not directories are dropped *before* numbering,
    so a stray file cannot open a gap in the label range.

    Args:
        data_dir: Path of the directory holding one sub-directory per class.

    Returns:
        A tuple ``(image_paths, labels, class_counts)`` where
        ``image_paths`` is a list with one path per entry found inside a class
        directory, ``labels`` is the parallel list of integer class indices,
        and ``class_counts`` maps each class name to its number of entries
        (inserted in class-index order).
    """
    classes = sorted(
        name for name in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, name))
    )
    class_counts = {}
    image_paths = []
    labels = []

    for class_idx, class_name in enumerate(classes):
        class_path = os.path.join(data_dir, class_name)
        # Sorted so the sample order is reproducible across runs and machines.
        images = sorted(os.listdir(class_path))
        class_counts[class_name] = len(images)

        image_paths.extend(os.path.join(class_path, img) for img in images)
        labels.extend([class_idx] * len(images))

    return image_paths, labels, class_counts

# Index both splits: file paths, integer labels and per-class entry counts.
train_paths, train_labels, train_counts = get_data_info(train_dir)
test_paths, test_labels, test_counts = get_data_info(test_dir)

# Print one "<class>: <n> images" line per class, train split first.
for heading, split_counts in (
    ("Training set distribution:", train_counts),
    ("\nTest set distribution:", test_counts),
):
    print(heading)
    for class_name, count in split_counts.items():
        print(f"{class_name}: {count} images")

## 1. Data Loading and Exploration

## 2. Exploratory Data Analysis

In [None]:
def plot_class_distribution(train_counts, test_counts):
    """Draw grouped bars comparing per-class image counts of the two splits.

    Bars are matched by class *name*, not by position in the dicts, so the
    plot stays correct when the two mappings list their classes in different
    orders or when a class is missing from one split (it is drawn as 0).

    Args:
        train_counts: Mapping of class name to number of training images.
        test_counts: Mapping of class name to number of test images.
    """
    # Train classes first (in their own order), then any class that appears
    # only in the test split.
    class_names = list(train_counts)
    class_names += [name for name in test_counts if name not in train_counts]
    train_values = [train_counts.get(name, 0) for name in class_names]
    test_values = [test_counts.get(name, 0) for name in class_names]

    plt.figure(figsize=(12, 6))

    x = np.arange(len(class_names))
    width = 0.35

    # Shift each series half a bar width so the pair sits centred on its tick.
    plt.bar(x - width/2, train_values, width, label='Train')
    plt.bar(x + width/2, test_values, width, label='Test')

    plt.xlabel('Classes')
    plt.ylabel('Number of Images')
    plt.title('Class Distribution in Train and Test Sets')
    plt.xticks(x, class_names, rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

# Compare how many images each class has in the two splits.
plot_class_distribution(train_counts=train_counts, test_counts=test_counts)

# Display sample images from each class
def plot_sample_images(data_dir, classes, num_samples=5):
    """Show up to ``num_samples`` images per class in a grid, one row per class.

    Each panel is titled with the image's ``size`` as reported by PIL; the
    first panel of a row additionally carries the class name.

    Args:
        data_dir: Directory containing one sub-directory per class.
        classes: Iterable of class (sub-directory) names; any iterable is
            accepted, including a ``dict.keys()`` view.
        num_samples: Maximum number of images to show per class.
    """
    classes = list(classes)
    if not classes or num_samples < 1:
        return  # nothing to draw, and a zero-sized subplot grid is invalid

    # squeeze=False keeps `axes` two-dimensional even for a single row or a
    # single column, so axes[i, j] indexing below is always valid.
    fig, axes = plt.subplots(len(classes), num_samples,
                             figsize=(15, 3*len(classes)), squeeze=False)

    # Hide every frame up front so panels that stay empty (class with fewer
    # images than num_samples, or a name that is not a directory) show no ticks.
    for ax in axes.flat:
        ax.axis('off')

    for i, class_name in enumerate(classes):
        class_path = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        # Sorted so the same samples are shown on every run.
        images = sorted(os.listdir(class_path))[:num_samples]

        for j, img_name in enumerate(images):
            img_path = os.path.join(class_path, img_name)
            # Context manager releases the file handle once the image is drawn.
            with Image.open(img_path) as img:
                axes[i, j].imshow(img)
                size = img.size
            if j == 0:
                axes[i, j].set_title(f'{class_name}\n{size}', loc='left')
            else:
                axes[i, j].set_title(f'{size}')

    plt.tight_layout()
    plt.show()

# Preview a few training images for every class found in the training split.
plot_sample_images(train_dir, classes=train_counts.keys())

## 3. Data Preprocessing

In [None]:
class SkinCancerDataset(Dataset):
    """Map-style dataset that loads images from disk paths on demand.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, parallel to ``image_paths``.
        transform: Optional callable applied to the loaded PIL image.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
    """

    def __init__(self, image_paths, labels, transform=None):
        if len(image_paths) != len(labels):
            raise ValueError(
                f'image_paths and labels must have the same length, '
                f'got {len(image_paths)} and {len(labels)}'
            )
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is always converted to 3-channel RGB, so grayscale, palette
        or RGBA files yield the same channel count as ordinary colour photos.
        """
        # The context manager closes the underlying file; convert() returns a
        # fully loaded RGB image, so it remains usable after the close.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert('RGB')
        label = self.labels[idx]

        if self.transform is not None:
            image = self.transform(image)

        return image, label

# Preprocessing settings shared by the training and test pipelines.
IMAGE_SIZE = (224, 224)
# NOTE(review): presumably the ImageNet channel statistics expected by the
# pretrained backbone - confirm.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]


def _tensor_and_normalize():
    """Build a fresh ToTensor + Normalize pair that ends each pipeline."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
    ]


# Training pipeline: resize, random flips and rotation, then tensor + normalize.
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(20),
    *_tensor_and_normalize(),
])

# Test pipeline: same resize and normalization, no random augmentation.
test_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    *_tensor_and_normalize(),
])

# Wrap the indexed paths/labels of each split in a dataset.
train_dataset = SkinCancerDataset(
    image_paths=train_paths, labels=train_labels, transform=train_transform
)
test_dataset = SkinCancerDataset(
    image_paths=test_paths, labels=test_labels, transform=test_transform
)

# Batch both splits identically; only the training loader shuffles.
batch_size = 32
loader_options = dict(batch_size=batch_size, num_workers=4)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

## 4. Model Architecture

In [None]:
class SkinCancerModel(nn.Module):
    """Pretrained ResNet-50 with its final layer replaced by a two-layer head.

    All but the last 20 parameter tensors of the stock network are frozen;
    the replacement head (Linear -> ReLU -> Dropout -> Linear) is trainable.

    Args:
        num_classes: Number of output logits produced by the head.
    """

    def __init__(self, num_classes):
        super().__init__()

        # `pretrained=True` is deprecated in newer torchvision releases in
        # favour of an explicit weights enum. Use the enum when the installed
        # version exposes it, and fall back to the legacy flag otherwise.
        # IMAGENET1K_V1 is the checkpoint the legacy flag loads.
        weights_enum = getattr(models, 'ResNet50_Weights', None)
        if weights_enum is not None:
            self.model = models.resnet50(weights=weights_enum.IMAGENET1K_V1)
        else:
            self.model = models.resnet50(pretrained=True)

        # Freeze early layers. The slice is taken on the stock network, i.e.
        # before the head is swapped below, so the 20 tensors left trainable
        # are counted including the original `fc` parameters that are about
        # to be discarded (assuming `fc` is registered last - TODO confirm).
        for param in list(self.model.parameters())[:-20]:
            param.requires_grad = False

        # Modify the final layer for our number of classes
        num_features = self.model.fc.in_features
        self.model.fc = nn.Sequential(
            nn.Linear(num_features, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return the logits of the wrapped network for the batch ``x``."""
        return self.model(x)

# Use the GPU when CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# One output logit per class found in the training split.
model = SkinCancerModel(num_classes=len(train_counts))
model = model.to(device)

# Cross-entropy loss, optimized with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

## 5. Training Process

In [None]:
def _run_epoch(model, loader, criterion, device, optimizer=None, desc=None):
    """Run one pass over ``loader``: train if ``optimizer`` is given, else evaluate.

    Returns:
        ``(mean_loss, accuracy_percent)``, both averaged per sample.
        ``(0.0, 0.0)`` when the loader yields no samples.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    correct = 0
    total = 0

    batches = tqdm(loader, desc=desc) if desc is not None else loader
    with torch.set_grad_enabled(training):
        for images, labels in batches:
            images, labels = images.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            # Weight the batch loss by its size so a short final batch does
            # not count as much as a full one (assumes `criterion` returns
            # the batch mean, as the default reduction does).
            batch_count = labels.size(0)
            loss_sum += loss.item() * batch_count
            total += batch_count
            correct += (outputs.argmax(dim=1) == labels).sum().item()

    if total == 0:
        return 0.0, 0.0
    return loss_sum / total, 100 * correct / total


def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs=20):
    """Train ``model`` and evaluate it on ``test_loader`` after every epoch.

    Batches are moved to the device the model's parameters live on, so the
    function does not depend on a module-level ``device`` variable.

    Args:
        model: Network to optimize.
        train_loader: Iterable of ``(images, labels)`` batches used for training.
        test_loader: Iterable of ``(images, labels)`` batches used for evaluation.
        criterion: Loss callable ``criterion(outputs, labels)``; expected to
            return the mean loss of the batch.
        optimizer: Optimizer stepping the model's parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        ``(train_losses, test_losses, train_accuracies, test_accuracies)``,
        four lists with one entry per epoch. Losses are per-sample means and
        accuracies are percentages.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    train_losses = []
    test_losses = []
    train_accuracies = []
    test_accuracies = []

    for epoch in range(num_epochs):
        # Training phase
        epoch_loss, epoch_acc = _run_epoch(
            model, train_loader, criterion, run_device,
            optimizer=optimizer, desc=f'Epoch {epoch+1}/{num_epochs}',
        )
        train_losses.append(epoch_loss)
        train_accuracies.append(epoch_acc)

        # Validation phase (no gradient tracking, no parameter updates)
        test_loss, test_acc = _run_epoch(model, test_loader, criterion, run_device)
        test_losses.append(test_loss)
        test_accuracies.append(test_acc)

        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.2f}%')
        print(f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%\n')

    return train_losses, test_losses, train_accuracies, test_accuracies

# Run training; the epoch count is left at train_model's default.
train_losses, test_losses, train_accuracies, test_accuracies = train_model(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
)

## 6. Model Evaluation

In [None]:
# Plot training history: loss in the left panel, accuracy in the right one.
plt.figure(figsize=(12, 4))

# One tuple per panel:
# (train curve, train label, test curve, test label, y-axis label, title)
history_panels = [
    (train_losses, 'Train Loss',
     test_losses, 'Test Loss',
     'Loss', 'Training and Test Loss'),
    (train_accuracies, 'Train Accuracy',
     test_accuracies, 'Test Accuracy',
     'Accuracy (%)', 'Training and Test Accuracy'),
]

for position, panel in enumerate(history_panels, start=1):
    train_curve, train_label, test_curve, test_label, y_label, panel_title = panel
    plt.subplot(1, 2, position)
    plt.plot(train_curve, label=train_label)
    plt.plot(test_curve, label=test_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(panel_title)
    plt.legend()

plt.tight_layout()
plt.show()

# Generate confusion matrix and classification report
def evaluate_model(model, test_loader, class_names):
    """Plot a confusion matrix and print a classification report.

    The label set is fixed to ``0 .. len(class_names) - 1`` for both the
    matrix and the report, so every class gets its row/column even when it
    never occurs among the true or predicted labels. Without this, the matrix
    would shrink and no longer line up with ``class_names``.

    Args:
        model: Trained network; it is switched to evaluation mode.
        test_loader: Iterable of ``(images, labels)`` batches.
        class_names: Class names ordered by class index.
    """
    class_names = list(class_names)
    label_ids = list(range(len(class_names)))

    # Run on whatever device the model lives on instead of a global variable.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images.to(run_device))
            predicted = outputs.argmax(dim=1)

            all_preds.extend(predicted.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    # Create confusion matrix
    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.xticks(rotation=45)
    plt.yticks(rotation=45)
    plt.tight_layout()
    plt.show()

    # Print classification report
    print('\nClassification Report:')
    print(classification_report(all_labels, all_preds,
                                labels=label_ids, target_names=class_names))

# Evaluate on the test split, naming classes after the training-split keys.
evaluate_model(model, test_loader, class_names=list(train_counts))

# Save the model's state dict (parameters and buffers) to disk.
torch.save(obj=model.state_dict(), f='skin_cancer_model.pth')