In [12]:
import pandas as pd
import numpy as np
import os
import random
from tqdm import tqdm
from pathlib import Path
import cv2 as cv
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Sequential, Model, Input
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.utils.class_weight import compute_class_weight
import warnings
warnings.filterwarnings("ignore")
random.seed(45)

In [13]:
# **IMPROVEMENT 1: Add comprehensive logging and visualization**
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [14]:
# **IMPROVEMENT 2: Create class to manage experiments**
class ExperimentManager:
    def __init__(self, experiment_name):
        self.experiment_name = experiment_name
        self.results_dir = f"results/{experiment_name}"
        os.makedirs(self.results_dir, exist_ok=True)
        self.results = {}
    
    def log_experiment(self, model_name, metrics, config):
        self.results[model_name] = {
            'metrics': metrics,
            'config': config
        }
        
        # Save results to JSON
        import json
        with open(f"{self.results_dir}/results.json", 'w') as f:
            json.dump(self.results, f, indent=4)

In [15]:
# **IMPROVEMENT 3: Advanced data analysis and visualization**
def analyze_dataset_comprehensive(meta_train_df):
    """Perform comprehensive dataset analysis for HD/DI requirements"""
    
    # Class imbalance analysis
    class_distribution = meta_train_df['label'].value_counts()
    imbalance_ratio = class_distribution.max() / class_distribution.min()
    
    # Create visualizations
    fig, axes = plt.subplots(2, 2, figsize=(20, 16))
    
    # Plot 1: Class distribution
    class_distribution.plot(kind='bar', ax=axes[0,0])
    axes[0,0].set_title(f'Class Distribution (Imbalance Ratio: {imbalance_ratio:.2f}:1)')
    axes[0,0].set_xlabel('Disease Class')
    axes[0,0].set_ylabel('Number of Images')
    
    # Plot 2: Variety distribution
    variety_distribution = meta_train_df['variety'].value_counts()
    variety_distribution.plot(kind='bar', ax=axes[0,1])
    axes[0,1].set_title('Variety Distribution')
    axes[0,1].set_xlabel('Paddy Variety')
    axes[0,1].set_ylabel('Number of Images')
    
    # Plot 3: Age distribution
    axes[1,0].hist(meta_train_df['age'], bins=30, edgecolor='black')
    axes[1,0].set_title('Age Distribution')
    axes[1,0].set_xlabel('Age (days)')
    axes[1,0].set_ylabel('Frequency')
    
    # Plot 4: Class-Variety heatmap
    cross_table = pd.crosstab(meta_train_df['label'], meta_train_df['variety'])
    sns.heatmap(cross_table, cmap='YlOrRd', annot=True, fmt='d', ax=axes[1,1])
    axes[1,1].set_title('Class-Variety Distribution Heatmap')
    
    plt.tight_layout()
    plt.savefig('dataset_analysis.png', dpi=300, bbox_inches='tight')
    plt.close()
    
    return {
        'class_distribution': class_distribution,
        'imbalance_ratio': imbalance_ratio,
        'variety_distribution': variety_distribution,
        'age_stats': {
            'mean': meta_train_df['age'].mean(),
            'std': meta_train_df['age'].std(),
            'min': meta_train_df['age'].min(),
            'max': meta_train_df['age'].max()
        }
    }

In [16]:
# **IMPROVEMENT 4: Advanced data augmentation with class balancing**
def create_balanced_data_generator(train_df, batch_size=32, img_size=(256, 256)):
    """Create data generator with class balancing and variety-aware sampling"""
    
    # Calculate class weights
    class_weights = compute_class_weight(
        'balanced',
        classes=np.unique(train_df['label']),
        y=train_df['label']
    )
    class_weight_dict = dict(enumerate(class_weights))
    
    # Advanced augmentation pipeline
    class_specific_augmentation = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1.0/255.0,
        rotation_range=20,
        width_shift_range=0.15,
        height_shift_range=0.15,
        horizontal_flip=True,
        vertical_flip=True,
        zoom_range=0.15,
        fill_mode='nearest',
        # More aggressive augmentation for minority classes
        shear_range=0.15,
        brightness_range=[0.8, 1.2],
        channel_shift_range=20.0
    )
    
    return class_specific_augmentation, class_weight_dict

In [17]:
# **IMPROVEMENT 5: Enhanced ViT with attention visualization**
class EnhancedViT(tf.keras.Model):
    def __init__(self, num_classes, **kwargs):
        super(EnhancedViT, self).__init__()
        self.num_classes = num_classes
        self.attention_weights = []
        
        # Build the model architecture
        self.data_augmentation = keras.Sequential([
            layers.Normalization(),
            layers.Resizing(72, 72),
            layers.RandomFlip("horizontal"),
            layers.RandomRotation(factor=0.05),
            layers.RandomZoom(height_factor=0.3, width_factor=0.3),
            layers.RandomContrast(factor=0.2),  # Added
            layers.RandomBrightness(factor=0.2)  # Added
        ], name="data_augmentation")
        
        self.patches = Patches(patch_size=6)
        self.patch_encoder = PatchEncoder(num_patches=144, projection_dim=64)
        
        # Transformer layers with attention weight collection
        self.transformer_blocks = []
        for i in range(8):
            transformer_block = TransformerBlock(
                embed_dim=64,
                num_heads=4,
                ff_dim=128,
                rate=0.1,
                name=f"transformer_block_{i}"
            )
            self.transformer_blocks.append(transformer_block)
        
        self.layernorm = layers.LayerNormalization(epsilon=1e-6)
        self.flatten = layers.Flatten()
        self.dropout = layers.Dropout(0.5)
        
        # Enhanced MLP head
        self.mlp_head = keras.Sequential([
            layers.Dense(2048, activation='gelu'),
            layers.Dropout(0.3),
            layers.Dense(1024, activation='gelu'),
            layers.Dropout(0.3),
            layers.Dense(512, activation='gelu'),
            layers.Dropout(0.2),
            layers.Dense(num_classes)
        ])
    
    def call(self, inputs, training=None, return_attention=False):
        # Data augmentation
        x = self.data_augmentation(inputs)
        
        # Create patches and encode
        patches = self.patches(x)
        encoded_patches = self.patch_encoder(patches)
        
        # Pass through transformer blocks
        self.attention_weights = []
        for transformer in self.transformer_blocks:
            encoded_patches, attention_weights = transformer(encoded_patches, return_attention=True)
            if return_attention:
                self.attention_weights.append(attention_weights)
        
        # Final processing
        representation = self.layernorm(encoded_patches)
        representation = self.flatten(representation)
        representation = self.dropout(representation, training=training)
        
        # Classification
        logits = self.mlp_head(representation)
        
        if return_attention:
            return logits, self.attention_weights
        return logits

In [18]:
# **IMPROVEMENT 6: Cross-validation with stratified sampling**
def stratified_cross_validation(X, y, n_splits=5):
    """Perform stratified k-fold cross-validation"""
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    
    cv_results = []
    
    for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
        logger.info(f"Training fold {fold + 1}/{n_splits}")
        
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]
        
        # Create and train model for this fold
        model = create_enhanced_vit_classifier()
        
        # Train with early stopping and learning rate scheduling
        callbacks = [
            tf.keras.callbacks.EarlyStopping(
                monitor='val_accuracy',
                patience=15,
                restore_best_weights=True
            ),
            tf.keras.callbacks.ReduceLROnPlateau(
                monitor='val_accuracy',
                factor=0.5,
                patience=7,
                min_lr=1e-7
            ),
            tf.keras.callbacks.ModelCheckpoint(
                f'models/fold_{fold}_best_model.keras',
                monitor='val_accuracy',
                save_best_only=True
            )
        ]
        
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=100,
            batch_size=32,
            callbacks=callbacks,
            verbose=1
        )
        
        # Evaluate fold
        y_pred = model.predict(X_val)
        y_pred_classes = np.argmax(y_pred, axis=1)
        
        # Store results
        fold_results = {
            'fold': fold + 1,
            'best_val_accuracy': max(history.history['val_accuracy']),
            'final_val_accuracy': history.history['val_accuracy'][-1],
            'confusion_matrix': confusion_matrix(y_val, y_pred_classes),
            'classification_report': classification_report(y_val, y_pred_classes, output_dict=True)
        }
        
        cv_results.append(fold_results)
    
    return cv_results

In [19]:
# **IMPROVEMENT 7: Model ensemble implementation**
class ModelEnsemble:
    def __init__(self, models):
        self.models = models
    
    def predict(self, X):
        """Ensemble prediction using voting"""
        predictions = []
        
        for model in self.models:
            pred = model.predict(X)
            predictions.append(pred)
        
        # Weighted average (can be adjusted based on individual model performance)
        ensemble_pred = np.mean(predictions, axis=0)
        return ensemble_pred
    
    def predict_proba(self, X):
        """Get probability predictions from ensemble"""
        predictions = []
        
        for model in self.models:
            pred = model.predict(X)
            predictions.append(tf.nn.softmax(pred).numpy())
        
        # Average probabilities
        ensemble_proba = np.mean(predictions, axis=0)
        return ensemble_proba

In [20]:
# **IMPROVEMENT 8: Comprehensive evaluation and visualization**
def comprehensive_model_evaluation(model, X_test, y_test, class_names):
    """Comprehensive evaluation with advanced metrics and visualizations"""
    
    # Get predictions
    y_pred_probs = model.predict(X_test)
    y_pred = np.argmax(y_pred_probs, axis=1)
    
    # Create evaluation directory
    eval_dir = "model_evaluation"
    os.makedirs(eval_dir, exist_ok=True)
    
    # 1. Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.savefig(f'{eval_dir}/confusion_matrix.png', dpi=300)
    plt.close()
    
    # 2. Per-class metrics
    report = classification_report(y_test, y_pred, target_names=class_names, output_dict=True)
    
    # Visualize per-class metrics
    metrics_df = pd.DataFrame(report).transpose()
    metrics_df = metrics_df.drop(['accuracy', 'macro avg', 'weighted avg'])
    
    fig, ax = plt.subplots(figsize=(12, 8))
    metrics_df[['precision', 'recall', 'f1-score']].plot(kind='bar', ax=ax)
    plt.title('Per-Class Performance Metrics')
    plt.xlabel('Disease Class')
    plt.ylabel('Score')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.savefig(f'{eval_dir}/per_class_metrics.png', dpi=300, bbox_inches='tight')
    plt.close()
    
    # 3. ROC curves for each class
    from sklearn.metrics import roc_curve, auc
    from sklearn.preprocessing import label_binarize
    
    y_test_bin = label_binarize(y_test, classes=range(len(class_names)))
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    
    plt.figure(figsize=(10, 8))
    for i in range(len(class_names)):
        fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], y_pred_probs[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
        plt.plot(fpr[i], tpr[i], label=f'{class_names[i]} (AUC = {roc_auc[i]:.3f})')
    
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curves for Each Class')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.savefig(f'{eval_dir}/roc_curves.png', dpi=300, bbox_inches='tight')
    plt.close()
    
    # 4. Error analysis
    error_indices = np.where(y_pred != y_test)[0]
    error_analysis = {}
    
    for idx in error_indices:
        true_class = class_names[y_test[idx]]
        pred_class = class_names[y_pred[idx]]
        confidence = y_pred_probs[idx, y_pred[idx]]
        
        key = f"{true_class} -> {pred_class}"
        if key not in error_analysis:
            error_analysis[key] = []
        
        error_analysis[key].append({
            'index': idx,
            'confidence': confidence
        })
    
    # Visualize error patterns
    error_counts = {k: len(v) for k, v in error_analysis.items()}
    error_df = pd.DataFrame(list(error_counts.items()), columns=['Error Type', 'Count'])
    error_df = error_df.sort_values('Count', ascending=False).head(10)
    
    plt.figure(figsize=(12, 8))
    sns.barplot(data=error_df, x='Count', y='Error Type')
    plt.title('Top 10 Most Common Misclassifications')
    plt.xlabel('Number of Errors')
    plt.tight_layout()
    plt.savefig(f'{eval_dir}/error_analysis.png', dpi=300, bbox_inches='tight')
    plt.close()
    
    return {
        'confusion_matrix': cm,
        'classification_report': report,
        'roc_auc': roc_auc,
        'error_analysis': error_analysis
    }

In [21]:
# **IMPROVEMENT 9: Attention visualization for interpretability**
def visualize_attention_maps(model, image, class_names, save_path='attention_maps.png'):
    """Visualize attention maps for interpretability"""
    
    # Get predictions with attention weights
    predictions, attention_weights = model(tf.expand_dims(image, 0), return_attention=True)
    predicted_class = class_names[tf.argmax(predictions[0])]
    
    # Create visualization
    fig, axes = plt.subplots(3, 3, figsize=(15, 15))
    fig.suptitle(f'Attention Maps - Predicted: {predicted_class}', fontsize=16)
    
    # Show original image
    axes[0, 1].imshow(image)
    axes[0, 1].set_title('Original Image')
    axes[0, 1].axis('off')
    
    # Show attention maps from different layers
    for i, attn_weight in enumerate(attention_weights[:8]):
        row = (i + 1) // 3
        col = (i + 1) % 3
        
        # Average attention across heads
        attn_map = tf.reduce_mean(attn_weight[0], axis=0)
        
        # Reshape to 2D grid
        grid_size = int(np.sqrt(attn_map.shape[0]))
        attn_map_2d = tf.reshape(attn_map, (grid_size, grid_size))
        
        # Resize to image size
        attn_map_resized = tf.image.resize(
            tf.expand_dims(attn_map_2d, -1),
            (image.shape[0], image.shape[1])
        )
        
        # Display
        im = axes[row, col].imshow(attn_map_resized[:, :, 0], cmap='hot', alpha=0.7)
        axes[row, col].imshow(image, alpha=0.3)
        axes[row, col].set_title(f'Layer {i+1}')
        axes[row, col].axis('off')
    
    plt.tight_layout()
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.close()

In [23]:
# **IMPROVEMENT 10: Main training pipeline with all enhancements**
def train_enhanced_model(meta_train_path, train_images_path, test_images_path):
    """Main training pipeline with all HD/DI improvements"""
    
    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=42
    )
    
    # Initialize experiment manager
    experiment = ExperimentManager("enhanced_vit_task1")
    
    # Load metadata
    meta_df = pd.read_csv(meta_train_path)
    
    # Comprehensive dataset analysis
    logger.info("Performing comprehensive dataset analysis...")
    dataset_analysis = analyze_dataset_comprehensive(meta_df)
    
    # Create balanced data generators
    logger.info("Creating balanced data generators...")
    data_gen, class_weights = create_balanced_data_generator(meta_df)
    
    # Train with cross-validation
    logger.info("Starting stratified cross-validation...")
    cv_results = stratified_cross_validation(X_train, y_train)
    
    # Create ensemble model
    logger.info("Creating ensemble model...")
    ensemble_models = []
    for fold in range(5):
        model = tf.keras.models.load_model(f'models/fold_{fold}_best_model.keras')
        ensemble_models.append(model)
    
    ensemble = ModelEnsemble(ensemble_models)
    
    # Comprehensive evaluation
    logger.info("Performing comprehensive evaluation...")
    evaluation_results = comprehensive_model_evaluation(
        ensemble,
        X_test,
        y_test,
        class_names=list(meta_df['label'].unique())
    )
    
    # Generate attention visualizations
    logger.info("Generating attention visualizations...")
    sample_indices = np.random.choice(len(X_test), 5, replace=False)
    for i, idx in enumerate(sample_indices):
        visualize_attention_maps(
            ensemble_models[0],  # Use first model for attention
            X_test[idx],
            list(meta_df['label'].unique()),
            f'attention_visualization_{i}.png'
        )
    
    # Save final results
    experiment.log_experiment(
        "enhanced_ensemble_vit",
        evaluation_results,
        {
            'architecture': 'Enhanced ViT with ensemble',
            'cross_validation_folds': 5,
            'augmentation': 'Advanced class-balanced augmentation',
            'class_weights': class_weights
        }
    )
    
    # Generate final predictions for submission
    logger.info("Generating final predictions...")
    test_predictions = ensemble.predict_proba(test_images)
    predicted_labels = [class_names[np.argmax(pred)] for pred in test_predictions]
    
    # Create submission file
    submission_df = pd.DataFrame({
        'image_id': [f.split('/')[-1] for f in test_files],
        'label': predicted_labels,
        'confidence': [np.max(pred) for pred in test_predictions]
    })
    
    submission_df.to_csv('enhanced_submission.csv', index=False)
    
    return experiment, ensemble, evaluation_results

# Usage
if __name__ == "__main__":
    experiment, model, results = train_enhanced_model(
        meta_train_path='meta_train.csv',
        train_images_path='train_images/',
        test_images_path='test_images/'
    )

NameError: name 'X' is not defined