In [None]:
# Install required packages
!pip install pandas numpy scikit-learn matplotlib seaborn
!pip install xgboost lightgbm
!pip install torch torchvision torchaudio
!pip install ultralytics
!pip install pillow opencv-python


In [None]:
# Import all necessary libraries
# NOTE(review): `sns`, `Path`, `RoboflowDataProcessor`, `DataNormalizer` and
# `get_ensemble_config` are not referenced by any cell visible in this notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
# Installs an "ignore" filter with no category restriction, so every warning
# raised after this line is suppressed for the rest of the kernel session.
warnings.filterwarnings('ignore')

# Import our custom ML package
# (project-local package; it must be importable from the kernel's environment)
from dog_emotion_ml import (
    EmotionMLClassifier,
    EnsembleMetaLearner,
    RoboflowDataProcessor,
    DataNormalizer,
    print_ensemble_summary,
    get_ensemble_config
)

# Confirmation messages; reaching these lines means every import above succeeded
print("✅ All libraries imported successfully!")
print("📦 Dog Emotion ML Package loaded")


In [None]:
# Build the classifier with a fixed seed, then report its two feature groups
classifier = EmotionMLClassifier(random_state=42)
print("🎯 EmotionMLClassifier initialized")

# Each attribute is looked up right before it is printed, one group per line
for _group, _attr in (("Emotion", "emotion_features"), ("Tail", "tail_features")):
    print(f"📋 {_group} features: {getattr(classifier, _attr)}")


In [None]:
# Generate sample data for demonstration.
#
# Each row holds a filename, four emotion probabilities, three tail
# probabilities and a ground-truth label. One pool of rows is generated and then
# cut into three NON-OVERLAPPING sets (training / test / meta-learning).
# Drawing the test and meta-learning sets out of the training frame would make
# every evaluation row a row the models were fitted on, which inflates accuracy.
print("🔄 Generating sample training data...")

# Seed the global NumPy RNG so the generated rows are reproducible
np.random.seed(42)
n_samples = 1000        # rows in the training set
n_test_samples = 200    # rows held out for evaluation
n_meta_samples = 300    # rows held out for meta-learning
n_total = n_samples + n_test_samples + n_meta_samples

data = []
emotions = ['sad', 'angry', 'happy', 'relaxed']

for i in range(n_total):
    filename = f"sample_{i:04d}.jpg"

    # Emotion probabilities with one dominant emotion: boost one component of
    # a Dirichlet draw by 0.5, then renormalise so the vector sums to 1 again
    dominant_emotion = np.random.choice(4)
    emotion_probs = np.random.dirichlet([0.5, 0.5, 0.5, 0.5])
    emotion_probs[dominant_emotion] += 0.5
    emotion_probs = emotion_probs / emotion_probs.sum()

    # Tail probabilities are drawn independently of the emotion
    tail_probs = np.random.dirichlet([1, 1, 1])

    # True label is the emotion that received the boost
    true_emotion = emotions[dominant_emotion]

    row = [filename] + emotion_probs.tolist() + tail_probs.tolist() + [true_emotion]
    data.append(row)

# Create one DataFrame for the whole pool
columns = ['filename', 'sad', 'angry', 'happy', 'relaxed', 'down', 'up', 'mid', 'label']
all_samples_df = pd.DataFrame(data, columns=columns)

# Rows are independent draws, so contiguous slices are already random splits.
# reset_index(drop=True) gives each split its own 0..n-1 index and a fresh frame.
test_end = n_samples + n_test_samples
train_df = all_samples_df.iloc[:n_samples].reset_index(drop=True)
test_df = all_samples_df.iloc[n_samples:test_end].reset_index(drop=True)
test_for_train_df = all_samples_df.iloc[test_end:].reset_index(drop=True)

print(f"✅ Generated datasets:")
print(f"   Training: {train_df.shape}")
print(f"   Test: {test_df.shape}")
print(f"   Meta-learning: {test_for_train_df.shape}")


In [None]:
# Hand the in-memory frames to the classifier, then chart the label balance
print("📂 Loading datasets into classifier...")

# The frames are assigned directly, standing in for loading from files
classifier.train_data = train_df
classifier.test_data = test_df
classifier.test_for_train_data = test_for_train_df

for _title, _attr in (
    ("Training", "train_data"),
    ("Test", "test_data"),
    ("Meta-learning", "test_for_train_data"),
):
    print(f"✅ {_title} data: {getattr(classifier, _attr).shape}")

# One bar chart per dataset, laid out side by side in a single figure
plt.figure(figsize=(12, 4))

_panels = (
    (train_df, 'skyblue', 'Training Data Distribution'),
    (test_df, 'lightcoral', 'Test Data Distribution'),
    (test_for_train_df, 'lightgreen', 'Meta-learning Data Distribution'),
)
for _position, (_frame, _colour, _heading) in enumerate(_panels, start=1):
    plt.subplot(1, 3, _position)
    _frame['label'].value_counts().plot(kind='bar', alpha=0.7, color=_colour)
    plt.title(_heading)
    plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

print("📊 Data visualization complete")


In [None]:
# Display available ensemble learning techniques
# `print_ensemble_summary` comes from the dog_emotion_ml package and is called
# with no arguments. It is the last expression of the cell, so any value it
# returns would be echoed by the notebook — presumably it only prints (TODO confirm).
print("🎯 Available Ensemble Learning Techniques:")
print_ensemble_summary()


In [None]:
# Run the three preparation steps in order, then report the feature matrices
print("🔄 Preparing data for training...")

# Advanced normalization is requested for every split
for _step in ("prepare_training_data", "prepare_test_data", "prepare_test_for_train_data"):
    getattr(classifier, _step)(use_advanced_normalization=True)

print("✅ Data preparation complete!")
for _title, _matrix_attr in (
    ("Training", "X_train"),
    ("Test", "X_test"),
    ("Meta-learning", "X_test_for_train"),
):
    print(f"📊 {_title} features shape: {getattr(classifier, _matrix_attr).shape}")


In [None]:
# Fit all models (the 7 ensemble techniques) and list what was trained
print("🚀 Starting comprehensive model training...")
print("⏱️ This may take several minutes...")

classifier.train_all_models()

# Count the entries registered in `trained_models` once training has returned
_trained_total = len(classifier.trained_models)
print(f"\n🎉 Training complete! Total models: {_trained_total}")
classifier.list_trained_models()


In [None]:
# Score every trained model and rank them by accuracy
print("📈 Evaluating all models...")

model_results = {}
for model_name in classifier.trained_models:
    try:
        model_results[model_name] = classifier.evaluate_model(model_name)
    except Exception as e:
        # A model that cannot be evaluated is reported and recorded as 0.0
        print(f"❌ Error evaluating {model_name}: {e}")
        model_results[model_name] = 0.0

# Highest accuracy first; ties keep their insertion order because the sort is stable
sorted_results = {
    name: model_results[name]
    for name in sorted(model_results, key=model_results.get, reverse=True)
}

print(f"\n🏆 TOP 10 PERFORMING MODELS:")
_medals = ("🥇", "🥈", "🥉")
for i, (model, acc) in enumerate(list(sorted_results.items())[:10]):
    # The first three places get a medal, everything below a generic badge
    emoji = _medals[i] if i < len(_medals) else "🏅"
    print(f"{emoji} {i+1:2d}. {model:<25} : {acc:.4f}")

print("\n📊 Model evaluation complete!")


In [None]:
# Bar chart of the 15 best models, followed by overall accuracy statistics
plt.figure(figsize=(15, 8))

# sorted_results is ordered best-first, so its first 15 items are the top 15
top_15 = dict(list(sorted_results.items())[:15])
models = list(top_15)
accuracies = [top_15[name] for name in models]
_positions = range(len(models))

bars = plt.bar(_positions, accuracies, alpha=0.7, color='skyblue', edgecolor='navy')

# Write each accuracy just above its bar
for _xpos, _height in zip(_positions, accuracies):
    plt.text(_xpos, _height + 0.005, f'{_height:.3f}', ha='center', va='bottom', fontweight='bold')

plt.title('🏆 Top 15 Model Performance Comparison', fontsize=16, fontweight='bold')
plt.xlabel('Models', fontsize=12)
plt.ylabel('Test Accuracy', fontsize=12)
plt.xticks(_positions, models, rotation=45, ha='right')
# Leave 10% headroom above the tallest bar for the value labels
plt.ylim(0, max(accuracies) * 1.1)
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

# Statistics over ALL evaluated models, not only the 15 that were plotted
_all_accuracies = list(sorted_results.values())
avg_acc = np.mean(_all_accuracies)
best_acc = max(_all_accuracies)
worst_acc = min(_all_accuracies)

print(f"📊 Performance Statistics:")
print(f"   Best: {best_acc:.4f}")
print(f"   Average: {avg_acc:.4f}")
print(f"   Worst: {worst_acc:.4f}")
print(f"   Range: {best_acc - worst_acc:.4f}")


In [None]:
# Generate meta-training data
# The result of classifier.generate_meta_training_data() is used as a DataFrame
# below (.shape, .to_csv, .head, .columns).
print("🔄 Generating meta-training data for algorithm selection...")

meta_training_data = classifier.generate_meta_training_data()
print(f"✅ Meta-training data generated: {meta_training_data.shape}")

# Save to temporary CSV for meta-learner
# The path is relative, so the file lands in the kernel's working directory;
# index=False keeps the row index out of the file. No visible cell deletes it.
meta_data_path = "temp_meta_training_data.csv"
meta_training_data.to_csv(meta_data_path, index=False)
print(f"💾 Meta-training data saved: {meta_data_path}")

# Show sample of meta-training data
# (first rows as plain text via print, then the full list of column names)
print(f"\n📋 Sample of meta-training data:")
print(meta_training_data.head())
print(f"\n📊 Columns: {list(meta_training_data.columns)}")


In [None]:
# Initialize and train meta-learner
# Steps run strictly in order: construct, load the CSV, analyse, train.
print("🧠 Initializing meta-learner for algorithm selection...")

meta_learner = EnsembleMetaLearner(random_state=42)

# Load meta-training data
# `meta_data_path` is the CSV path assigned in the meta-training-data cell,
# so that cell has to be run first.
meta_learner.load_meta_training_data(meta_data_path)

# Analyze algorithm performance
# NOTE(review): `performance_analysis` is stored but not read by any visible cell.
print("\n📊 Analyzing algorithm performance patterns...")
performance_analysis = meta_learner.analyze_algorithm_performance()

# Train meta-learner with Decision Tree
print("\n🚀 Training meta-learner...")
meta_learner.train_meta_learner(algorithm='DecisionTree')

print("✅ Meta-learner training complete!")


In [None]:
# Demonstrate algorithm selection
print("🎯 Demonstrating intelligent algorithm selection...")


def rank_recommendations(confidence, class_names, top_k=3):
    """Return the ``top_k`` highest-scoring ``(name, score)`` pairs, best first.

    Parameters
    ----------
    confidence : array-like of float
        Per-algorithm scores. The input is flattened first, so both a 1-D
        vector and a single-row 2-D array are accepted.
    class_names : sequence
        Algorithm names, indexable by the positions of ``confidence``.
    top_k : int, default 3
        Maximum number of pairs to return; fewer are returned when there are
        fewer scores.

    Returns
    -------
    list of (name, float)
        Pairs ordered from highest to lowest score.
    """
    scores = np.asarray(confidence, dtype=float).ravel()
    # Ascending argsort reversed gives best-first order; then keep the head
    order = np.argsort(scores)[::-1][:top_k]
    return [(class_names[idx], float(scores[idx])) for idx in order]


# Test with different feature combinations.
# Each entry is (emotion probabilities, tail probabilities, description); the
# vectors follow the column order used for the generated data:
# emotions = sad, angry, happy, relaxed and tail = down, up, mid.
test_scenarios = [
    ([0.8, 0.1, 0.05, 0.05], [0.2, 0.7, 0.1], "Sad dog with tail up"),
    ([0.1, 0.8, 0.05, 0.05], [0.6, 0.2, 0.2], "Angry dog with tail down"),
    ([0.05, 0.05, 0.8, 0.1], [0.1, 0.8, 0.1], "Happy dog with tail up"),
    ([0.1, 0.1, 0.1, 0.7], [0.3, 0.3, 0.4], "Relaxed dog with tail mid"),
    ([0.4, 0.4, 0.1, 0.1], [0.5, 0.3, 0.2], "Conflicted emotions, tail down")
]

print(f"\n🧪 Testing {len(test_scenarios)} scenarios:")
print("-" * 60)

for i, (emotion_features, tail_features, description) in enumerate(test_scenarios):
    print(f"\n📋 Scenario {i+1}: {description}")
    print(f"   Emotion probabilities: {emotion_features}")
    print(f"   Tail probabilities: {tail_features}")

    # A failure in one scenario is reported and the loop moves on to the next
    try:
        best_algo, confidence = meta_learner.predict_best_algorithm(emotion_features, tail_features)
        print(f"   🎯 Recommended algorithm: {best_algo}")

        if confidence is not None:
            # Flatten so a (1, n_classes) array is handled like a 1-D vector
            # NOTE(review): the exact shape returned by predict_best_algorithm
            # is not visible here — confirm against the package.
            scores = np.asarray(confidence, dtype=float).ravel()
            max_conf = scores.max()
            print(f"   📊 Confidence: {max_conf:.3f}")

            # Show top 3 algorithm recommendations
            print(f"   🏆 Top 3 recommendations:")
            ranked = rank_recommendations(scores, meta_learner.algorithm_encoder.classes_)
            for j, (algo_name, conf_score) in enumerate(ranked):
                print(f"      {j+1}. {algo_name}: {conf_score:.3f}")

    except Exception as e:
        print(f"   ❌ Error: {e}")

print("\n✅ Algorithm selection demonstration complete!")


In [None]:
# Comprehensive results analysis
print("📋 COMPREHENSIVE RESULTS ANALYSIS")
print("=" * 60)

# Ensemble technique performance analysis.
# A model belongs to a category when any of the category's patterns occurs as
# a substring of the model name, so one model may count towards several categories.
ensemble_categories = {
    'Bagging': ['Bagging', 'RandomForest', 'ExtraTrees'],
    'Boosting': ['XGBoost', 'AdaBoost', 'GradientBoosting', 'LightGBM'],
    'Stacking': ['Stacking'],
    'Voting': ['Voting'],
    'Advanced': ['NegativeCorrelationEnsemble', 'HeterogeneousEnsemble', 'MultiLevelDeepEnsemble'],
    'Classical': ['LogisticRegression', 'SVM', 'DecisionTree', 'NaiveBayes', 'KNN', 'LDA', 'QDA', 'MLP']
}

print(f"\n🏆 ENSEMBLE TECHNIQUE PERFORMANCE:")
ensemble_performance = {}

for category, patterns in ensemble_categories.items():
    matching_models = [
        (model_name, acc)
        for model_name, acc in sorted_results.items()
        if any(pattern in model_name for pattern in patterns)
    ]

    # Categories without a single matching model are left out of the report
    if not matching_models:
        continue

    # Per-category names are kept distinct from the notebook-level
    # best_acc / avg_acc / worst_acc so the overall statistics are not overwritten
    category_best_model, category_best_acc = max(matching_models, key=lambda x: x[1])
    category_avg_acc = np.mean([acc for _, acc in matching_models])
    category_count = len(matching_models)

    ensemble_performance[category] = {
        'best_acc': category_best_acc,
        'avg_acc': category_avg_acc,
        'count': category_count,
        'best_model': category_best_model
    }

    # Add an ellipsis only when the name really was shortened
    if len(category_best_model) > 20:
        shown_name = category_best_model[:20] + '...'
    else:
        shown_name = category_best_model
    print(f"  {category:<12}: Best={category_best_acc:.4f} ({shown_name}), Avg={category_avg_acc:.4f}, Count={category_count}")

# Overall figures are computed from every evaluated model in sorted_results
overall_accuracies = list(sorted_results.values())
overall_best_acc = max(overall_accuracies)
overall_avg_acc = np.mean(overall_accuracies)
overall_worst_acc = min(overall_accuracies)

print(f"\n📊 OVERALL STATISTICS:")
print(f"  • Total models trained: {len(sorted_results)}")
print(f"  • Best overall accuracy: {overall_best_acc:.4f}")
print(f"  • Average accuracy: {overall_avg_acc:.4f}")
print(f"  • Worst accuracy: {overall_worst_acc:.4f}")
print(f"  • Performance range: {overall_best_acc - overall_worst_acc:.4f}")

# Find best model per category
print(f"\n🥇 BEST MODEL PER CATEGORY:")
for category, stats in ensemble_performance.items():
    print(f"  {category}: {stats['best_model']} ({stats['best_acc']:.4f})")


In [None]:
# Create comprehensive visualization dashboard (2 x 2 grid of panels)
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# The mean over ALL evaluated models is computed here rather than read from a
# notebook-level `avg_acc`, which the per-category analysis loop may have
# reassigned to the average of a single category.
all_accuracies = list(sorted_results.values())
overall_mean_acc = np.mean(all_accuracies)

# 1. Ensemble technique comparison
if ensemble_performance:
    categories = list(ensemble_performance.keys())
    avg_accs = [ensemble_performance[cat]['avg_acc'] for cat in categories]

    axes[0, 0].bar(categories, avg_accs, alpha=0.7, color='lightblue', edgecolor='navy')
    axes[0, 0].set_title('📊 Average Performance by Ensemble Technique')
    axes[0, 0].set_ylabel('Average Accuracy')
    axes[0, 0].tick_params(axis='x', rotation=45)

    # Add value labels just above each bar
    for i, acc in enumerate(avg_accs):
        axes[0, 0].text(i, acc + 0.005, f'{acc:.3f}', ha='center', va='bottom')

# 2. Accuracy distribution histogram with the overall mean marked
axes[0, 1].hist(all_accuracies, bins=15, alpha=0.7, color='lightgreen', edgecolor='darkgreen')
axes[0, 1].axvline(overall_mean_acc, color='red', linestyle='--', linewidth=2, label=f'Mean: {overall_mean_acc:.3f}')
axes[0, 1].set_title('📈 Model Accuracy Distribution')
axes[0, 1].set_xlabel('Accuracy')
axes[0, 1].set_ylabel('Number of Models')
axes[0, 1].legend()

# 3. Top 10 models performance (sorted_results is ordered best-first)
top_10_models = list(sorted_results.keys())[:10]
top_10_accs = all_accuracies[:10]

axes[1, 0].barh(range(len(top_10_models)), top_10_accs, alpha=0.7, color='orange', edgecolor='red')
axes[1, 0].set_yticks(range(len(top_10_models)))
# Names longer than 20 characters are truncated and marked with an ellipsis
axes[1, 0].set_yticklabels([model[:20] + '...' if len(model) > 20 else model for model in top_10_models])
axes[1, 0].set_title('🏆 Top 10 Model Performance')
axes[1, 0].set_xlabel('Accuracy')
# Invert so the best model is drawn at the top
axes[1, 0].invert_yaxis()

# 4. Model count by category (`categories` was set in panel 1 under the same condition)
if ensemble_performance:
    category_counts = [ensemble_performance[cat]['count'] for cat in categories]

    axes[1, 1].pie(category_counts, labels=categories, autopct='%1.1f%%', startangle=90)
    axes[1, 1].set_title('🔧 Model Distribution by Category')

plt.tight_layout()
plt.show()

print("📊 Comprehensive visualization dashboard complete!")


In [None]:
# Save results and generate final summary.
# One row per evaluated model: name, accuracy, 1-based rank (sorted_results is
# ordered best-first) and the first category whose patterns match the name,
# or 'Other' when none does.
results_df = pd.DataFrame([
    {
        'Model': model,
        'Accuracy': acc,
        'Rank': rank,
        'Category': next((cat for cat, patterns in ensemble_categories.items()
                         if any(pattern in model for pattern in patterns)), 'Other')
    }
    for rank, (model, acc) in enumerate(sorted_results.items(), start=1)
])

# Save results to CSV (relative path: written to the kernel's working directory)
results_path = "dog_emotion_ml_results.csv"
results_df.to_csv(results_path, index=False)

print(f"💾 Results saved to: {results_path}")

# Final summary
print(f"\n🎉 DEMO COMPLETE!")
print("=" * 50)
print(f"✅ Successfully demonstrated complete dog emotion recognition pipeline")
print(f"✅ Implemented 7 ensemble learning techniques:")
print(f"   1. Bagging (Bootstrap Aggregating)")
print(f"   2. Boosting (XGBoost, AdaBoost, GradientBoosting)")
print(f"   3. Stacking (Meta-model combination)")
print(f"   4. Voting (Soft/Hard voting)")
print(f"   5. Negative Correlation Ensemble")
print(f"   6. Heterogeneous Ensemble")
print(f"   7. Multi-level Deep Ensemble")
print(f"✅ Trained {len(sorted_results)} different ML models")
print(f"✅ Implemented meta-learning for intelligent algorithm selection")
# The first item of the best-first dict is the top model; skipped if nothing was evaluated
if sorted_results:
    top_model, top_acc = next(iter(sorted_results.items()))
    print(f"✅ Best performing model: {top_model} ({top_acc:.4f})")

# Recommendations
print(f"\n💡 RECOMMENDATIONS:")
# max() raises ValueError on an empty mapping, which happens when no trained
# model matched any category pattern, so that case is reported instead
if ensemble_performance:
    best_category = max(ensemble_performance.items(), key=lambda x: x[1]['best_acc'])
    print(f"   🏆 Best ensemble technique: {best_category[0]} (Best: {best_category[1]['best_acc']:.4f})")
    print(f"   🎯 Recommended model: {best_category[1]['best_model']}")
else:
    print(f"   ⚠️ No trained model matched a known ensemble category")
print(f"   📈 Use meta-learner for dynamic algorithm selection based on input features")
print(f"   🔧 Consider ensemble voting of top 3-5 models for production use")

print(f"\n📊 Dataset: {train_df.shape[0]} training samples, {test_df.shape[0]} test samples")
print(f"🎯 Features: 4 emotion + 3 tail = 7 total features")
print(f"🏷️ Classes: {len(train_df['label'].unique())} emotion categories")

print(f"\n🚀 Ready for production deployment!")
print(f"📦 Package: dog_emotion_ml v2.1.0")
print(f"🔗 Integration: Can be integrated with ResNet + YOLO models")
