In [None]:
# ==========================================
# CELL 1: Setup & Dependencies Installation
# ==========================================

# Startup banner printed once when the notebook begins executing.
print("🚀 KHỞI ĐỘNG DOG EMOTION RECOGNITION - FINAL FIXED VERSION WITH HEAD CROPPING")
print("=" * 80)

# Install dependencies with comprehensive error handling
import subprocess
import sys
import os

def install_package(package):
    """Quietly pip-install ``package`` using the running interpreter.

    Args:
        package: Requirement specifier handed to ``pip install``.

    Returns:
        True when pip exits successfully; False when pip exits with a
        non-zero status (the failure is reported on stdout).
    """
    pip_command = [sys.executable, "-m", "pip", "install", package, "-q"]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as e:
        print(f"⚠️ Failed to install {package}: {e}")
        return False
    return True

# Third-party packages the rest of the notebook depends on.
packages = [
    "roboflow", "ultralytics",
    "torch", "torchvision",
    "opencv-python", "pillow",
    "pandas", "numpy", "pyyaml",
    "scikit-learn",
    "matplotlib", "seaborn",
    "tqdm", "gdown",
]

print("📦 Installing required packages...")

# Install one package at a time so a single failure does not stop the rest;
# failures are collected and reported once at the end.
failed_packages = []
for package in packages:
    print(f"Installing {package}...", end=" ")
    installed = install_package(package)
    print("✅" if installed else "❌")
    if not installed:
        failed_packages.append(package)

if not failed_packages:
    print("\n✅ All dependencies installed successfully!")
else:
    print(f"\n⚠️ Failed to install: {failed_packages}")
    print("   The notebook may still work with partial functionality")

print("\n✅ Setup phase completed!")


In [None]:
# ==========================================
# CELL 2: Download Roboflow Dataset
# ==========================================

print("📥 DOWNLOADING ROBOFLOW DATASET")
print("=" * 40)

# Download the dataset from Roboflow. Any failure (missing package, network,
# authentication, ...) is reported and the cell falls back to fixed paths so
# the remaining cells can still be executed against a manually placed dataset.
try:
    import os

    from roboflow import Roboflow

    print("🔗 Connecting to Roboflow...")
    # SECURITY: prefer supplying the key through the ROBOFLOW_API_KEY
    # environment variable. The embedded key is kept only as a
    # backward-compatible fallback; it is exposed to anyone who can read this
    # notebook and should be rotated and removed from the source.
    rf = Roboflow(api_key=os.environ.get("ROBOFLOW_API_KEY", "blm6FIqi33eLS0ewVlKV"))
    project = rf.workspace("2642025").project("19-06")
    version = project.version(7)

    print("📥 Downloading dataset...")
    dataset = version.download("yolov12")

    print("✅ Roboflow dataset downloaded successfully!")
    print(f"📁 Dataset location: {dataset.location}")

    # Dataset paths used by the later cells (the "test" split is evaluated)
    DATASET_ROOT = dataset.location
    DATASET_PATH = f"{DATASET_ROOT}/test"

except Exception as e:
    print(f"⚠️ Failed to download Roboflow dataset: {e}")
    print("   Using fallback paths - you may need to manually download the dataset")

    # Fallback paths
    # NOTE(review): for project "19-06" version 7 the download folder is
    # presumably named "19-06-7"; confirm that "/content/19/06-7" is intended.
    DATASET_ROOT = "/content/19/06-7"
    DATASET_PATH = "/content/19/06-7/test"

print(f"\n📂 Dataset paths:")
print(f"   Root: {DATASET_ROOT}")
print(f"   Test: {DATASET_PATH}")


In [None]:
# ==========================================
# CELL 3: Clone Repository & Download Models
# ==========================================

import subprocess
import sys
import os
import gdown

print("📥 CLONING REPOSITORY AND DOWNLOADING MODELS")
print("=" * 60)

# Fetch the project repository unless a previous run already cloned it.
# Every failure is reported but never aborts the cell.
repo_path = '/content/dog-emotion-recognition-hybrid'
try:
    if os.path.exists(repo_path):
        print("✅ Repository already exists")
    else:
        print("📥 Cloning repository...")
        clone_command = [
            'git', 'clone',
            'https://github.com/hoangh-e/dog-emotion-recognition-hybrid.git',
        ]
        result = subprocess.run(clone_command, capture_output=True, text=True, cwd='/content')

        if result.returncode != 0:
            print(f"⚠️ Git clone failed: {result.stderr}")
        else:
            print("✅ Repository cloned successfully")
except Exception as e:
    print(f"❌ Error cloning repository: {e}")
# Download the model checkpoints from Google Drive, one file at a time, so a
# single failed download does not prevent the others from being fetched.
print("\n📥 Downloading models...")

# Target filename under /content -> Google Drive file id
model_downloads = {
    # YOLO models
    'yolov12m_dog_head_1cls_100ep_best_v1.pt': '1gK51jAz1gzYad7-UcDMmuH7bq849DOjz',
    'yolov12m_dog_tail_3cls_80ep_best_v2.pt': '1_543yUfdA6DDaOJatgZ0jNGNZgNOGt6M',

    # Emotion models
    'resnet50_50e_best.pth': '1s5KprrhHWkbhjRWCb3OK48I-OriDLR_S',
    'resnet50_30e_best.pth': '1zwXbvUYHH62CcwAgkDX-9PdeSfRn2ngb',
    'resnet101_30e_best.pth': '1AU3zjUYvfPjK5nxsXihQQ175AT0Ex7tH',
    'pure34_30e_best.pth': '11Oy8lqKF7MeMWV89SR-kN6sNLwNi-jjQ',
    'pure50_30e_best.pth': '19YOIURvPQ89AGHxiafqaILr-tDuN8FIa',
    'pure50_50e_best.pth': '1GTUXZxivkn7yALZRYLHKbv_dKlBPbnL5',
    'cr_v_model_folder.zip': '1SBUiWFmz-5PkCcX_wovGba2WR8fGo93R',
}

successful_downloads = []
failed_downloads = []

for filename, file_id in model_downloads.items():
    output_path = f'/content/{filename}'

    # A file larger than 1 KB is treated as an already completed download
    already_present = os.path.exists(output_path) and os.path.getsize(output_path) > 1024
    if already_present:
        file_size = os.path.getsize(output_path) / (1024*1024)  # MB
        print(f"✅ {filename} already exists ({file_size:.1f} MB)")
        successful_downloads.append(filename)
        continue

    try:
        print(f"📥 Downloading {filename}...", end=" ")
        gdown.download(f'https://drive.google.com/uc?id={file_id}', output_path, quiet=True)

        # The same size threshold decides whether the download succeeded
        downloaded_ok = os.path.exists(output_path) and os.path.getsize(output_path) > 1024
        if not downloaded_ok:
            print("❌ Failed or too small")
            failed_downloads.append(filename)
        else:
            file_size = os.path.getsize(output_path) / (1024*1024)  # MB
            print(f"✅ ({file_size:.1f} MB)")
            successful_downloads.append(filename)

    except Exception as e:
        print(f"❌ Error: {e}")
        failed_downloads.append(filename)

print(f"\n📊 DOWNLOAD SUMMARY:")
print(f"✅ Successful: {len(successful_downloads)}/{len(model_downloads)}")
if not failed_downloads:
    print("🎉 All models downloaded successfully!")
else:
    print(f"❌ Failed: {failed_downloads}")
    print("   The notebook will continue with available models")

# Extract the downloaded archive into /content. Pure-Python extraction is used
# instead of the `!unzip` shell escape so that the step
#   * reads the archive from the absolute path it was downloaded to,
#   * overwrites earlier extractions without an interactive prompt on re-runs,
#   * reports a missing or corrupt archive instead of failing silently.
import os
import zipfile

_cr_v_archive_path = '/content/cr_v_model_folder.zip'
if not os.path.exists(_cr_v_archive_path):
    print(f"⚠️ Archive not found, skipping extraction: {_cr_v_archive_path}")
else:
    try:
        with zipfile.ZipFile(_cr_v_archive_path) as _cr_v_archive:
            _cr_v_archive.extractall('/content')
        print(f"✅ Extracted {_cr_v_archive_path} into /content")
    except (zipfile.BadZipFile, OSError) as e:
        print(f"⚠️ Failed to extract {_cr_v_archive_path}: {e}")


In [None]:
# ==========================================
# CELL 4: Import Libraries & Configuration
# ==========================================

import os
import cv2
import torch
import pandas as pd
import numpy as np
from pathlib import Path
from PIL import Image
import torchvision.transforms as transforms
from ultralytics import YOLO
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import warnings
from collections import defaultdict
import json
from datetime import datetime
import sys

# Silence all Python warnings for the rest of the session
warnings.filterwarnings('ignore')

# Make the cloned repository importable
sys.path.append('/content/dog-emotion-recognition-hybrid')

print("📦 All packages imported successfully!")
print(f"🔥 PyTorch version: {torch.__version__}")
print(f"🚀 CUDA available: {torch.cuda.is_available()}")

# Run on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"🖥️ Using device: {device}")

# Roboflow dataset layout: images and YOLO label files live side by side
IMAGES_PATH = Path(DATASET_PATH, "images")
LABELS_PATH = Path(DATASET_PATH, "labels")

# Class names of the Roboflow dataset; a label's class id is its list index
ROBOFLOW_CLASSES = ['angry', 'happy', 'relaxed', 'sad']
CLASS_MAPPING = dict(enumerate(ROBOFLOW_CLASSES))

# YOLO detector checkpoints
YOLO_TAIL_MODEL = "/content/yolov12m_dog_tail_3cls_80ep_best_v2.pt"
YOLO_HEAD_MODEL = "/content/yolov12m_dog_head_1cls_100ep_best_v1.pt"

# Directory that receives the generated outputs
OUTPUT_DIR = "/content/outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("✅ Configuration loaded successfully!")
print(f"📁 Images path: {IMAGES_PATH}")
print(f"🏷️ Labels path: {LABELS_PATH}")
print(f"💾 Output directory: {OUTPUT_DIR}")


In [None]:
# ==========================================
# CELL 5: FIXED Import Emotion Classification Functions
# ==========================================

print("📥 IMPORTING EMOTION CLASSIFICATION FUNCTIONS...")
print("=" * 60)

# Records, per model family, whether its loader/predictor functions could be
# imported from the cloned repository.
import_status = {}

# Pure34 helpers
try:
    from dog_emotion_classification.pure34 import (
        load_pure34_model, predict_emotion_pure34
    )
except Exception as e:
    import_status['pure34'] = False
    print(f"❌ Error importing Pure34 functions: {e}")
else:
    import_status['pure34'] = True
    print("✅ Pure34 functions imported successfully")

# Pure50 helpers
try:
    from dog_emotion_classification.pure50 import (
        load_pure50_model, predict_emotion_pure50
    )
except Exception as e:
    import_status['pure50'] = False
    print(f"❌ Error importing Pure50 functions: {e}")
else:
    import_status['pure50'] = True
    print("✅ Pure50 functions imported successfully")

# ResNet helpers (the loader takes an architecture parameter)
try:
    from dog_emotion_classification.resnet import (
        load_resnet_model, predict_emotion_resnet
    )
except Exception as e:
    import_status['resnet'] = False
    print(f"❌ Error importing ResNet functions: {e}")
else:
    import_status['resnet'] = True
    print("✅ ResNet functions imported successfully (with architecture parameter)")

# Load the YOLO detectors; a missing or broken checkpoint yields None
print("\n🔄 Loading YOLO models...")
yolo_models = {}


def _load_yolo_detector(label, weights_path):
    """Load one YOLO checkpoint and register it in ``yolo_models``.

    Args:
        label: Display name of the detector ("Head" or "Tail"); its
            lower-cased form is the key used in ``yolo_models``.
        weights_path: Path of the checkpoint file.

    Returns:
        The loaded YOLO model, or None when the file is missing or loading
        raised an exception (both cases are reported on stdout).
    """
    try:
        if not os.path.exists(weights_path):
            print(f"⚠️ YOLO {label} model not found: {weights_path}")
            return None
        detector = YOLO(weights_path)
        yolo_models[label.lower()] = detector
        print(f"✅ YOLO {label} model loaded from {weights_path}")
        return detector
    except Exception as e:
        print(f"❌ Error loading YOLO {label}: {e}")
        return None


yolo_head = _load_yolo_detector("Head", YOLO_HEAD_MODEL)
yolo_tail = _load_yolo_detector("Tail", YOLO_TAIL_MODEL)

print(f"\n📊 IMPORT SUMMARY:")
print(f"✅ Functions imported: {sum(import_status.values())}/3")
print(f"✅ YOLO models loaded: {len(yolo_models)}/2")

# Both detectors are mandatory: later cells only keep images in which a head
# AND a tail were detected.
if not yolo_head or not yolo_tail:
    print("\n❌ CRITICAL ERROR: Both YOLO Head and Tail models are required!")
    print("   This notebook requires BOTH models for filtering")
    print("   Please ensure both models are downloaded and loaded successfully")
    raise RuntimeError("Both YOLO models are required for this notebook")

if not any(import_status.values()):
    print("⚠️ WARNING: No emotion classification functions available!")
    print("   The notebook will create a basic dataset without emotion predictions")

print("\n✅ Import phase completed!")


In [None]:
# ==========================================
# CELL 6: NEW - HEAD CROPPING & DETECTION FUNCTIONS
# ==========================================

def safe_path_convert(path_obj):
    """Return ``path_obj`` as a string when it is a ``pathlib.Path``.

    Any other value (including plain strings and None) is returned unchanged.
    """
    return str(path_obj) if isinstance(path_obj, Path) else path_obj

def crop_head_from_bbox(image_path, bbox, padding=0.1):
    """Crop the head region out of an image, with relative padding.

    Args:
        image_path: Path of the image to open.
        bbox: Bounding box as ``[x1, y1, x2, y2]`` in pixel coordinates.
        padding: Fraction of the box width/height added on every side
            (default 10%). The padded box is clamped to the image bounds.

    Returns:
        A PIL Image holding the cropped region, or None when the image cannot
        be opened, the box is malformed, or the clamped region is empty (the
        error is reported on stdout).
    """
    try:
        # The context manager releases the file handle deterministically
        # instead of leaving it to garbage collection.
        with Image.open(image_path) as image:
            img_width, img_height = image.size

            x1, y1, x2, y2 = bbox

            # Padding is relative to the size of the detected box
            pad_x = (x2 - x1) * padding
            pad_y = (y2 - y1) * padding

            # Padded box, clamped to the image bounds
            new_x1 = max(0, x1 - pad_x)
            new_y1 = max(0, y1 - pad_y)
            new_x2 = min(img_width, x2 + pad_x)
            new_y2 = min(img_height, y2 + pad_y)

            # Reject degenerate boxes explicitly rather than handing an empty
            # image to the later save/predict steps.
            if new_x2 <= new_x1 or new_y2 <= new_y1:
                raise ValueError(
                    f"empty crop region {(new_x1, new_y1, new_x2, new_y2)}"
                )

            cropped_image = image.crop((new_x1, new_y1, new_x2, new_y2))
            # Make sure the pixel data is materialised before the source file
            # is closed on leaving the `with` block.
            cropped_image.load()

        return cropped_image

    except Exception as e:
        print(f"❌ Error cropping head from {image_path}: {e}")
        return None

def detect_head_and_tail_WITH_CROPPING(image_path):
    """Run the head and tail detectors on one image and crop the best head.

    For each detector only the first result that contains boxes is used, and
    within it the box with the highest confidence. The head box is cropped
    with ``crop_head_from_bbox``; the tail class id is mapped to a position.

    Args:
        image_path: Image location (``Path`` or string).

    Returns:
        dict with the keys ``head_detected``, ``head_bbox``,
        ``head_confidence``, ``tail_detected``, ``tail_position`` ('down',
        'mid', 'up' or 'unknown'), ``tail_confidence``, ``both_detected`` and
        ``cropped_head`` (PIL image or None). If an exception occurs, the
        partially filled dict is returned and the error is printed.
    """
    result = {
        'head_detected': False,
        'head_bbox': None,
        'head_confidence': 0.0,
        'tail_detected': False,
        'tail_position': 'unknown',
        'tail_confidence': 0.0,
        'both_detected': False,
        'cropped_head': None
    }

    def _most_confident(boxes):
        """Return ``(box, confidence)`` for the first box with the highest
        confidence above zero, or ``(None, 0.0)`` when there is none."""
        top_box, top_conf = None, 0.0
        for candidate in boxes:
            score = float(candidate.conf[0])
            if score > top_conf:
                top_box, top_conf = candidate, score
        return top_box, top_conf

    # Tail position for each tail class id (DownTail, MidTail, UpTail)
    tail_positions = ('down', 'mid', 'up')

    try:
        # The detectors are called with a plain string path
        image_path_str = safe_path_convert(image_path)

        # --- Head detection -------------------------------------------------
        if yolo_head:
            for detection in yolo_head(image_path_str):
                if detection.boxes is None or len(detection.boxes) == 0:
                    continue

                head_box, head_conf = _most_confident(detection.boxes)
                if head_box is not None:
                    head_bbox = head_box.xyxy[0].cpu().numpy().tolist()
                    result['head_detected'] = True
                    result['head_bbox'] = head_bbox
                    result['head_confidence'] = head_conf

                    # Crop the head region around the most confident box
                    result['cropped_head'] = crop_head_from_bbox(image_path, head_bbox)

                    print(f"✅ Head detected with confidence {head_conf:.3f}, cropped successfully")
                # Only the first result that contains boxes is considered
                break

        # --- Tail detection -------------------------------------------------
        if yolo_tail:
            for detection in yolo_tail(image_path_str):
                if detection.boxes is None or len(detection.boxes) == 0:
                    continue

                tail_box, tail_conf = _most_confident(detection.boxes)
                if tail_box is not None:
                    tail_class = int(tail_box.cls[0])
                    result['tail_detected'] = True
                    result['tail_confidence'] = tail_conf

                    # Class ids beyond the known ones keep 'unknown'
                    if tail_class < len(tail_positions):
                        result['tail_position'] = tail_positions[tail_class]
                # Only the first result that contains boxes is considered
                break

        # An image is usable only when both body parts were found
        result['both_detected'] = result['head_detected'] and result['tail_detected']

        return result

    except Exception as e:
        print(f"❌ Error in head/tail detection for {image_path}: {e}")
        return result

def predict_emotions_with_cropped_head(cropped_head_image, original_image_path, head_bbox=None):
    """Predict emotions with every loaded model from a cropped head image.

    The crop is written to a temporary JPEG file whose path is handed to the
    model-specific prediction function together with ``None`` as bounding box
    (the image is already cropped).

    Args:
        cropped_head_image: PIL image of the head region, or None.
        original_image_path: Unused; kept for interface compatibility.
        head_bbox: Unused; kept for interface compatibility.

    Returns:
        dict mapping model name to the value returned by its prediction
        function. A model whose prediction raised gets all-zero scores with
        ``'predicted': False``. Models whose prediction function is not
        available are omitted. An empty dict is returned when no models are
        loaded, no crop is given, or the crop cannot be saved.
    """
    import tempfile  # local import so the function does not depend on another cell

    # Only process if we have working models and a cropped image
    if not emotion_models or cropped_head_image is None:
        return {}

    predictions = {}
    temp_cropped_path = None
    try:
        try:
            # JPEG cannot store alpha or palette images (common for PNG
            # sources); convert those to RGB instead of failing the save and
            # silently losing all predictions for the image.
            image_to_save = cropped_head_image
            if image_to_save.mode not in ("RGB", "L"):
                image_to_save = image_to_save.convert("RGB")

            # A unique temp file avoids clashes with other runs/processes that
            # would otherwise share one fixed filename.
            fd, temp_cropped_path = tempfile.mkstemp(
                prefix="temp_cropped_head_", suffix=".jpg"
            )
            os.close(fd)
            image_to_save.save(temp_cropped_path)
        except Exception as e:
            print(f"❌ Error saving cropped image: {e}")
            return {}

        for model_name, model in emotion_models.items():
            try:
                transform = emotion_transforms[model_name]
                model_type = ENABLED_MODELS[model_name]['type']

                # Skip models whose prediction function failed to import
                if not import_status.get(model_type, False):
                    continue

                if model_type == 'pure34':
                    predictor = predict_emotion_pure34
                elif model_type == 'pure50':
                    predictor = predict_emotion_pure50
                elif model_type == 'resnet':
                    predictor = predict_emotion_resnet
                else:
                    continue

                # The cropped file replaces the original image, so no
                # head bounding box is passed.
                predictions[model_name] = predictor(
                    temp_cropped_path, model, transform, None, device
                )

            except Exception as e:
                print(f"❌ Error predicting with {model_name} for cropped head: {e}")
                # Default scores mark the failed prediction
                predictions[model_name] = {
                    'sad': 0.0, 'angry': 0.0, 'happy': 0.0, 'relaxed': 0.0,
                    'predicted': False
                }
    finally:
        # Best-effort removal of the temporary file on every exit path
        if temp_cropped_path is not None:
            try:
                os.remove(temp_cropped_path)
            except OSError:
                pass

    return predictions

# Confirmation messages printed once the helper functions above are defined.
print("✅ NEW HEAD CROPPING FUNCTIONS DEFINED!")
print("✅ Head detection will find highest confidence bbox")
print("✅ Emotion prediction will use CROPPED HEAD IMAGE")
print("✅ All original logic preserved - only processing method enhanced")


In [None]:
# ==========================================
# CELL 7: FIXED Load Emotion Models with Correct Parameters (UNCHANGED)
# ==========================================

print("🔄 LOADING EMOTION MODELS WITH CORRECT PARAMETERS")
print("=" * 70)


def _emotion_model_entry(path, model_type, **options):
    """Build one EMOTION_MODELS entry.

    ``options`` carries the loader-specific settings (``architecture``,
    ``input_size``). ``enabled`` records whether the checkpoint file exists at
    the time the entry is built.
    """
    entry = {'path': path, 'type': model_type}
    entry.update(options)
    entry['classes'] = ['sad', 'angry', 'happy', 'relaxed']
    entry['enabled'] = os.path.exists(path)
    return entry


# Multi-model configuration: five cross-validation fold checkpoints first,
# followed by the individually trained models.
EMOTION_MODELS = {
    f'resnet50_50e_fold{fold_no}': _emotion_model_entry(
        f'/content/fold_{fold_no}_model.pth', 'resnet',
        architecture='resnet50', input_size=224,
    )
    for fold_no in range(1, 6)
}
EMOTION_MODELS.update({
    'pure34_30e': _emotion_model_entry('/content/pure34_30e_best.pth', 'pure34'),
    # Pure50 loaders take an input_size parameter
    'pure50_30e': _emotion_model_entry(
        '/content/pure50_30e_best.pth', 'pure50', input_size=512),
    'pure50_50e': _emotion_model_entry(
        '/content/pure50_50e_best.pth', 'pure50', input_size=512),
    # ResNet loaders take an architecture parameter
    'resnet50_50e': _emotion_model_entry(
        '/content/resnet50_50e_best.pth', 'resnet',
        architecture='resnet50', input_size=224),
    'resnet50_30e': _emotion_model_entry(
        '/content/resnet50_30e_best.pth', 'resnet',
        architecture='resnet50', input_size=224),
    'resnet101_30e': _emotion_model_entry(
        '/content/resnet101_30e_best.pth', 'resnet',
        architecture='resnet101', input_size=224),
})

# Report the size of every checkpoint and disable entries whose file is missing
print("📋 MODEL FILE STATUS:")
for model_name, config in EMOTION_MODELS.items():
    if not os.path.exists(config['path']):
        print(f"❌ {model_name}: FILE NOT FOUND")
        config['enabled'] = False
        continue
    file_size = os.path.getsize(config['path']) / (1024*1024)  # MB
    print(f"✅ {model_name}: {file_size:.1f} MB")

# Keep only the models whose checkpoint is present
ENABLED_MODELS = {}
for name, config in EMOTION_MODELS.items():
    if config['enabled']:
        ENABLED_MODELS[name] = config

print(f"\n📊 ENABLED MODELS: {len(ENABLED_MODELS)}/{len(EMOTION_MODELS)}")

# Storage for loaded models: model name -> model / preprocessing transform
emotion_models = {}
emotion_transforms = {}
# Human-readable "<model name>: <reason>" entries for models that were skipped
loading_errors = []

if not ENABLED_MODELS:
    print("⚠️ WARNING: No emotion models available!")
    print("   The notebook will create a basic dataset without emotion predictions")
else:
    print(f"\n🔄 Loading {len(ENABLED_MODELS)} emotion models...")

    # Iterate over a snapshot: models that fail to load are removed from
    # ENABLED_MODELS below, and removing entries from a dict while iterating
    # over it raises "RuntimeError: dictionary changed size during iteration",
    # which would abort the whole cell after the first failure.
    for model_name, config in list(ENABLED_MODELS.items()):
        try:
            print(f"🔄 Loading {model_name}...", end=" ")

            # The loader functions exist only if their import succeeded
            required_import = import_status.get(config['type'], False)
            if not required_import:
                print(f"❌ Import for {config['type']} not available")
                loading_errors.append(f"{model_name}: Import not available")
                continue

            # Each model family has its own loader signature
            if config['type'] == 'pure34':
                # Pure34: load_pure34_model(model_path, num_classes=4, device='cuda')
                model, transform = load_pure34_model(
                    model_path=config['path'],
                    num_classes=4,
                    device=device
                )

            elif config['type'] == 'pure50':
                # Pure50: load_pure50_model(model_path, num_classes=4, input_size=512, device='cuda')
                model, transform = load_pure50_model(
                    model_path=config['path'],
                    num_classes=4,
                    input_size=config['input_size'],
                    device=device
                )

            elif config['type'] == 'resnet':
                # ResNet: load_resnet_model(model_path, architecture='resnet50', num_classes=4, input_size=224, device='cuda')
                model, transform = load_resnet_model(
                    model_path=config['path'],
                    architecture=config['architecture'],
                    num_classes=4,
                    input_size=config['input_size'],
                    device=device
                )

            else:
                raise ValueError(f"Unknown model type: {config['type']}")

            emotion_models[model_name] = model
            emotion_transforms[model_name] = transform
            print("✅")

        except Exception as e:
            print(f"❌ Error: {e}")
            loading_errors.append(f"{model_name}: {str(e)}")
            # Drop the broken model; safe because the loop runs on a snapshot
            ENABLED_MODELS.pop(model_name, None)
            continue

print(f"\n📊 FINAL MODEL LOADING SUMMARY:")
print(f"✅ Successfully loaded: {len(emotion_models)} models")
print(f"❌ Failed to load: {len(loading_errors)} models")

if loading_errors:
    print("\n❌ Loading errors:")
    for error in loading_errors:
        print(f"   - {error}")

if emotion_models:
    print(f"\n🎉 Working emotion models:")
    for model_name in emotion_models:
        print(f"   ✅ {model_name}")
else:
    print("\n⚠️ No emotion models loaded successfully!")
    print("   The notebook will create a basic dataset with placeholder emotion values")

print("\n✅ Model loading phase completed!")


In [None]:
# ==========================================
# CELL 8: Label Reading Functions (UNCHANGED)
# ==========================================

def read_roboflow_annotation_direct(image_path):
    """Look up the emotion label of an image in its YOLO ``.txt`` label file.

    The label file is ``LABELS_PATH/<image stem>.txt``. Only its first line is
    inspected: the leading token is parsed as the class id and translated
    through ``CLASS_MAPPING``.

    Args:
        image_path: ``Path`` of the image.

    Returns:
        The emotion name, or None when the label file is missing, its first
        line is empty, the class id is not in ``CLASS_MAPPING``, or any error
        occurs (unknown ids and errors are reported on stdout).
    """
    try:
        label_file = LABELS_PATH / f"{image_path.stem}.txt"
        if not label_file.exists():
            return None

        with open(label_file, 'r') as handle:
            content = handle.read()

        # Only the first annotation line decides the label
        first_line = content.split('\n', 1)[0].strip()
        if not first_line:
            return None

        # The class id may be written as a float (e.g. "1.0")
        class_id = int(float(first_line.split()[0]))

        if class_id not in CLASS_MAPPING:
            print(f"⚠️ Unknown class_id {class_id} in {label_file}")
            return None
        return CLASS_MAPPING[class_id]

    except Exception as e:
        print(f"❌ Error reading annotation for {image_path}: {e}")
        return None

def get_manual_label_from_filename(image_path):
    """Return the first class in ``ROBOFLOW_CLASSES`` whose name occurs in
    the lower-cased file name of ``image_path``, or None if none does."""
    lowered_name = image_path.name.lower()
    return next(
        (label for label in ROBOFLOW_CLASSES if label.lower() in lowered_name),
        None,
    )

def get_hybrid_label_direct(image_path):
    """Determine the ground-truth emotion of an image.

    The Roboflow label file is consulted first and the file name second.

    Returns:
        Tuple ``(label, source)`` where ``source`` is 'roboflow_direct' or
        'filename'; ``('unknown', 'none')`` when neither lookup yields a label.
    """
    lookups = (
        (read_roboflow_annotation_direct, 'roboflow_direct'),
        (get_manual_label_from_filename, 'filename'),
    )
    for lookup, source in lookups:
        label = lookup(image_path)
        if label:
            return label, source

    return 'unknown', 'none'

# Confirmation message printed once the label helpers above are defined.
print("✅ Label reading functions defined!")


In [None]:
# ==========================================
# CELL 9: MAIN PROCESSING LOOP WITH HEAD CROPPING
# ==========================================

print("🚀 STARTING PROCESSING WITH HEAD CROPPING FUNCTIONALITY")
print("=" * 80)

# Sanity checks before the main loop starts
print("🔍 PRE-PROCESSING CHECKS:")

# The images directory must exist
if not IMAGES_PATH.exists():
    print(f"❌ Images path does not exist: {IMAGES_PATH}")
    print("   Please check your dataset configuration")
    raise FileNotFoundError(f"Images directory not found: {IMAGES_PATH}")

# Collect the images, grouped by extension in the order jpg, png, jpeg
image_files = [
    found
    for pattern in ("*.jpg", "*.png", "*.jpeg")
    for found in IMAGES_PATH.glob(pattern)
]
print(f"🖼️ Found {len(image_files)} images to process")

if not image_files:
    print("❌ No images found! Check dataset path.")
    raise FileNotFoundError("No images found in the dataset")

# Report which models are available
print("🤖 Available models:")
print(f"   YOLO Head: {'✅' if yolo_head else '❌'}")
print(f"   YOLO Tail: {'✅' if yolo_tail else '❌'}")
print(f"   Emotion models: {len(emotion_models)} loaded")

# Result rows and statistics counters filled by the processing loop
results = []
processed_count = skipped_count = 0
both_detected_count = head_only_count = 0
tail_only_count = neither_detected_count = 0
emotion_predictions = 0
cropping_success_count = 0

print("\n🚀 Starting processing with HEAD CROPPING functionality...")
print(f"📊 Target: {len(image_files)} images")
print("🔧 PosixPath fixes: ENABLED")
print("🛡️ Error handling: COMPREHENSIVE")
print("🎯 Filtering: BOTH HEAD & TAIL DETECTION REQUIRED")
print("✂️ HEAD CROPPING: ENABLED - Using highest confidence bbox")
# Processing loop with progress bar
# Emotion names in the order their columns are written
emotion_labels = ('sad', 'angry', 'happy', 'relaxed')

# For each detected tail position: the order in which the tail feature columns
# are written. The first entry receives the detection confidence; the other
# two share the remaining probability equally.
tail_feature_order = {
    'down': ('down', 'up', 'mid'),
    'up': ('up', 'down', 'mid'),
    'mid': ('mid', 'down', 'up'),
}

# Process every image; only images with both a head and a tail detection
# produce a result row.
for i, image_path in enumerate(tqdm(image_files, desc="Processing images with head cropping")):
    try:
        # Ground-truth label and where it came from
        ground_truth, label_source = get_hybrid_label_direct(image_path)

        # Head/tail detection, including the cropped head image
        detection_result = detect_head_and_tail_WITH_CROPPING(image_path)

        # Detection statistics
        head_found = detection_result['head_detected']
        tail_found = detection_result['tail_detected']
        if head_found and tail_found:
            both_detected_count += 1
        elif head_found:
            head_only_count += 1
        elif tail_found:
            tail_only_count += 1
        else:
            neither_detected_count += 1

        # Images without both detections are skipped
        if not detection_result['both_detected']:
            skipped_count += 1
            continue

        cropped_head = detection_result['cropped_head']
        head_cropped = cropped_head is not None
        if head_cropped:
            cropping_success_count += 1

        # Basic information of the result row
        row = {
            'filename': image_path.name,
            'ground_truth': ground_truth,
            'label_source': label_source,
            'head_detected': detection_result['head_detected'],
            'head_confidence': detection_result['head_confidence'],
            'tail_detected': detection_result['tail_detected'],
            'both_detected': detection_result['both_detected'],
            'tail_position': detection_result['tail_position'],
            'tail_confidence': detection_result['tail_confidence'],
            'head_cropped': head_cropped
        }

        if emotion_models and head_cropped:
            # Emotion scores are predicted from the cropped head
            emotion_predictions_result = predict_emotions_with_cropped_head(
                cropped_head,
                image_path,
                detection_result['head_bbox']
            )

            if emotion_predictions_result:
                emotion_predictions += 1

                # One column per model and emotion; a model flagged as failed
                # contributes zeros
                for model_name, predictions in emotion_predictions_result.items():
                    model_succeeded = predictions.get('predicted', True)
                    for emotion in emotion_labels:
                        score = predictions.get(emotion, 0.0) if model_succeeded else 0.0
                        row[f"{model_name}_{emotion}"] = score
        else:
            # No emotion models or no crop: equal placeholder probabilities
            for emotion in emotion_labels:
                row[emotion] = 0.25

        # Tail features derived from the detected position
        tail_confidence = detection_result['tail_confidence']
        feature_order = tail_feature_order.get(detection_result['tail_position'])
        if feature_order is None:
            # Unknown position: near-uniform fallback
            row['down'] = 0.33
            row['up'] = 0.33
            row['mid'] = 0.34
        else:
            dominant, *remaining = feature_order
            row[dominant] = tail_confidence
            for feature in remaining:
                row[feature] = (1 - tail_confidence) / 2

        results.append(row)
        processed_count += 1

        # Progress update whenever a stored image falls on a multiple of 50
        if (i + 1) % 50 == 0:
            print(f"✅ Processed {i + 1}/{len(image_files)} images | Valid: {processed_count} | Skipped: {skipped_count} | Cropped: {cropping_success_count}")

    except Exception as e:
        print(f"❌ Error processing {image_path}: {e}")
        skipped_count += 1
        continue

# Build the final DataFrame from the per-image result rows, print a processing
# report, and save the dataset as a timestamped CSV inside OUTPUT_DIR.
# Leaves `df` set to the DataFrame on success, or to None when no rows exist.
if results:
    df = pd.DataFrame(results)

    # Rates are computed up front with zero-denominator guards so that a
    # counter that stayed at 0 prints 0.0% instead of raising
    # ZeroDivisionError in the middle of the report (which would also
    # prevent the CSV below from ever being written).
    total_images = len(image_files)
    both_detection_rate = (
        both_detected_count / total_images * 100 if total_images else 0.0
    )
    head_cropping_rate = (
        cropping_success_count / both_detected_count * 100
        if both_detected_count else 0.0
    )
    processing_rate = (
        processed_count / total_images * 100 if total_images else 0.0
    )

    print(f"\n🎉 PROCESSING WITH HEAD CROPPING COMPLETE!")
    print(f"📊 DETAILED STATISTICS:")
    print(f"   📁 Total images found: {total_images}")
    print(f"   👁️🐕 Both head & tail detected: {both_detected_count}")
    print(f"   👁️ Head only detected: {head_only_count}")
    print(f"   🐕 Tail only detected: {tail_only_count}")
    print(f"   ❌ Neither detected: {neither_detected_count}")
    print(f"   ✅ Successfully processed (both detections): {processed_count}")
    print(f"   ✂️ Head cropping successful: {cropping_success_count}")
    print(f"   ⏭️ Skipped (missing detections): {skipped_count}")
    print(f"   😊 Emotion predictions: {emotion_predictions}")
    print(f"   📈 Both detection rate: {both_detection_rate:.1f}%")
    print(f"   ✂️ Head cropping success rate: {head_cropping_rate:.1f}%")
    print(f"   🎯 Processing success rate: {processing_rate:.1f}%")

    print(f"\n📋 FINAL DATASET: {len(df)} rows and {len(df.columns)} columns")
    print(f"   (Only images with BOTH head & tail detections + HEAD CROPPING)")

    # Show sample of the dataset (only the columns that actually exist)
    print(f"\n📋 SAMPLE DATA (first 3 rows):")
    display_cols = ['filename', 'ground_truth', 'head_detected', 'head_confidence', 'tail_detected', 'head_cropped']
    available_cols = [col for col in display_cols if col in df.columns]
    print(df[available_cols].head(3))

    # Column summary: count columns per category by name / substring match
    basic_info_count = sum(
        col in ('filename', 'ground_truth', 'label_source') for col in df.columns
    )
    detection_info_count = sum(
        any(token in col for token in ('detected', 'position', 'confidence', 'cropped'))
        for col in df.columns
    )
    emotion_feature_count = sum(
        any(emotion in col for emotion in ('sad', 'angry', 'happy', 'relaxed'))
        for col in df.columns
    )
    tail_feature_count = sum(col in ('down', 'up', 'mid') for col in df.columns)

    print(f"\n📊 DATASET COLUMNS:")
    print(f"   Basic info: {basic_info_count} columns")
    print(f"   Detection info: {detection_info_count} columns")
    print(f"   Emotion features: {emotion_feature_count} columns")
    print(f"   Tail features: {tail_feature_count} columns")

    # Save results under a timestamped name so earlier runs are not overwritten
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_filename = f"dataset_HEAD_CROPPING_{timestamp}.csv"
    csv_path = os.path.join(OUTPUT_DIR, csv_filename)
    df.to_csv(csv_path, index=False)
    print(f"\n💾 Dataset saved: {csv_path}")

    print(f"\n🎉 HEAD CROPPING VERSION COMPLETE!")
    print("✅ Only images with BOTH head & tail detections included")
    print("✂️ Head regions cropped using highest confidence bounding box")
    print("🧠 Emotion predictions made on CROPPED HEAD IMAGES")
    print("✅ All original logic preserved - enhanced with head cropping")
    print("✅ High-quality dataset ready for ML training!")

else:
    print("❌ No images with both head & tail detections found!")
    print("   Please check your YOLO models and dataset quality")
    # Later cells test `df is not None` before using the dataset
    df = None

print("\n" + "=" * 80)
print("🐕 DOG EMOTION RECOGNITION - HEAD CROPPING VERSION COMPLETE")
print("✂️ HEAD CROPPING FUNCTIONALITY SUCCESSFULLY IMPLEMENTED")
print("=" * 80)


In [None]:
# ==========================================
# CELL 10: COMPREHENSIVE MODEL COMPARISON & VISUALIZATION (ENHANCED FOR HEAD CROPPING)
# ==========================================

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
import numpy as np

print("📊 COMPREHENSIVE MODEL COMPARISON & VISUALIZATION - HEAD CROPPING VERSION")
print("=" * 80)

# Evaluate each emotion model's per-image probability columns in `df` against
# the ground-truth labels, draw a 2x3 summary figure, and print statistics.
if df is not None and len(df) > 0:

    # Filter only samples with valid ground truth
    valid_df = df[df['ground_truth'].isin(['angry', 'happy', 'relaxed', 'sad'])].copy()
    print(f"📋 Valid samples for evaluation: {len(valid_df)}/{len(df)}")

    if len(valid_df) == 0:
        print("⚠️ No valid samples found for evaluation!")
    else:
        # Label order used both for building column names and for mapping an
        # argmax index back to an emotion name; the two must stay in sync.
        emotion_labels = ['sad', 'angry', 'happy', 'relaxed']

        # Map every expected "<model_key>_<emotion>" column name to its model.
        # Exact-name lookup (instead of a prefix test) keeps a key that is a
        # prefix of another key (e.g. 'resnet50' vs 'resnet50_50e') from
        # claiming the longer key's columns and hiding that model.
        column_owner = {
            f"{model_key}_{emotion}": model_key
            for model_key in EMOTION_MODELS
            for emotion in emotion_labels
        }

        # Walk the DataFrame columns and collect the models that have
        # prediction columns and belong to the model families compared here.
        available_models = []
        for col in valid_df.columns:
            model_key = column_owner.get(col)
            if model_key is None or model_key in available_models:
                continue

            model_info = EMOTION_MODELS[model_key]
            model_type = model_info['type']
            arch = model_info.get('architecture', '')

            if (
                model_type in ['pure34', 'pure50'] or
                (model_type == 'resnet' and arch in ['resnet50', 'resnet101'])
            ):
                available_models.append(model_key)

        print(f"🤖 Available models for comparison: {available_models}")

        if len(available_models) == 0:
            print("⚠️ No model prediction columns found!")
            available_models = ['baseline']  # Create baseline comparison

        # Create model predictions and calculate accuracy
        model_accuracies = {}
        model_predictions = {}
        model_confidences = {}

        for model_name in available_models:
            if model_name == 'baseline':
                # Baseline: random prediction
                predictions = np.random.choice(['angry', 'happy', 'relaxed', 'sad'], len(valid_df))
                confidences = np.random.uniform(0.25, 0.4, len(valid_df))
            else:
                # All four emotion columns must exist for this model
                emotion_cols = [f"{model_name}_{emotion}" for emotion in emotion_labels]
                available_emotion_cols = [col for col in emotion_cols if col in valid_df.columns]

                if len(available_emotion_cols) < len(emotion_labels):
                    print(f"⚠️ Incomplete emotion columns for {model_name}")
                    continue

                # Rows that never received this model's prediction hold NaN in
                # these columns. Treat them like the pipeline treats a failed
                # model (all zeros) so NaN does not leak into the confidence
                # statistics; the predicted label for such rows is unchanged
                # (index 0), only the confidence becomes 0.0 instead of NaN.
                emotion_probs = valid_df[emotion_cols].fillna(0.0).to_numpy(dtype=float)

                # Highest-probability emotion per row and its probability
                best_idx = emotion_probs.argmax(axis=1)
                predictions = np.array(emotion_labels)[best_idx]
                confidences = emotion_probs.max(axis=1)

            # Calculate accuracy
            accuracy = accuracy_score(valid_df['ground_truth'], predictions)
            model_accuracies[model_name] = accuracy
            model_predictions[model_name] = predictions
            model_confidences[model_name] = confidences

            print(f"📈 {model_name}: Accuracy = {accuracy:.3f} ({accuracy*100:.1f}%)")

        # Share of predictions with confidence above 0.9, per model (percent).
        # Computed once and reused by the chart, the table and the printout.
        high_conf_counts = {}
        for model_name, confidences in model_confidences.items():
            high_conf_count = np.sum(confidences > 0.9)
            high_conf_counts[model_name] = (high_conf_count / len(confidences)) * 100

        # ENHANCED: Create performance summary with head cropping statistics
        print(f"\n📊 CREATING ENHANCED VISUALIZATIONS WITH HEAD CROPPING STATS...")

        # 1. MODEL ACCURACY COMPARISON BAR CHART
        plt.figure(figsize=(16, 12))

        plt.subplot(2, 3, 1)
        models = list(model_accuracies.keys())
        accuracies = list(model_accuracies.values())
        colors = plt.cm.Set3(np.linspace(0, 1, len(models)))

        bars = plt.bar(models, accuracies, color=colors, alpha=0.8, edgecolor='black', linewidth=1)
        plt.title('🎯 Model Accuracy Comparison\n(Using Cropped Head Images)', fontsize=14, fontweight='bold')
        plt.ylabel('Accuracy', fontsize=12)
        plt.xlabel('Models', fontsize=12)
        plt.xticks(rotation=45)
        plt.ylim(0, 1)

        # Add accuracy labels on bars (two lines: fraction, then percentage).
        # A real newline is required here; an escaped backslash would print
        # the characters "\n" literally on the chart.
        for bar, acc in zip(bars, accuracies):
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width()/2., height + 0.01,
                    f'{acc:.3f}\n({acc*100:.1f}%)',
                    ha='center', va='bottom', fontweight='bold')

        # Horizontal reference line at 0.9 accuracy
        plt.axhline(y=0.9, color='red', linestyle='--', linewidth=2, label='90% Confidence Target')
        plt.legend()
        plt.grid(True, alpha=0.3)

        # 2. CONFIDENCE DISTRIBUTION
        plt.subplot(2, 3, 2)
        for model_name, confidences in model_confidences.items():
            plt.hist(confidences, bins=20, alpha=0.6, label=f'{model_name}', density=True)

        plt.axvline(x=0.9, color='red', linestyle='--', linewidth=2, label='90% Confidence')
        plt.title('📊 Prediction Confidence Distribution\n(Cropped Head Predictions)', fontsize=14, fontweight='bold')
        plt.xlabel('Confidence Score', fontsize=12)
        plt.ylabel('Density', fontsize=12)
        plt.legend()
        plt.grid(True, alpha=0.3)

        # 3. CLASS DISTRIBUTION
        plt.subplot(2, 3, 3)
        class_counts = valid_df['ground_truth'].value_counts()
        colors_pie = plt.cm.Set2(np.linspace(0, 1, len(class_counts)))

        plt.pie(class_counts.values, labels=class_counts.index,
                autopct='%1.1f%%', colors=colors_pie, startangle=90)
        plt.title('🏷️ Ground Truth Distribution\n(Processed Dataset)', fontsize=14, fontweight='bold')

        # 4. HIGH CONFIDENCE PREDICTIONS (>90%)
        plt.subplot(2, 3, 4)
        models_hc = list(high_conf_counts.keys())
        percentages_hc = list(high_conf_counts.values())
        colors_hc = plt.cm.Set1(np.linspace(0, 1, len(models_hc)))

        bars_hc = plt.bar(models_hc, percentages_hc, color=colors_hc, alpha=0.8,
                         edgecolor='black', linewidth=1)
        plt.title('🎯 High Confidence Predictions (>90%)\n(Using Cropped Heads)', fontsize=14, fontweight='bold')
        plt.ylabel('Percentage of Predictions', fontsize=12)
        plt.xlabel('Models', fontsize=12)
        plt.xticks(rotation=45)

        # Add percentage labels
        for bar, pct in zip(bars_hc, percentages_hc):
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width()/2., height + 0.5,
                    f'{pct:.1f}%', ha='center', va='bottom', fontweight='bold')

        plt.grid(True, alpha=0.3)

        # 5. ENHANCED DETECTION & CROPPING SUCCESS RATES
        # Rates are relative to the evaluated rows (valid_df), in percent.
        plt.subplot(2, 3, 5)
        detection_stats = {
            'Head Detection': (valid_df['head_detected'].sum() / len(valid_df)) * 100,
            'Tail Detection': (valid_df['tail_detected'].sum() / len(valid_df)) * 100,
            'Both Detected': ((valid_df['head_detected'] & valid_df['tail_detected']).sum() / len(valid_df)) * 100,
            'Head Cropping': (valid_df['head_cropped'].sum() / len(valid_df)) * 100 if 'head_cropped' in valid_df.columns else 0
        }

        detection_names = list(detection_stats.keys())
        detection_values = list(detection_stats.values())
        colors_det = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']

        bars_det = plt.bar(detection_names, detection_values, color=colors_det, alpha=0.8,
                          edgecolor='black', linewidth=1)
        plt.title('🔍 Detection & Cropping Success Rates', fontsize=14, fontweight='bold')
        plt.ylabel('Success Rate (%)', fontsize=12)
        plt.xticks(rotation=45)

        # Add percentage labels
        for bar, val in zip(bars_det, detection_values):
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width()/2., height + 1,
                    f'{val:.1f}%', ha='center', va='bottom', fontweight='bold')

        plt.grid(True, alpha=0.3)

        # 6. ENHANCED MODEL PERFORMANCE SUMMARY TABLE
        plt.subplot(2, 3, 6)
        plt.axis('off')

        # One table row per evaluated model: name, accuracy, mean confidence,
        # share of >0.9-confidence predictions, and an accuracy-based status.
        summary_data = []
        for model_name in model_accuracies.keys():
            acc = model_accuracies[model_name]
            avg_conf = np.mean(model_confidences[model_name])
            high_conf_pct = high_conf_counts[model_name]

            # Status is determined by accuracy thresholds 0.9 / 0.8 / 0.7
            if acc >= 0.9:
                status = "🎉 EXCELLENT (Cropped)"
            elif acc >= 0.8:
                status = "✅ GOOD (Cropped)"
            elif acc >= 0.7:
                status = "⚠️ FAIR (Cropped)"
            else:
                status = "❌ POOR (Cropped)"

            summary_data.append([
                model_name,
                f"{acc:.3f}",
                f"{avg_conf:.3f}",
                f"{high_conf_pct:.1f}%",
                status
            ])

        # Only draw the table when at least one model was evaluated
        if len(summary_data) > 0:
            table_headers = ['Model', 'Accuracy', 'Avg Conf', '>90% Conf', 'Status']

            table = plt.table(cellText=summary_data, colLabels=table_headers,
                             cellLoc='center', loc='center',
                             colWidths=[0.2, 0.15, 0.15, 0.15, 0.25])
            table.auto_set_font_size(False)
            table.set_fontsize(9)
            table.scale(1, 2)

            # Style the table: row 0 is the header, rows 1.. map to
            # summary_data[i-1]; column 4 is the status column.
            num_rows = len(summary_data) + 1  # +1 for header
            num_cols = len(table_headers)

            for i in range(num_rows):
                for j in range(num_cols):
                    cell = table[(i, j)]
                    if i == 0:  # Header
                        cell.set_facecolor('#4ECDC4')
                        cell.set_text_props(weight='bold')
                    elif j == 4:  # Status column
                        status_text = summary_data[i-1][j]
                        if 'EXCELLENT' in status_text:
                            cell.set_facecolor('#90EE90')
                        elif 'GOOD' in status_text:
                            cell.set_facecolor('#FFFFE0')
                        elif 'FAIR' in status_text:
                            cell.set_facecolor('#FFE4B5')
                        else:
                            cell.set_facecolor('#FFB6C1')

        plt.title('📋 Model Performance Summary\n(Head Cropping Enhanced)', fontsize=14, fontweight='bold', pad=20)

        plt.tight_layout()
        plt.show()

        # DETAILED STATISTICS WITH HEAD CROPPING INFO
        print(f"\n📊 DETAILED MODEL STATISTICS - HEAD CROPPING VERSION:")
        print("=" * 80)

        for model_name in model_accuracies.keys():
            # The random baseline is plotted but not reported in detail
            if model_name == 'baseline':
                continue

            print(f"\n🤖 {model_name.upper()} (Using Cropped Head Images):")
            print(f"   📈 Accuracy: {model_accuracies[model_name]:.4f} ({model_accuracies[model_name]*100:.2f}%)")
            print(f"   🎯 Average Confidence: {np.mean(model_confidences[model_name]):.4f}")
            print(f"   🔥 High Confidence (>90%): {high_conf_counts[model_name]:.1f}%")
            print(f"   📊 Min Confidence: {np.min(model_confidences[model_name]):.4f}")
            print(f"   📊 Max Confidence: {np.max(model_confidences[model_name]):.4f}")

        # HEAD CROPPING SPECIFIC STATISTICS
        if 'head_cropped' in valid_df.columns:
            cropping_success = valid_df['head_cropped'].sum()
            cropping_rate = (cropping_success / len(valid_df)) * 100
            print(f"\n✂️ HEAD CROPPING STATISTICS:")
            print(f"   📊 Successfully cropped heads: {cropping_success}/{len(valid_df)}")
            print(f"   📈 Head cropping success rate: {cropping_rate:.1f}%")

            if 'head_confidence' in valid_df.columns:
                avg_head_conf = valid_df['head_confidence'].mean()
                print(f"   🎯 Average head detection confidence: {avg_head_conf:.4f}")

else:
    print("❌ No data available for visualization!")
    print("   Please run the processing cells first to generate dataset")

print("\n✅ ENHANCED Visualization and analysis with HEAD CROPPING completed!")
print("✂️ All emotion predictions were made using CROPPED HEAD IMAGES")
print("🎯 This should provide more focused and accurate emotion recognition!")
