# Dog Emotion Classification - Training on Keypoints

Trening modelu klasyfikacji emocji psów na podstawie 20 keypoints twarzy.

**Architektura:**
- Input: 60 wartości (20 keypoints × 3: x, y, visibility)
- Model: MLP (256 → 128 → 64 → 6)
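- Output: 6 klas emocji (happy, sad, angry, fearful, relaxed, neutral); klasa `neutral` nie występuje w 5-klasowym datasecie emocji, więc na prawdziwych danych nie ma dla niej próbek treningowych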

**Wymagane datasety Kaggle:**
1. `lovodkin/dogflw` - keypoints twarzy psów (46 punktów → mapujemy do 20)
2. `dougandrade/dog-emotions-5-classes` - obrazy z etykietami emocji

**Po zakończeniu:**
Pobierz plik `emotion_keypoints.pt` i przekaż go do projektu.

In [None]:
# Instalacja zależności
!pip install -q timm albumentations

In [None]:
import os
import json
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from pathlib import Path
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from PIL import Image
import cv2

# Compute device: use CUDA when PyTorch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Device: {device}')

## 1. Konfiguracja

In [None]:
# === CONFIGURATION ===

# Dataset locations (adjust to wherever the Kaggle inputs are mounted)
DOGFLW_PATH = '/kaggle/input/dogflw'  # dataset with facial keypoints
EMOTIONS_PATH = '/kaggle/input/dog-emotions-5-classes'  # dataset with emotion labels

# Model dimensions: every keypoint contributes three values (x, y, visibility)
NUM_KEYPOINTS = 20
INPUT_FEATURES = 3 * NUM_KEYPOINTS  # 60
EMOTION_CLASSES = ['happy', 'sad', 'angry', 'fearful', 'relaxed', 'neutral']
NUM_EMOTIONS = len(EMOTION_CLASSES)  # 6

# Folder name -> class index. The 5-class dataset has no 'neutral' images,
# and it may name the fear class 'fear' rather than 'fearful', so both
# spellings map to the same index.
EMOTION_MAPPING = dict(
    happy=0,
    sad=1,
    angry=2,
    fear=3,
    fearful=3,
    relaxed=4,
    neutral=5,
)

# Training hyper-parameters
BATCH_SIZE = 64
EPOCHS = 100
LEARNING_RATE = 1e-3
HIDDEN_DIMS = [256, 128, 64]
DROPOUT = 0.3

print(f'Input features: {INPUT_FEATURES}')
print(f'Emotion classes: {EMOTION_CLASSES}')

## 2. Mapping DogFLW (46) → Project (20) Keypoints

In [None]:
# Single source of truth for the keypoint layout: one row per project
# keypoint, in project-index order, paired with the DogFLW landmark index it
# is taken from.
_KEYPOINT_TABLE = [
    ("left_eye", 0),             # project index 0
    ("right_eye", 1),            # 1
    ("nose", 14),                # 2
    ("left_ear_base", 32),       # 3
    ("right_ear_base", 36),      # 4
    ("left_ear_tip", 34),        # 5
    ("right_ear_tip", 38),       # 6
    ("left_mouth_corner", 20),   # 7
    ("right_mouth_corner", 24),  # 8
    ("upper_lip", 22),           # 9
    ("lower_lip", 26),           # 10
    ("chin", 28),                # 11
    ("left_cheek", 4),           # 12
    ("right_cheek", 8),          # 13
    ("forehead", 40),            # 14
    ("left_eyebrow", 42),        # 15
    ("right_eyebrow", 44),       # 16
    ("muzzle_top", 16),          # 17
    ("muzzle_left", 6),          # 18
    ("muzzle_right", 10),        # 19
]

# Names of the 20 project keypoints, indexed by project index
KEYPOINT_NAMES = [name for name, _ in _KEYPOINT_TABLE]

# Mapping: DogFLW index -> project index
DOGFLW_TO_PROJECT = {
    dogflw_idx: project_idx
    for project_idx, (_, dogflw_idx) in enumerate(_KEYPOINT_TABLE)
}

# Inverse mapping: project index -> DogFLW index
PROJECT_TO_DOGFLW = {
    project_idx: dogflw_idx
    for dogflw_idx, project_idx in DOGFLW_TO_PROJECT.items()
}

print(f'Mapping zdefiniowany: {len(PROJECT_TO_DOGFLW)} keypoints')

## 3. Model Keypoints (do ekstrakcji z obrazów)

In [None]:
import timm
from torchvision import transforms

class SimpleBaselineModel(nn.Module):
    """Heatmap network for the 46 DogFLW landmarks.

    A timm ResNet-50 trunk (last feature map only) is followed by three
    transposed-convolution stages and a 1x1 convolution that emits one
    heatmap per keypoint.
    """

    def __init__(self, num_keypoints=46):
        super().__init__()
        # Backbone: weights are not downloaded here; they are expected to
        # come from a checkpoint loaded by the caller.
        self.bb = timm.create_model(
            'resnet50',
            pretrained=False,
            features_only=True,
            out_indices=[-1],
        )

        # Three upsampling stages. Kernel 4 / stride 2 / padding 1 doubles the
        # spatial size each time. Only the first stage consumes the
        # 2048-channel backbone output. The flat nn.Sequential keeps the
        # original sub-module indices (0..8) so checkpoints still load.
        stages = []
        in_channels = 2048
        for _ in range(3):
            stages += [
                nn.ConvTranspose2d(in_channels, 256, 4, 2, 1),
                nn.BatchNorm2d(256),
                nn.ReLU(True),
            ]
            in_channels = 256
        self.head = nn.Sequential(*stages)

        # One output channel (heatmap) per keypoint
        self.out = nn.Conv2d(256, num_keypoints, 1)

    def forward(self, x):
        """Return per-keypoint heatmaps for a batch of images."""
        feature_maps = self.bb(x)
        upsampled = self.head(feature_maps[-1])
        return self.out(upsampled)


class KeypointsExtractor:
    """Extract the 20 project keypoints from an image.

    Runs ``SimpleBaselineModel`` (46 DogFLW heatmaps), takes the arg-max of
    every heatmap as the landmark position, and keeps the landmarks selected
    by ``PROJECT_TO_DOGFLW``.
    """

    def __init__(self, weights_path=None):
        """Build the model and optionally load its weights.

        Args:
            weights_path: Path to a state-dict checkpoint. When it is missing
                or does not exist, the model keeps its random initialisation
                and a warning is printed.
        """
        self.device = device
        self.model = SimpleBaselineModel(num_keypoints=46)

        if weights_path and os.path.exists(weights_path):
            # NOTE: torch.load unpickles the file; only load trusted weights.
            state_dict = torch.load(weights_path, map_location=self.device)
            self.model.load_state_dict(state_dict)
            print(f'Loaded keypoints weights: {weights_path}')
        else:
            print('WARNING: No keypoints weights loaded!')

        self.model = self.model.to(self.device)
        self.model.eval()

        # Resize to the 256x256 network input and apply ImageNet statistics.
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])

    def extract(self, image):
        """Return the 20 project keypoints of ``image``.

        Args:
            image: Path to an image file (``str`` or ``os.PathLike``) or an
                already loaded RGB array of shape (H, W, 3).

        Returns:
            List of 20 ``(x, y, visibility)`` tuples; x/y are in pixels of the
            original image, visibility is the raw heatmap peak value.

        Raises:
            ValueError: If the image file cannot be read.
        """
        if isinstance(image, (str, os.PathLike)):
            path = os.fspath(image)
            image = cv2.imread(path)
            if image is None:
                # cv2.imread signals failure by returning None; fail with a
                # clear message instead of a cryptic cvtColor error.
                raise ValueError(f'Could not read image: {path}')
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        h, w = image.shape[:2]
        tensor = self.transform(image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            heatmaps = self.model(tensor)[0]  # (46, H, W)

        # Decode all DogFLW landmarks, then keep the 20 the project uses.
        dogflw_keypoints = self._decode_heatmaps(heatmaps, w, h)
        return self._map_to_project(dogflw_keypoints)

    def _decode_heatmaps(self, heatmaps, target_w, target_h):
        """Convert a stack of heatmaps into ``(x, y, visibility)`` tuples.

        Args:
            heatmaps: Tensor of shape (K, hm_h, hm_w); K is taken from the
                tensor rather than assumed to be 46.
            target_w: Width of the image the coordinates are scaled to.
            target_h: Height of the image the coordinates are scaled to.

        Returns:
            List of K tuples, one per heatmap, in heatmap order.
        """
        num_maps, hm_h, hm_w = heatmaps.shape
        scale_x = target_w / hm_w
        scale_y = target_h / hm_h

        # A single device-to-host transfer for the whole stack instead of one
        # per keypoint.
        flat = heatmaps.detach().cpu().numpy().reshape(num_maps, hm_h * hm_w)
        peak_indices = flat.argmax(axis=1)
        peak_values = flat.max(axis=1)

        keypoints = []
        for flat_idx, peak in zip(peak_indices, peak_values):
            # Undo the row-major flattening: index = y * hm_w + x
            y_hm, x_hm = divmod(int(flat_idx), hm_w)
            keypoints.append(
                (float(x_hm * scale_x), float(y_hm * scale_y), float(peak))
            )
        return keypoints

    def _map_to_project(self, dogflw_keypoints):
        """Select the project keypoints, ordered by project index."""
        return [
            dogflw_keypoints[PROJECT_TO_DOGFLW[proj_idx]]
            for proj_idx in range(len(PROJECT_TO_DOGFLW))
        ]


print('KeypointsExtractor defined')

## 4. Model Emocji (MLP)

In [None]:
class KeypointsEmotionMLP(nn.Module):
    """Fully connected emotion classifier operating on flattened keypoints.

    Every hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout; a final
    Linear layer produces one logit per emotion class.
    """

    def __init__(
        self,
        input_dim=INPUT_FEATURES,
        hidden_dims=None,
        num_classes=NUM_EMOTIONS,
        dropout=0.3,
    ):
        super().__init__()

        widths = [256, 128, 64] if hidden_dims is None else hidden_dims

        # Layer sizes as a chain: input -> hidden_1 -> ... -> hidden_n
        dims = [input_dim, *widths]
        modules = []
        for n_in, n_out in zip(dims, dims[1:]):
            modules += [
                nn.Linear(n_in, n_out),
                nn.BatchNorm1d(n_out),
                nn.ReLU(inplace=True),
                nn.Dropout(dropout),
            ]

        # Classification head on top of the last hidden width
        modules.append(nn.Linear(dims[-1], num_classes))
        self.network = nn.Sequential(*modules)

    def forward(self, x):
        """Return raw class logits for a batch of keypoint vectors."""
        logits = self.network(x)
        return logits


# Smoke test: build the classifier from the configured hyper-parameters and
# report its size and layout.
model = KeypointsEmotionMLP(
    input_dim=INPUT_FEATURES,
    hidden_dims=HIDDEN_DIMS,
    num_classes=NUM_EMOTIONS,
    dropout=DROPOUT,
)
print(f'Model parameters: {sum(map(torch.numel, model.parameters())):,}')
print(model)

## 5. Przygotowanie danych

**Opcja A:** Użyj gotowych keypoints z DogFLW + etykiet emocji

**Opcja B:** Ekstrahuj keypoints z obrazów emotion dataset

In [None]:
# Report which of the expected Kaggle inputs are mounted, with a short
# preview (first five entries) of each one that is.
print('=== Dostępne datasety ===')

if not os.path.exists(DOGFLW_PATH):
    print(f'✗ DogFLW not found at {DOGFLW_PATH}')
else:
    print(f'✓ DogFLW: {DOGFLW_PATH}')
    print(f'  Files: {os.listdir(DOGFLW_PATH)[:5]}...')

if not os.path.exists(EMOTIONS_PATH):
    print(f'✗ Emotions not found at {EMOTIONS_PATH}')
else:
    print(f'✓ Emotions: {EMOTIONS_PATH}')
    print(f'  Files: {os.listdir(EMOTIONS_PATH)[:5]}...')

In [None]:
# === OPCJA A: Załaduj DogFLW keypoints ===
# DogFLW zawiera keypoints, ale potrzebujemy etykiet emocji

def load_dogflw_keypoints(dogflw_path):
    """Load the DogFLW annotation table.

    Looks directly inside ``dogflw_path`` for ``landmarks.csv``,
    ``annotations.csv`` and ``keypoints.csv`` (in that order) and reads the
    first one that exists.

    Returns:
        The CSV as a DataFrame, or ``None`` when none of the files is present.
    """
    candidates = (
        os.path.join(dogflw_path, name)
        for name in ('landmarks.csv', 'annotations.csv', 'keypoints.csv')
    )
    filepath = next((path for path in candidates if os.path.exists(path)), None)

    if filepath is None:
        print('No keypoints CSV found in DogFLW')
        return None

    df = pd.read_csv(filepath)
    print(f'Loaded: {filepath}')
    print(f'Columns: {list(df.columns)[:10]}...')
    return df

# dogflw_df = load_dogflw_keypoints(DOGFLW_PATH)

In [None]:
# === OPCJA B: Ekstrahuj keypoints z emotion dataset ===

def prepare_emotion_dataset(emotions_path, keypoints_extractor=None):
    """Build a table of emotion-labelled images, optionally with keypoints.

    Expected layout::

        emotions_path/
            happy/
                img1.jpg
                img2.jpg
            sad/
                ...

    Folder names are lower-cased and looked up in ``EMOTION_MAPPING``;
    folders with unknown names are skipped with a message. Only ``.jpg``,
    ``.jpeg`` and ``.png`` files are considered.

    Args:
        emotions_path: Root directory containing one sub-folder per emotion.
        keypoints_extractor: Optional object with an ``extract(path)`` method
            returning ``(x, y, visibility)`` tuples. When given, every row
            gets a flat ``keypoints`` list; images whose extraction fails are
            reported and left out.

    Returns:
        DataFrame with columns ``image_path``, ``emotion_id``,
        ``emotion_name`` and, when an extractor is used, ``keypoints``.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    data = []

    # os.listdir() returns entries in arbitrary order. Sorting makes the row
    # order, and therefore any seeded train/val split, reproducible.
    for emotion_folder in sorted(os.listdir(emotions_path)):
        folder_path = os.path.join(emotions_path, emotion_folder)
        if not os.path.isdir(folder_path):
            continue

        emotion_name = emotion_folder.lower()
        if emotion_name not in EMOTION_MAPPING:
            print(f'Skipping unknown emotion: {emotion_name}')
            continue

        emotion_id = EMOTION_MAPPING[emotion_name]

        images = sorted(
            f for f in os.listdir(folder_path)
            if f.lower().endswith(image_suffixes)
        )

        print(f'{emotion_name}: {len(images)} images')

        for img_name in tqdm(images, desc=emotion_name):
            img_path = os.path.join(folder_path, img_name)
            record = {
                'image_path': img_path,
                'emotion_id': emotion_id,
                'emotion_name': emotion_name,
            }

            if keypoints_extractor:
                # Best effort: one unreadable image must not abort the run.
                try:
                    keypoints = keypoints_extractor.extract(img_path)
                    # Flatten: [(x0,y0,v0), (x1,y1,v1), ...] -> [x0,y0,v0,x1,...]
                    record['keypoints'] = [
                        value
                        for x, y, v in keypoints
                        for value in (x, y, v)
                    ]
                except Exception as e:
                    print(f'Error processing {img_path}: {e}')
                    continue

            data.append(record)

    return pd.DataFrame(data)

print('prepare_emotion_dataset() defined')

## 6. Załaduj/Przygotuj dane

In [None]:
# Build the keypoint extractor when pretrained weights are available
KEYPOINTS_WEIGHTS = '/kaggle/input/dogflw/keypoints_best.pt'  # adjust the path as needed

if not os.path.exists(KEYPOINTS_WEIGHTS):
    # Without weights the notebook falls back to synthetic data later on.
    print('Keypoints weights not found. Using None (will generate synthetic data)')
    extractor = None
else:
    extractor = KeypointsExtractor(KEYPOINTS_WEIGHTS)

In [None]:
# Build the training table: real keypoints when both the extractor and the
# emotion images are available, random noise otherwise.
if not (extractor and os.path.exists(EMOTIONS_PATH)):
    print('Using synthetic data for testing...')
    # Seeded Gaussian features with uniformly random labels; only useful for
    # checking that the pipeline runs end to end.
    np.random.seed(42)
    n_samples = 5000

    synthetic_keypoints = np.random.randn(n_samples, INPUT_FEATURES).astype(np.float32)
    synthetic_labels = np.random.randint(0, NUM_EMOTIONS, n_samples)

    df = pd.DataFrame({
        'emotion_id': synthetic_labels,
        'keypoints': list(map(list, synthetic_keypoints)),
    })
    print(f'Synthetic dataset: {len(df)} samples')
else:
    print('Extracting keypoints from emotion images...')
    df = prepare_emotion_dataset(EMOTIONS_PATH, extractor)

    # Persist the extracted keypoints as CSV for later reuse
    df.to_csv('emotion_keypoints_dataset.csv', index=False)
    print(f'Dataset saved: emotion_keypoints_dataset.csv ({len(df)} samples)')

print(f'\nDataset shape: {df.shape}')
print(f'Emotion distribution:\n{df["emotion_id"].value_counts().sort_index()}')

## 7. Dataset i DataLoader

In [None]:
class EmotionKeypointsDataset(Dataset):
    """Torch dataset of normalized keypoint vectors with emotion labels.

    Expects a DataFrame with an ``emotion_id`` column and a ``keypoints``
    column holding flat ``[x0, y0, v0, x1, y1, v1, ...]`` sequences, either
    as Python/NumPy sequences or as their string form (as produced by a CSV
    round trip).
    """

    def __init__(self, dataframe):
        """Parse, convert and normalize the samples.

        Raises:
            ValueError: If ``dataframe`` has no rows, or a string-encoded
                keypoint entry is not a plain literal.
        """
        import ast  # local import: only needed for string-encoded keypoints

        self.df = dataframe.reset_index(drop=True)
        if len(self.df) == 0:
            raise ValueError('EmotionKeypointsDataset requires at least one sample')

        raw_keypoints = self.df['keypoints']
        if isinstance(raw_keypoints.iloc[0], str):
            # SECURITY: these strings come from a file on disk. The original
            # eval() would execute arbitrary code embedded in the CSV;
            # ast.literal_eval only accepts Python literals.
            rows = [ast.literal_eval(kp) for kp in raw_keypoints]
        else:
            rows = raw_keypoints.tolist()

        self.keypoints = np.array(rows, dtype=np.float32)
        self.labels = self.df['emotion_id'].values.astype(np.int64)

        # Normalize the coordinates
        self.keypoints = self._normalize(self.keypoints)

    def _normalize(self, keypoints):
        """Min-max scale x and y of every sample to [-1, 1].

        Columns 0, 3, 6, ... are x; 1, 4, 7, ... are y; 2, 5, 8, ... are
        visibility. x and y are scaled independently per sample; an axis with
        zero range is left untouched, and visibility is never modified.

        Args:
            keypoints: Float array of shape (n_samples, 3 * n_keypoints).

        Returns:
            A normalized copy; the input array is not modified.
        """
        normalized = keypoints.copy()

        for offset in (0, 1):  # 0 -> x columns, 1 -> y columns
            coords = normalized[:, offset::3]  # strided view into `normalized`
            lows = coords.min(axis=1, keepdims=True)
            spans = coords.max(axis=1, keepdims=True) - lows
            # Rows with zero (or NaN) range cannot be scaled; skip them.
            scalable = spans[:, 0] > 0
            coords[scalable] = (
                2 * (coords[scalable] - lows[scalable]) / spans[scalable] - 1
            )

        return normalized

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(keypoints float32 tensor, label int64 tensor)``."""
        return (
            torch.from_numpy(self.keypoints[idx]),
            torch.tensor(self.labels[idx]),
        )


# Build the dataset
full_dataset = EmotionKeypointsDataset(df)
print(f'Dataset size: {len(full_dataset)}')

# 80/20 train/validation split with a fixed seed
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size

train_dataset, val_dataset = random_split(
    full_dataset, [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)

print(f'Train: {len(train_dataset)}, Val: {len(val_dataset)}')

# DataLoaders
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    # BatchNorm1d cannot compute batch statistics from a single sample in
    # training mode and raises. Drop the trailing batch only in the one case
    # where it would contain exactly one sample.
    drop_last=(len(train_dataset) % BATCH_SIZE == 1),
)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

## 8. Trening

In [None]:
def train_epoch(model, loader, criterion, optimizer):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Module with at least one parameter; batches are moved to the
            device of its first parameter.
        loader: Iterable of ``(keypoints, labels)`` batches.
        criterion: Loss returning the mean over the batch (e.g. the default
            ``nn.CrossEntropyLoss``).
        optimizer: Optimizer stepping ``model``'s parameters.

    Returns:
        ``(mean loss per sample, accuracy in percent)``.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()
    # Follow the model's own device instead of a module-level global.
    target_device = next(model.parameters()).device

    loss_sum = 0.0
    correct = 0
    total = 0

    for keypoints, labels in loader:
        keypoints = keypoints.to(target_device)
        labels = labels.to(target_device)

        optimizer.zero_grad()
        outputs = model(keypoints)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight each batch mean by its size so a shorter last batch does
        # not count as much as a full one.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        _, predicted = outputs.max(1)
        total += batch_size
        correct += predicted.eq(labels).sum().item()

    if total == 0:
        raise ValueError('train_epoch received a loader with no samples')

    return loss_sum / total, 100 * correct / total


def evaluate(model, loader, criterion):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: Module with at least one parameter; batches are moved to the
            device of its first parameter.
        loader: Iterable of ``(keypoints, labels)`` batches.
        criterion: Loss returning the mean over the batch (e.g. the default
            ``nn.CrossEntropyLoss``).

    Returns:
        ``(mean loss per sample, accuracy in percent)``.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    # Follow the model's own device instead of a module-level global.
    target_device = next(model.parameters()).device

    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for keypoints, labels in loader:
            keypoints = keypoints.to(target_device)
            labels = labels.to(target_device)

            outputs = model(keypoints)
            loss = criterion(outputs, labels)

            # Weight each batch mean by its size so a shorter last batch
            # does not count as much as a full one.
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            _, predicted = outputs.max(1)
            total += batch_size
            correct += predicted.eq(labels).sum().item()

    if total == 0:
        raise ValueError('evaluate received a loader with no samples')

    return loss_sum / total, 100 * correct / total

In [None]:
# Initialisation: fresh model, loss, optimizer and LR scheduler
model = KeypointsEmotionMLP(
    input_dim=INPUT_FEATURES,
    hidden_dims=HIDDEN_DIMS,
    num_classes=NUM_EMOTIONS,
    dropout=DROPOUT,
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=0.01)
# Halve the learning rate after 10 epochs without validation-loss improvement.
# The `verbose` argument is deprecated in recent PyTorch releases, so it is
# not passed; read optimizer.param_groups[0]['lr'] to inspect the current rate.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=10
)

# Per-epoch history and best-checkpoint bookkeeping
history = {
    'train_loss': [], 'train_acc': [],
    'val_loss': [], 'val_acc': [],
}
best_val_acc = 0
best_model_state = None

print(f'Starting training for {EPOCHS} epochs...')
print('=' * 70)

In [None]:
# Training loop: one train pass and one validation pass per epoch
for epoch in range(EPOCHS):
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer)
    val_loss, val_acc = evaluate(model, val_loader, criterion)

    # ReduceLROnPlateau is driven by the validation loss
    scheduler.step(val_loss)

    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    # Also accept the very first result, so a checkpoint exists even if the
    # validation accuracy never rises above its initial value.
    if best_model_state is None or val_acc > best_val_acc:
        best_val_acc = val_acc
        # state_dict() returns references to the live parameter tensors and
        # dict.copy() is shallow, so the original "snapshot" kept changing
        # with every later optimizer step. Clone each tensor to freeze it.
        # Moving to CPU also makes the file loadable without a GPU.
        best_model_state = {
            name: tensor.detach().cpu().clone()
            for name, tensor in model.state_dict().items()
        }
        torch.save(best_model_state, 'emotion_keypoints.pt')

    if (epoch + 1) % 5 == 0 or epoch == 0:
        print(
            f'Epoch {epoch+1:3d}/{EPOCHS} | '
            f'Train Loss: {train_loss:.4f} Acc: {train_acc:.2f}% | '
            f'Val Loss: {val_loss:.4f} Acc: {val_acc:.2f}% | '
            f'Best: {best_val_acc:.2f}%'
        )

print('=' * 70)
print(f'Training complete! Best validation accuracy: {best_val_acc:.2f}%')

## 9. Wizualizacja wyników

In [None]:
# Side-by-side training curves: loss on the left, accuracy on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

panels = (
    # (axis, train key, train label, val key, val label, y label, title)
    (axes[0], 'train_loss', 'Train Loss', 'val_loss', 'Val Loss',
     'Loss', 'Training & Validation Loss'),
    (axes[1], 'train_acc', 'Train Acc', 'val_acc', 'Val Acc',
     'Accuracy (%)', 'Training & Validation Accuracy'),
)

for ax, train_key, train_label, val_key, val_label, y_label, title in panels:
    ax.plot(history[train_key], label=train_label)
    ax.plot(history[val_key], label=val_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.savefig('training_history.png', dpi=150)
plt.show()

## 10. Ewaluacja końcowa

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Restore the best checkpoint. map_location places the tensors on the active
# device regardless of the device they were saved from.
model.load_state_dict(torch.load('emotion_keypoints.pt', map_location=device))
model.eval()

# Predictions on the validation set
all_preds = []
all_labels = []

with torch.no_grad():
    for keypoints, labels in val_loader:
        keypoints = keypoints.to(device)
        outputs = model(keypoints)
        _, predicted = outputs.max(1)

        all_preds.extend(predicted.cpu().tolist())
        all_labels.extend(labels.tolist())

# Classification report
# Pass the full label set explicitly: if a class never occurs in the
# validation data (the 5-class dataset has no 'neutral' images), the labels
# inferred from the data would not match the 6 target_names and
# classification_report would raise a ValueError. zero_division=0 reports
# 0.0 for such empty classes instead of emitting warnings.
print('Classification Report:')
print(classification_report(
    all_labels, all_preds,
    labels=list(range(NUM_EMOTIONS)),
    target_names=EMOTION_CLASSES,
    digits=3,
    zero_division=0,
))

In [None]:
# Confusion matrix
# Fix the label set so the matrix is always NUM_EMOTIONS x NUM_EMOTIONS.
# Without it, a class missing from the validation data would shrink the
# matrix and the 6 tick labels would no longer line up with its rows/columns.
cm = confusion_matrix(
    all_labels, all_preds,
    labels=list(range(NUM_EMOTIONS)),
)

plt.figure(figsize=(10, 8))
sns.heatmap(
    cm, annot=True, fmt='d', cmap='Blues',
    xticklabels=EMOTION_CLASSES,
    yticklabels=EMOTION_CLASSES,
)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.tight_layout()
plt.savefig('confusion_matrix.png', dpi=150)
plt.show()

## 11. Zapisz model i metryki

In [None]:
# Save the final model
# torch.save(None, ...) would succeed and silently produce a checkpoint that
# cannot be loaded, so fail loudly if training never produced a state.
if best_model_state is None:
    raise RuntimeError(
        'No trained weights available: run the training cell before saving.'
    )
torch.save(best_model_state, 'emotion_keypoints.pt')
print('Model saved: emotion_keypoints.pt')

# Save the run configuration and metrics next to the weights
metrics = {
    'best_val_accuracy': best_val_acc,
    'epochs': EPOCHS,
    'batch_size': BATCH_SIZE,
    'learning_rate': LEARNING_RATE,
    'hidden_dims': HIDDEN_DIMS,
    'dropout': DROPOUT,
    'num_keypoints': NUM_KEYPOINTS,
    'input_features': INPUT_FEATURES,
    'emotion_classes': EMOTION_CLASSES,
    'train_samples': len(train_dataset),
    'val_samples': len(val_dataset),
    'history': history,
}

with open('emotion_keypoints_metrics.json', 'w', encoding='utf-8') as f:
    json.dump(metrics, f, indent=2)

print('Metrics saved: emotion_keypoints_metrics.json')

## 12. Weryfikacja modelu

In [None]:
# Check that the saved checkpoint can be loaded into a fresh model and used
test_model = KeypointsEmotionMLP(
    input_dim=INPUT_FEATURES,
    hidden_dims=HIDDEN_DIMS,
    num_classes=NUM_EMOTIONS,
    dropout=DROPOUT,
)
# test_model lives on the CPU, so map the checkpoint there explicitly. This
# also lets the check run on a machine without a GPU even when the
# checkpoint was written from one.
test_model.load_state_dict(torch.load('emotion_keypoints.pt', map_location='cpu'))
test_model.eval()

# Inference on one random input vector
dummy_input = torch.randn(1, INPUT_FEATURES)
with torch.no_grad():
    output = test_model(dummy_input)
    probs = torch.softmax(output, dim=1)[0]

print('Test inference:')
print(f'Input shape: {dummy_input.shape}')
print(f'Output shape: {output.shape}')
print('Probabilities:')
for i, emotion in enumerate(EMOTION_CLASSES):
    print(f'  {emotion}: {probs[i].item():.2%}')

print('\n✓ Model verified successfully!')

---

## Instrukcje po treningu

### Pobierz następujące pliki:

1. **`emotion_keypoints.pt`** - wagi modelu (WYMAGANE)
2. **`emotion_keypoints_metrics.json`** - metryki treningu
3. **`training_history.png`** - wykres treningu
4. **`confusion_matrix.png`** - macierz pomyłek

### Przekaż plik `emotion_keypoints.pt` do projektu:

Umieść go w katalogu `models/` projektu.

### Oczekiwana dokładność:

- Na syntetycznych danych: ~16-20% (random baseline dla 6 klas)
- Na prawdziwych danych: 40-70% (zależnie od jakości)

---