In [41]:
# 1) Environment check and setup
import sys, os, random
import numpy as np
import torch
import torchvision

print('Python:', sys.version)
print('PyTorch:', torch.__version__)
print('torchvision:', torchvision.__version__)
print('CUDA available:', torch.cuda.is_available())
print('Device count:', torch.cuda.device_count())

# Reproducibility
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)


Python: 3.11.9 (v3.11.9:de54cf5be3, Apr  2 2024, 07:12:50) [Clang 13.0.0 (clang-1300.0.29.30)]
PyTorch: 2.2.2
torchvision: 0.17.2
CUDA available: False
Device count: 0
Using device: cpu


In [43]:
import requests, tarfile

urls = [
    "https://huggingface.co/datasets/stockeh/dog-pose-cv/resolve/main/data/images.tar.gz",
    "https://huggingface.co/datasets/stockeh/dog-pose-cv/resolve/main/data/labels.tar.gz"
]

output_dir = "./dog_pose_data"
os.makedirs(output_dir, exist_ok=True)

for url in urls:
    filename = os.path.join(output_dir, url.split("/")[-1])
    print(f"Downloading {url} to {filename}")
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print(f"Downloaded {filename}")

        # Extract the tar.gz file
        if filename.endswith(".tar.gz"):
            print(f"Extracting {filename}")
            with tarfile.open(filename, "r:gz") as tar:
                tar.extractall(path=output_dir)
            print(f"Extracted {filename}")
    else:
        print(f"Failed to download {url}. Status code: {response.status_code}")

Downloading https://huggingface.co/datasets/stockeh/dog-pose-cv/resolve/main/data/images.tar.gz to ./dog_pose_data/images.tar.gz
Downloaded ./dog_pose_data/images.tar.gz
Extracting ./dog_pose_data/images.tar.gz
Extracted ./dog_pose_data/images.tar.gz
Downloading https://huggingface.co/datasets/stockeh/dog-pose-cv/resolve/main/data/labels.tar.gz to ./dog_pose_data/labels.tar.gz
Downloaded ./dog_pose_data/labels.tar.gz
Extracting ./dog_pose_data/labels.tar.gz
Extracted ./dog_pose_data/labels.tar.gz


In [44]:
extracted_files = os.listdir(output_dir)
print("Extracted files:")
for item in extracted_files:
    print(item)

# You might want to inspect the contents of some of these files
# For example, if there are subdirectories, list their contents
# Or if there are annotation files (like .txt or .xml), read a few lines

Extracted files:
labels.tar.gz
images
labels
._labels
._images
images.tar.gz


In [45]:
import pandas as pd

labels_dir = os.path.join(output_dir, 'labels')
all_labels_list = []

# Iterate through each label file in the labels directory
for filename in os.listdir(labels_dir):
    # Skip macOS resource fork files and non-CSV
    if filename.startswith('._') or not filename.endswith('.csv'):
        continue

    file_path = os.path.join(labels_dir, filename)
    try:
        # Read the CSV file into a DataFrame
        df = pd.read_csv(file_path)
        # Add a column for the breed name (derived from the filename)
        breed_name = filename.replace('.csv', '')
        df['breed'] = breed_name
        all_labels_list.append(df)
    except Exception as e:
        print(f"Error reading {filename}: {e}")

# Concatenate all DataFrames into a single DataFrame
all_labels_df = pd.concat(all_labels_list, ignore_index=True)

print("Combined labels DataFrame:")
display(all_labels_df.head())
display(all_labels_df.tail())

Combined labels DataFrame:


Unnamed: 0,id,label,breed
0,n02085936_8767.jpg,sitting,n02085936-Maltese_dog
1,n02085936_20610.jpg,sitting,n02085936-Maltese_dog
2,n02085936_16188.jpg,lying,n02085936-Maltese_dog
3,n02085936_7865.jpg,undefined,n02085936-Maltese_dog
4,n02085936_500.jpg,standing,n02085936-Maltese_dog


Unnamed: 0,id,label,breed
20418,n02113023_6140.jpg,lying,n02113023-Pembroke
20419,n02113023_4024.jpg,standing,n02113023-Pembroke
20420,n02113023_2482.jpg,sitting,n02113023-Pembroke
20421,n02113023_9306.jpg,undefined,n02113023-Pembroke
20422,n02113023_4391.jpg,undefined,n02113023-Pembroke


In [46]:
# Build robust image filename mapping from label IDs
from pathlib import Path
import re

images_dir = os.path.join(output_dir, 'images')

# Index files
stem_to_rel = {}
name_to_rel = {}
tailnum_to_rel = {}
for root, _, files in os.walk(images_dir):
    for fname in files:
        fpath = os.path.join(root, fname)
        rel = os.path.relpath(fpath, images_dir)
        name_to_rel[fname] = rel
        stem = Path(fname).stem
        stem_to_rel[stem] = rel
        m = re.search(r"_(\d+)$", stem)
        if m:
            tailnum_to_rel[m.group(1)] = rel

print('Indexed images -> names:', len(name_to_rel), 'stems:', len(stem_to_rel), 'tailnums:', len(tailnum_to_rel))

# Heuristic mapper

def map_id_to_rel(row):
    sid = str(row.get('id', ''))
    if not sid:
        return None
    # 1) exact stem
    if sid in stem_to_rel:
        return stem_to_rel[sid]
    # 2) if looks like filename
    if sid in name_to_rel:
        return name_to_rel[sid]
    # 3) strip extension and retry
    base = Path(sid).stem
    if base in stem_to_rel:
        return stem_to_rel[base]
    # 4) numeric tail (e.g., id='1234' matches 'n02105162_1234')
    if sid.isdigit() and sid in tailnum_to_rel:
        return tailnum_to_rel[sid]
    # 5) synset prefix + '_' + id using breed
    breed = str(row.get('breed', ''))
    if '-' in breed:
        synset = breed.split('-', 1)[0]
        cand = f"{synset}_{sid}"
        if cand in stem_to_rel:
            return stem_to_rel[cand]
    return None

if 'id' in all_labels_df.columns:
    all_labels_df['image'] = all_labels_df.apply(map_id_to_rel, axis=1)
else:
    print('Warning: no id column found in labels; please inspect label schema')

before = len(all_labels_df)
missing_mask = all_labels_df['image'].isna()
missing_count = int(missing_mask.sum())
print(f'Unmapped after heuristics: {missing_count} / {before}')
if missing_count > 0:
    print('Sample unmapped rows:')
    display(all_labels_df.loc[missing_mask, ['id','label','breed']].head(10))

all_labels_df = all_labels_df.dropna(subset=['image']).copy()
after = len(all_labels_df)
print(f'Mapped images for {after}/{before} rows')
display(all_labels_df[['id','label','breed','image']].head())


Indexed images -> names: 41305 stems: 41305 tailnums: 9758
Unmapped after heuristics: 0 / 20423
Mapped images for 20423/20423 rows


Unnamed: 0,id,label,breed,image
0,n02085936_8767.jpg,sitting,n02085936-Maltese_dog,n02085936-Maltese_dog/n02085936_8767.jpg
1,n02085936_20610.jpg,sitting,n02085936-Maltese_dog,n02085936-Maltese_dog/n02085936_20610.jpg
2,n02085936_16188.jpg,lying,n02085936-Maltese_dog,n02085936-Maltese_dog/n02085936_16188.jpg
3,n02085936_7865.jpg,undefined,n02085936-Maltese_dog,n02085936-Maltese_dog/n02085936_7865.jpg
4,n02085936_500.jpg,standing,n02085936-Maltese_dog,n02085936-Maltese_dog/n02085936_500.jpg


In [47]:
# Quick schema + label distribution check
print('DataFrame shape:', all_labels_df.shape)
print('Columns:', list(all_labels_df.columns))

# Peek a few rows
display(all_labels_df.sample(min(5, len(all_labels_df))))

# Basic checks for expected columns
expected_cols = {'image', 'label'}
missing = [c for c in expected_cols if c not in all_labels_df.columns]
if missing:
    print('Missing expected columns:', missing)

# Label distribution
if 'label' in all_labels_df.columns:
    print('\nLabel distribution (top 20):')
    display(all_labels_df['label'].value_counts().head(20))


DataFrame shape: (20423, 4)
Columns: ['id', 'label', 'breed', 'image']


Unnamed: 0,id,label,breed,image
17400,n02108000_261.jpg,standing,n02108000-EntleBucher,n02108000-EntleBucher/n02108000_261.jpg
5937,n02095570_916.jpg,standing,n02095570-Lakeland_terrier,n02095570-Lakeland_terrier/n02095570_916.jpg
18134,n02094114_161.jpg,sitting,n02094114-Norfolk_terrier,n02094114-Norfolk_terrier/n02094114_161.jpg
19888,n02108089_13839.jpg,undefined,n02108089-boxer,n02108089-boxer/n02108089_13839.jpg
14067,n02113799_7448.jpg,undefined,n02113799-standard_poodle,n02113799-standard_poodle/n02113799_7448.jpg



Label distribution (top 20):


label
standing     7047
undefined    6258
lying        4110
sitting      3008
Name: count, dtype: int64

In [48]:
# Normalize labels to the three categories and map to indices
import re

def normalize_pose(x: str) -> str:
    if not isinstance(x, str):
        return None
    s = x.strip().lower()
    # common variants
    if re.search(r"sit|sitting", s):
        return "sitting"
    if re.search(r"stand|standing", s):
        return "standing"
    if re.search(r"lie|lying|laying", s):
        return "lying"
    return None

all_labels_df['pose'] = all_labels_df['label'].apply(normalize_pose)
filtered_df = all_labels_df.dropna(subset=['pose']).copy()

class_names = ['sitting', 'standing', 'lying']
class_to_idx = {c: i for i, c in enumerate(class_names)}
filtered_df['target'] = filtered_df['pose'].map(class_to_idx)

print('Kept rows:', len(filtered_df), '| Original:', len(all_labels_df))
display(filtered_df['pose'].value_counts())
print('Class mapping:', class_to_idx)


Kept rows: 14165 | Original: 20423


pose
standing    7047
lying       4110
sitting     3008
Name: count, dtype: int64

Class mapping: {'sitting': 0, 'standing': 1, 'lying': 2}


In [55]:
# Optional: use a very small stratified subset for a quick sanity run
use_small_subset = True
samples_per_class = 500  # adjust small number like 10/20/50

if use_small_subset:
    subset_parts = []
    for cls_idx in sorted(filtered_df['target'].unique()):
        part = filtered_df[filtered_df['target'] == cls_idx].sample(
            n=min(samples_per_class, (filtered_df['target'] == cls_idx).sum()),
            random_state=42,
        )
        subset_parts.append(part)
    subset_df = pd.concat(subset_parts, ignore_index=True).sample(frac=1.0, random_state=42).reset_index(drop=True)
    print('Small subset size:', len(subset_df), 'per class cap:', samples_per_class)
else:
    subset_df = filtered_df.copy()

# Quick check
print(subset_df['target'].value_counts().to_dict())


Small subset size: 1500 per class cap: 500
{2: 500, 0: 500, 1: 500}


In [56]:
# Stratified train/val/test split (70/15/15)
from sklearn.model_selection import StratifiedShuffleSplit

source_df = subset_df if 'subset_df' in globals() else filtered_df

X = source_df[['image']].reset_index(drop=True)
y = source_df['target'].reset_index(drop=True)

sss1 = StratifiedShuffleSplit(n_splits=1, test_size=0.30, random_state=42)
train_idx, temp_idx = next(sss1.split(X, y))

X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
X_temp, y_temp = X.iloc[temp_idx], y.iloc[temp_idx]

sss2 = StratifiedShuffleSplit(n_splits=1, test_size=0.50, random_state=42)
val_idx, test_idx = next(sss2.split(X_temp, y_temp))

X_val, y_val = X_temp.iloc[val_idx], y_temp.iloc[val_idx]
X_test, y_test = X_temp.iloc[test_idx], y_temp.iloc[test_idx]

print('Train:', len(X_train), 'Val:', len(X_val), 'Test:', len(X_test))
print('Train dist:', y_train.value_counts().to_dict())
print('Val dist:', y_val.value_counts().to_dict())
print('Test dist:', y_test.value_counts().to_dict())


Train: 1050 Val: 225 Test: 225
Train dist: {0: 350, 1: 350, 2: 350}
Val dist: {2: 75, 1: 75, 0: 75}
Test dist: {1: 75, 2: 75, 0: 75}


In [57]:
# Dataset and DataLoaders
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

images_dir = os.path.join(output_dir, 'images')

train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

eval_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

class DogPoseDataset(Dataset):
    def __init__(self, X_df, y_series, transform=None):
        self.images = X_df['image'].tolist()
        self.targets = y_series.tolist()
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = self.images[idx]
        img_path = os.path.join(images_dir, img_name)
        with Image.open(img_path) as img:
            img = img.convert('RGB')
        if self.transform:
            img = self.transform(img)
        target = self.targets[idx]
        return img, target

train_ds = DogPoseDataset(X_train, y_train, transform=train_transform)
val_ds = DogPoseDataset(X_val, y_val, transform=eval_transform)
test_ds = DogPoseDataset(X_test, y_test, transform=eval_transform)

batch_size = 16
num_workers = 0  # Jupyter safety to avoid pickling issues

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, num_workers=num_workers)

len(train_ds), len(val_ds), len(test_ds)


(1050, 225, 225)

In [58]:
# Model: ResNet18 fine-tune
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

num_classes = 3

weights = models.ResNet18_Weights.DEFAULT
model = models.resnet18(weights=weights)
# Replace final layer
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, num_classes)

model = model.to(device)

# Optionally freeze backbone for a quick start
for name, param in model.named_parameters():
    param.requires_grad = True  # full fine-tune; set False except fc for faster start

# Class weights for imbalance (optional)
from collections import Counter
cls_counts = Counter(y_train.tolist())
cls_weights = torch.tensor([1.0 / cls_counts.get(i, 1) for i in range(num_classes)], dtype=torch.float32, device=device)

criterion = nn.CrossEntropyLoss(weight=cls_weights)
optimizer = optim.AdamW(model.parameters(), lr=3e-4, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)

epochs = 1  # quick sanity run; increase after verifying pipeline

best_val_loss = float('inf')
best_state = None

for epoch in range(1, epochs + 1):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = torch.tensor(targets, dtype=torch.long, device=device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, preds = outputs.max(1)
        correct += (preds == targets).sum().item()
        total += targets.size(0)

    train_loss = running_loss / max(total, 1)
    train_acc = correct / max(total, 1)

    # Validation
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.to(device)
            targets = torch.tensor(targets, dtype=torch.long, device=device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * inputs.size(0)
            _, preds = outputs.max(1)
            val_correct += (preds == targets).sum().item()
            val_total += targets.size(0)

    val_loss = val_loss / max(val_total, 1)
    val_acc = val_correct / max(val_total, 1)

    scheduler.step(val_loss)

    print(f"Epoch {epoch}/{epochs} | train_loss {train_loss:.4f} acc {train_acc:.4f} | val_loss {val_loss:.4f} acc {val_acc:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_state = {k: v.cpu() for k, v in model.state_dict().items()}

# Load best
if best_state is not None:
    model.load_state_dict(best_state)

print('Training complete. Best val loss:', best_val_loss)


  targets = torch.tensor(targets, dtype=torch.long, device=device)


KeyboardInterrupt: 

In [None]:
# Evaluation on test set
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np

model.eval()
all_preds = []
all_targets = []
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.to(device)
        targets = torch.tensor(targets, dtype=torch.long, device=device)
        outputs = model(inputs)
        _, preds = outputs.max(1)
        all_preds.extend(preds.cpu().numpy().tolist())
        all_targets.extend(targets.cpu().numpy().tolist())

all_preds = np.array(all_preds)
all_targets = np.array(all_targets)

acc = accuracy_score(all_targets, all_preds)
print('Test accuracy:', acc)
print('\nClassification report:')
print(classification_report(all_targets, all_preds, target_names=class_names))

cm = confusion_matrix(all_targets, all_preds)
print('Confusion matrix:\n', cm)


  targets = torch.tensor(targets, dtype=torch.long, device=device)


Test accuracy: 0.3333333333333333

Classification report:
              precision    recall  f1-score   support

     sitting       0.00      0.00      0.00         3
    standing       1.00      0.33      0.50         3
       lying       0.40      0.67      0.50         3

    accuracy                           0.33         9
   macro avg       0.47      0.33      0.33         9
weighted avg       0.47      0.33      0.33         9

Confusion matrix:
 [[0 0 3]
 [2 1 0]
 [1 0 2]]
