# 02 - Feature Extraction
Extract ResNet-18 features from Kinect IR video segments.

**Runtime:** GPU recommended (T4 or better).

In [None]:
# Colab Setup
# Detects whether the notebook is running on Google Colab. On Colab it mounts
# Google Drive, clones the project repository if it is not already present,
# installs its requirements, and points DATA_ROOT at the dataset folder on
# Drive. Elsewhere DATA_ROOT falls back to a local ./data directory.
import os
# Treated as Colab when the COLAB_GPU environment variable is set or a
# /content directory exists.
IN_COLAB = 'COLAB_GPU' in os.environ or os.path.exists('/content')

if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

    REPO_DIR = '/content/Driver-Activity-Recognition'
    # Clone only when the checkout is missing, so re-running this cell reuses
    # the existing directory instead of cloning again.
    if not os.path.exists(REPO_DIR):
        !git clone https://github.com/batuhne/Driver-Activity-Recognition.git {REPO_DIR}

    # Switch to the repository root first: the requirements.txt path below is
    # relative (presumably this also lets the later `src.*` imports resolve).
    os.chdir(REPO_DIR)
    !pip install -q -r requirements.txt
    DATA_ROOT = '/content/drive/MyDrive/DriveAndAct'
else:
    DATA_ROOT = './data'

# Echo the effective locations so a wrong path is visible before extraction.
print(f'Working directory: {os.getcwd()}')
print(f'Data root: {DATA_ROOT}')

In [None]:
import torch
import numpy as np
from src.utils import load_config, set_seed
from src.feature_extract import extract_features

# Load the project configuration; on Colab, redirect the data root to the
# Drive-backed dataset location chosen in the setup cell.
config = load_config()
if IN_COLAB:
    data_section = config['data']
    data_section['root'] = DATA_ROOT

# Report CUDA availability and, when present, the name of device 0.
cuda_available = torch.cuda.is_available()
print(f'GPU available: {cuda_available}')
if cuda_available:
    gpu_name = torch.cuda.get_device_name(0)
    print(f'GPU: {gpu_name}')

In [None]:
# Run feature extraction
# Calls the project's extract_features with the loaded config (whose data root
# is overridden on Colab).
# NOTE(review): presumably this writes per-split feature files and a
# manifest.csv under config['features']['save_dir'], since the verification
# cell reads them from there -- confirm in src/feature_extract.py.
extract_features(config)

## Verify Extracted Features

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Directory holding the extracted features: <data root>/<features save_dir>.
feature_dir = os.path.join(config['data']['root'], config['features']['save_dir'])

# For every split, report how many features its manifest lists and load the
# first listed file as a quick sanity check of array shape and dtype.
for split in ['train', 'val', 'test']:
    manifest = pd.read_csv(os.path.join(feature_dir, split, 'manifest.csv'))
    print(f'{split}: {len(manifest)} features extracted')

    # An empty manifest has no row 0; skip the sample check rather than
    # raising IndexError, so the remaining splits are still reported.
    if manifest.empty:
        print('  No features listed in manifest; skipping sample check')
        continue

    # Check a sample
    sample_path = os.path.join(feature_dir, split, manifest.iloc[0]['filename'])
    sample = np.load(sample_path)
    print(f'  Shape: {sample.shape}, dtype: {sample.dtype}')

## t-SNE Visualization

In [None]:
from sklearn.manifold import TSNE

# Load every validation feature listed in the manifest for t-SNE
manifest = pd.read_csv(os.path.join(feature_dir, 'val', 'manifest.csv'))
features_list, labels_list, activities = [], [], []

for _, row in manifest.iterrows():
    feat = np.load(os.path.join(feature_dir, 'val', row['filename'])).astype(np.float32)
    # Mean-pool across time dimension
    # (assumes axis 0 of each saved array is time -- TODO confirm against the extractor)
    features_list.append(feat.mean(axis=0))
    labels_list.append(row['label'])
    activities.append(row['activity'])

# t-SNE needs at least two points; fail with a clear message instead of an
# opaque error from np.stack or TSNE.
if len(features_list) < 2:
    raise ValueError(
        f't-SNE needs at least 2 validation samples, found {len(features_list)}'
    )

X = np.stack(features_list)
y = np.array(labels_list)

print(f't-SNE on {X.shape[0]} samples, {X.shape[1]} dims')
# scikit-learn requires perplexity < n_samples, so clamp the usual value of 30
# for small validation sets; larger sets keep exactly 30.
perplexity = min(30, X.shape[0] - 1)
tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
X_2d = tsne.fit_transform(X)

# Scatter the 2-D embedding, colored by the manifest's label column.
fig, ax = plt.subplots(figsize=(12, 10))
scatter = ax.scatter(X_2d[:, 0], X_2d[:, 1], c=y, cmap='tab20', alpha=0.6, s=10)
ax.set_title('t-SNE of ResNet-18 Features (Validation Set)')
plt.colorbar(scatter)
plt.tight_layout()
plt.show()