# Module 1: Posture Analysis (Lying vs Standing)

This notebook implements the data pipeline for the Posture module.

**Steps:**
1. **Load Data**: Load raw accelerometer data (or generate mock data if none exists).
2. **Preprocessing**: Clean and window the data.
3. **Feature Extraction**: Calculate Mean, Standard Deviation, and Tilt for each window.
4. **Training**: Train a Random Forest Classifier.
5. **Export**: Save the model for the Web Dashboard.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import joblib
import os

# Configuration
# Input/output locations, relative to the notebook's working directory.
DATA_PATH = '../data/raw/posture_samples/sample_cow.csv'
MODEL_SAVE_PATH = '../models/posture/posture_model.pkl'

# Sliding-window geometry, expressed in samples.
WINDOW_SIZE = 50              # samples per window (e.g., 50 samples @ 10Hz = 5 seconds)
STEP_SIZE = WINDOW_SIZE // 2  # stride of 25 samples, i.e. 50% overlap

## 1. Data Loading (or Mock Generation)
We load the raw CSV if it exists. Since the raw file might not be available yet, we otherwise fall back to synthetic data that mimics a cow standing and then lying down.

In [None]:
def generate_mock_data(n_samples=1000, seed=None):
    """Generates synthetic accelerometer data for testing.

    The first ``n_samples // 2`` rows simulate standing (gravity on the Y axis,
    label 0); the remaining rows simulate lying (gravity on the Z axis, label 1).
    Rows are returned in that order, without shuffling, so the frame contains a
    single standing-to-lying transition.

    Args:
        n_samples: Total number of rows to generate.
        seed: Optional integer seed. When given, a private ``RandomState`` is
            used so repeated calls produce identical data. When ``None`` the
            global NumPy random state is used.

    Returns:
        pandas.DataFrame with columns ``acc_x``, ``acc_y``, ``acc_z``, ``label``.
    """
    print("⚠️ Generating MOCK DATA... (Use this to verify the pipeline)")

    # Both the numpy.random module and RandomState expose ``normal``; choosing
    # between them keeps the unseeded path identical to drawing from the
    # global state.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # State 0: Standing (High Y-axis gravity, some movement)
    # Accel: X~0, Y~1g (9.8), Z~0
    standing_len = n_samples // 2
    acc_x_stand = rng.normal(0, 0.1, standing_len)
    acc_y_stand = rng.normal(9.8, 0.2, standing_len)
    acc_z_stand = rng.normal(0, 0.1, standing_len)
    labels_stand = np.zeros(standing_len)  # 0 = Standing

    # State 1: Lying (High Z-axis gravity, low movement)
    # Accel: X~0, Y~0, Z~1g (9.8) (Assuming lying on side/stomach)
    lying_len = n_samples - standing_len
    acc_x_lie = rng.normal(0, 0.05, lying_len)
    acc_y_lie = rng.normal(0, 0.05, lying_len)
    acc_z_lie = rng.normal(9.8, 0.05, lying_len)
    labels_lie = np.ones(lying_len)  # 1 = Lying

    # Combine: standing block first, lying block second (no shuffling).
    return pd.DataFrame({
        'acc_x': np.concatenate([acc_x_stand, acc_x_lie]),
        'acc_y': np.concatenate([acc_y_stand, acc_y_lie]),
        'acc_z': np.concatenate([acc_z_stand, acc_z_lie]),
        'label': np.concatenate([labels_stand, labels_lie])
    })

# Try to load real data, else use mock
if os.path.exists(DATA_PATH):
    print(f"✅ Loading real data from {DATA_PATH}")
    df = pd.read_csv(DATA_PATH)
else:
    df = generate_mock_data()

# Fail early with a clear message if the columns indexed by the feature
# extraction step are absent (e.g. a real CSV with a different header),
# rather than hitting a KeyError midway through windowing.
REQUIRED_COLUMNS = ['acc_x', 'acc_y', 'acc_z', 'label']
missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    raise ValueError(f"Input data is missing required columns: {missing_columns}")

print(f"Data Shape: {df.shape}")
df.head()

## 2. Feature Extraction
We use a sliding window approach. For each window, we calculate:
- **Mean**: Average orientation.
- **Std Dev**: Intensity of movement.
- **Tilt**: Angle of the mean acceleration vector in the Y–Z plane, computed as `arctan2(mean_y, mean_z)` and expressed in degrees.

In [None]:
def extract_features(data, window_size, step_size):
    """Compute per-window summary features with a sliding window.

    Windows start at rows ``0, step_size, 2 * step_size, ...`` and every window
    that fits entirely inside ``data`` is used, including one that ends exactly
    on the last row.

    Args:
        data: DataFrame with columns ``acc_x``, ``acc_y``, ``acc_z`` and ``label``.
        window_size: Number of consecutive rows per window.
        step_size: Number of rows between the starts of successive windows.

    Returns:
        Tuple ``(features, labels)``. ``features`` is a DataFrame with columns
        ``mean_x, mean_y, mean_z, std_x, std_y, std_z, tilt`` (one row per
        window); ``labels`` is a NumPy array holding each window's most frequent
        label. Both are empty when ``data`` is shorter than ``window_size``.
    """
    feature_names = ['mean_x', 'mean_y', 'mean_z', 'std_x', 'std_y', 'std_z', 'tilt']
    features = []
    labels = []

    # The last valid start index is len(data) - window_size, so the range end
    # must be one past it; otherwise the final full window is silently dropped.
    last_start = len(data) - window_size
    for start in range(0, last_start + 1, step_size):
        window = data.iloc[start : start + window_size]

        # Mean per axis: average orientation over the window
        f_mean_x = window['acc_x'].mean()
        f_mean_y = window['acc_y'].mean()
        f_mean_z = window['acc_z'].mean()

        # Sample standard deviation per axis: intensity of movement
        f_std_x = window['acc_x'].std()
        f_std_y = window['acc_y'].std()
        f_std_z = window['acc_z'].std()

        # Tilt: angle of the mean acceleration in the Y-Z plane, in degrees
        f_tilt = np.degrees(np.arctan2(f_mean_y, f_mean_z))

        features.append([f_mean_x, f_mean_y, f_mean_z, f_std_x, f_std_y, f_std_z, f_tilt])

        # Label (majority vote in window; mode() is sorted, so ties resolve
        # to the smallest label value)
        labels.append(window['label'].mode()[0])

    return pd.DataFrame(features, columns=feature_names), np.array(labels)

# Window the loaded samples using the notebook-level window/stride settings.
X, y = extract_features(data=df, window_size=WINDOW_SIZE, step_size=STEP_SIZE)
print(f"Extracted Features: {X.shape}")
X.head()

## 3. Model Training
We train a Random Forest classifier. It's robust and easy to interpret.

In [None]:
# Stratify the split so both postures appear in train and test in the same
# proportion; fall back to a plain split when a class has fewer than two
# windows, because stratification would raise in that case.
_, class_counts = np.unique(y, return_counts=True)
stratify_labels = y if class_counts.size > 0 and class_counts.min() >= 2 else None

# NOTE(review): windows overlap, so a random split places windows that share
# raw samples on both sides of the split. Test scores are therefore optimistic;
# consider a time- or group-based split once real recordings are available.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=stratify_labels
)

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred))

# Rows are true labels, columns are predicted labels.
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

## 4. Export for Dashboard
We save the model. For the web, we might convert this to ONNX later, but for now, we just save the pickle to confirm the pipeline works.

In [None]:
# Derive the output directory from MODEL_SAVE_PATH so the two cannot drift
# apart, and create it idempotently (no separate existence check needed).
model_dir = os.path.dirname(MODEL_SAVE_PATH)
if model_dir:  # dirname is '' when saving into the current directory
    os.makedirs(model_dir, exist_ok=True)

joblib.dump(clf, MODEL_SAVE_PATH)
print(f"Model saved to {MODEL_SAVE_PATH}")