# Body Type Classifier for Dress Recommendation Demo

This notebook trains a tiny body type classifier that can run in the browser, either as a TensorFlow.js model or as a set of exported logistic-regression coefficients. The model classifies users into 5 body types from two ratios (waist/shoulder and hip/shoulder) derived from pose detection.

## Body Types:
- **Hourglass**: Balanced shoulders/hips with defined waist
- **Pear**: Larger hips relative to shoulders  
- **Apple**: Larger midsection, less defined waist
- **Rectangle**: Similar measurements throughout
- **Inverted Triangle**: Broader shoulders than hips

In [None]:
# Setup and Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import json
import os
from pathlib import Path

# TensorFlow for alternative model
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Seed both the NumPy and TensorFlow random generators for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

print("Dependencies loaded successfully!")

# Report the library versions in use (TensorFlow, NumPy, Pandas, in that order)
for lib_name, lib in (("TensorFlow", tf), ("NumPy", np), ("Pandas", pd)):
    print(f"{lib_name} version: {lib.__version__}")

## Generate Synthetic Body Type Dataset

We'll create synthetic data based on typical body proportions:
- **r1 = waist/shoulder ratio**
- **r2 = hip/shoulder ratio**

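Each body type is modeled as a pair of normal distributions over these ratios (one mean and standard deviation per ratio), with samples clipped to a plausible range.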

In [None]:
def generate_synthetic_data(n_samples_per_class=200):
    """Generate synthetic body type data based on typical proportions.

    Each body type is modelled as two independent normal distributions, one
    for r1 (waist/shoulder) and one for r2 (hip/shoulder). Samples are drawn
    from NumPy's global random state, so call ``np.random.seed`` beforehand
    for reproducible output.

    Args:
        n_samples_per_class: Number of samples to draw for each body type.

    Returns:
        A ``(features, labels)`` tuple. ``features`` is a float array of
        shape ``(5 * n_samples_per_class, 2)`` with columns ``[r1, r2]``;
        ``labels`` is a string array of the same length, grouped by body
        type in the order the types are defined below. With
        ``n_samples_per_class=0`` the arrays are empty but keep that layout
        (``(0, 2)`` features, string-typed labels).
    """

    # Define characteristic ranges for each body type
    body_type_params = {
        'hourglass': {
            'r1_mean': 0.72, 'r1_std': 0.06,  # defined waist
            'r2_mean': 0.95, 'r2_std': 0.08   # hips similar to shoulders
        },
        'pear': {
            'r1_mean': 0.78, 'r1_std': 0.05,  # less defined waist
            'r2_mean': 1.15, 'r2_std': 0.10   # larger hips
        },
        'apple': {
            'r1_mean': 0.88, 'r1_std': 0.07,  # less defined waist
            'r2_mean': 0.92, 'r2_std': 0.06   # smaller hips
        },
        'rectangle': {
            'r1_mean': 0.85, 'r1_std': 0.04,  # straight through
            'r2_mean': 0.90, 'r2_std': 0.05   # similar measurements
        },
        'inverted_triangle': {
            'r1_mean': 0.82, 'r1_std': 0.05,  # broader shoulders
            'r2_mean': 0.78, 'r2_std': 0.08   # narrower hips
        }
    }

    feature_blocks = []
    for params in body_type_params.values():
        # Draw r1 before r2 for every class: the order of draws determines
        # which values a given seed produces.
        r1_samples = np.random.normal(
            params['r1_mean'], params['r1_std'], n_samples_per_class
        )
        r2_samples = np.random.normal(
            params['r2_mean'], params['r2_std'], n_samples_per_class
        )

        # Clip to reasonable bounds, then pair the ratios up as [r1, r2] rows
        feature_blocks.append(np.column_stack([
            np.clip(r1_samples, 0.6, 1.0),
            np.clip(r2_samples, 0.7, 1.3),
        ]))

    features = np.vstack(feature_blocks)
    # One label per row, repeated per class in the same order as the blocks
    labels = np.repeat(list(body_type_params), n_samples_per_class)

    return features, labels

# Build the dataset: 200 samples for each body type
X, y = generate_synthetic_data(n_samples_per_class=200)

# Compute the distinct labels once and reuse them in the report below
body_type_names = np.unique(y)
print(f"Generated {len(X)} samples with {len(body_type_names)} body types")
print(f"Features shape: {X.shape}")
print(f"Body types: {body_type_names}")

## Visualize Body Type Clusters

Let's visualize the synthetic data to see how well the different body types are separated in the ratio space.

In [None]:
# Combine features and labels in one DataFrame so they can be grouped together
df = pd.DataFrame(X, columns=['r1_waist_shoulder', 'r2_hip_shoulder'])
df['body_type'] = y

# Fixed colour for each body type
colors = {'hourglass': 'red', 'pear': 'green', 'apple': 'orange',
          'rectangle': 'blue', 'inverted_triangle': 'purple'}

plt.figure(figsize=(12, 8))

# sort=False keeps the groups in first-appearance order, which fixes the
# order of the legend entries
for body_type, group in df.groupby('body_type', sort=False):
    plt.scatter(
        group['r1_waist_shoulder'],
        group['r2_hip_shoulder'],
        c=colors[body_type],
        label=body_type,
        alpha=0.6,
        s=30,
    )

plt.xlabel('r1: Waist/Shoulder Ratio', fontsize=12)
plt.ylabel('r2: Hip/Shoulder Ratio', fontsize=12)
plt.title('Body Type Classification - Synthetic Dataset', fontsize=14)
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# Per-class mean and standard deviation of both ratios, rounded to 3 decimals
summary_stats = df.groupby('body_type').agg({
    'r1_waist_shoulder': ['mean', 'std'],
    'r2_hip_shoulder': ['mean', 'std']
}).round(3)
print("Dataset Summary:")
print(summary_stats)

## Train Logistic Regression Classifier

We'll train a simple logistic regression model that can be easily exported as JSON coefficients.

In [None]:
# Hold out 20% of the samples for testing; stratify=y splits every class
# in the same proportion
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Training set: {X_train.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")

# Fit the logistic regression model (fit() returns the estimator itself)
lr_model = LogisticRegression(random_state=42, max_iter=1000).fit(X_train, y_train)

# Score the held-out samples
y_pred = lr_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nLogistic Regression Accuracy: {accuracy:.3f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Confusion matrix, with rows and columns in the model's own class order
class_names = lr_model.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_names)

plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title('Confusion Matrix - Logistic Regression')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

## Export Coefficients to JSON

Export the trained logistic regression weights and biases to a JSON file that the React app can load.

In [None]:
# Everything the client needs to reproduce the model: class labels, the
# weight matrix and intercepts, plus some descriptive metadata
coefficients_data = {
    "model_type": "logistic_regression",
    "labels": lr_model.classes_.tolist(),
    "coefficients": lr_model.coef_.tolist(),
    "intercept": lr_model.intercept_.tolist(),
    "feature_names": ["r1_waist_shoulder", "r2_hip_shoulder"],
    "accuracy": float(accuracy),
    "created_timestamp": pd.Timestamp.now().isoformat()
}

# Make sure the output directory exists (including any missing parents)
output_dir = Path("../public/model")
output_dir.mkdir(parents=True, exist_ok=True)

# Serialize the coefficients and write them out as JSON
coefficients_path = output_dir / "coefficients.json"
coefficients_path.write_text(json.dumps(coefficients_data, indent=2))

print(f"Coefficients exported to: {coefficients_path}")
print(f"File size: {coefficients_path.stat().st_size} bytes")

# Show the structure, abbreviating any list with more than three entries
print("\nExported coefficients structure:")
for key, value in coefficients_data.items():
    is_long_list = isinstance(value, list) and len(value) > 3
    shown = f"[array with {len(value)} elements]" if is_long_list else value
    print(f"  {key}: {shown}")

## Train Alternative TensorFlow Model

Create a small neural network as an alternative to the logistic regression model.

In [None]:
# Prepare data for TensorFlow
from sklearn.preprocessing import LabelEncoder

# Map the string labels to integer class ids
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
n_classes = len(label_encoder.classes_)

# Same 80/20 stratified split settings as the logistic regression model
X_train_tf, X_test_tf, y_train_tf, y_test_tf = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# One-hot encode the targets, as required by categorical_crossentropy
y_train_tf_cat, y_test_tf_cat = (
    tf.keras.utils.to_categorical(targets, n_classes)
    for targets in (y_train_tf, y_test_tf)
)

# Small fully connected network: 2 inputs -> 16 -> 8 -> n_classes
network_layers = [
    layers.Input(shape=(2,)),
    layers.Dense(16, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(8, activation='relu'),
    layers.Dense(n_classes, activation='softmax'),
]
tf_model = keras.Sequential(network_layers)

tf_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print("TensorFlow Model Summary:")
tf_model.summary()

# Train, using the held-out split to monitor validation metrics
history = tf_model.fit(
    X_train_tf, y_train_tf_cat,
    validation_data=(X_test_tf, y_test_tf_cat),
    epochs=50,
    batch_size=32,
    verbose=1
)

# Final score on the held-out split
tf_loss, tf_accuracy = tf_model.evaluate(X_test_tf, y_test_tf_cat, verbose=0)
print(f"\nTensorFlow Model Accuracy: {tf_accuracy:.3f}")

# Training curves: loss on the left, accuracy on the right
plt.figure(figsize=(12, 4))
panels = (('loss', 'Loss'), ('accuracy', 'Accuracy'))
for position, (metric, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label=f'Training {title}')
    plt.plot(history.history[f'val_{metric}'], label=f'Validation {title}')
    plt.title(f'Model {title}')
    plt.xlabel('Epoch')
    plt.ylabel(title)
    plt.legend()

plt.tight_layout()
plt.show()

## Convert TensorFlow Model to TensorFlow.js

Convert the trained model to TensorFlow.js format for use in the browser.

In [None]:
def _export_tfjs_model(model, model_dir, labels, num_classes, test_accuracy):
    """Convert ``model`` to TensorFlow.js format and write its metadata file.

    ``tensorflowjs`` is imported inside the function so that a missing
    package surfaces as an ImportError the caller can react to.

    Args:
        model: Trained Keras model to convert.
        model_dir: ``Path`` of the directory receiving ``tfjs_model/`` and
            ``tfjs_metadata.json``.
        labels: Class labels, in the order of the model's output units.
        num_classes: Number of output classes.
        test_accuracy: Accuracy recorded in the metadata file.

    Returns:
        ``(tfjs_path, metadata_path)`` for the written model directory and
        metadata file.
    """
    import tensorflowjs as tfjs

    # Save the model in TensorFlow.js format
    tfjs_path = model_dir / "tfjs_model"
    tfjs_path.mkdir(parents=True, exist_ok=True)

    tfjs.converters.save_keras_model(model, str(tfjs_path))
    print(f"TensorFlow.js model saved to: {tfjs_path}")

    # Create metadata file for the TensorFlow.js model
    tfjs_metadata = {
        "model_type": "tensorflow",
        "labels": labels,
        "input_shape": [2],
        "output_shape": [num_classes],
        "feature_names": ["r1_waist_shoulder", "r2_hip_shoulder"],
        "accuracy": float(test_accuracy),
        "created_timestamp": pd.Timestamp.now().isoformat()
    }

    metadata_path = model_dir / "tfjs_metadata.json"
    with open(metadata_path, 'w') as f:
        json.dump(tfjs_metadata, f, indent=2)

    print(f"Metadata saved to: {metadata_path}")

    # List generated files
    print("\nGenerated TensorFlow.js files:")
    for file in tfjs_path.iterdir():
        print(f"  {file.name}: {file.stat().st_size} bytes")

    return tfjs_path, metadata_path


try:
    tfjs_path, metadata_path = _export_tfjs_model(
        tf_model, output_dir, label_encoder.classes_.tolist(), n_classes, tf_accuracy
    )

except ImportError:
    print("TensorFlow.js converter not available. Installing...")
    import importlib
    import subprocess
    import sys

    try:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'tensorflowjs'])
        # The package was installed while this interpreter was running, so
        # clear the import system's caches before importing it.
        importlib.invalidate_caches()

        # Retry the conversion
        tfjs_path, metadata_path = _export_tfjs_model(
            tf_model, output_dir, label_encoder.classes_.tolist(), n_classes, tf_accuracy
        )

    except Exception as e:
        # Best-effort export: the logistic regression coefficients remain
        # usable, so report the failure instead of aborting the notebook.
        print(f"Could not install/use TensorFlow.js converter: {e}")
        print("Skipping TensorFlow.js model export. The coefficients.json model will still work.")

        # Create a placeholder metadata file
        tfjs_metadata = {
            "model_type": "tensorflow_unavailable",
            "labels": label_encoder.classes_.tolist(),
            "note": "TensorFlow.js conversion failed, using coefficients.json instead"
        }

        metadata_path = output_dir / "tfjs_metadata.json"
        with open(metadata_path, 'w') as f:
            json.dump(tfjs_metadata, f, indent=2)

## Validation and Sanity Checks

Run sample predictions through both trained models, then reload the exported `coefficients.json` and reproduce a prediction by hand to confirm the export is usable.

In [None]:
# Test samples representing different body types (one row per type: [r1, r2])
test_samples = np.array([
    [0.72, 0.95],  # hourglass
    [0.78, 1.15],  # pear
    [0.88, 0.92],  # apple
    [0.85, 0.90],  # rectangle
    [0.82, 0.78]   # inverted_triangle
])

expected_types = ['hourglass', 'pear', 'apple', 'rectangle', 'inverted_triangle']

# Predict the whole batch at once. Both models take a 2-D
# (n_samples, n_features) array. Wrapping a single 1-D sample in a Python
# list can be read by Keras as a list of model inputs instead of a batch of
# one, so the 2-D array is passed directly.
lr_preds = lr_model.predict(test_samples)
lr_probs = lr_model.predict_proba(test_samples).max(axis=1)
tf_pred_probs = tf_model.predict(test_samples, verbose=0)
tf_pred_idxs = np.argmax(tf_pred_probs, axis=1)

print("=== Sanity Check Results ===")
print(f"{'Sample':<12} {'r1':<6} {'r2':<6} {'LogReg':<17} {'TF':<17} {'Expected':<17}")
print("-" * 80)

for i, (sample, expected) in enumerate(zip(test_samples, expected_types)):
    # Logistic regression prediction and its top-class probability
    lr_pred = lr_preds[i]
    lr_prob = lr_probs[i]

    # TensorFlow prediction: map the arg-max output unit back to its label
    tf_pred_idx = tf_pred_idxs[i]
    tf_pred = label_encoder.classes_[tf_pred_idx]
    tf_prob = tf_pred_probs[i, tf_pred_idx]

    print(f"Sample {i+1:<5} {sample[0]:<6.2f} {sample[1]:<6.2f} "
          f"{lr_pred:<12}({lr_prob:.2f}) {tf_pred:<12}({tf_prob:.2f}) {expected:<17}")

print("\n=== Model Comparison ===")
print(f"Logistic Regression Accuracy: {accuracy:.3f}")
print(f"TensorFlow Model Accuracy:    {tf_accuracy:.3f}")

# Test loading the exported JSON coefficients
print("\n=== JSON Coefficients Test ===")
with open(coefficients_path, 'r') as f:
    loaded_coefficients = json.load(f)

print("Successfully loaded coefficients.json")
print(f"Model type: {loaded_coefficients['model_type']}")
print(f"Labels: {loaded_coefficients['labels']}")
print(f"Accuracy: {loaded_coefficients['accuracy']:.3f}")

# Manual prediction using loaded coefficients (simplified)
def predict_from_coefficients(sample, coefficients):
    """Predict a label from exported logistic-regression coefficients.

    Args:
        sample: Feature vector (sequence or 1-D array), one value per
            column of the coefficient matrix.
        coefficients: Mapping with the keys ``'coefficients'`` (one row of
            weights per class), ``'intercept'`` (one value per row) and
            ``'labels'`` (class names).

    Returns:
        ``(label, probability)`` for the most probable class.
    """
    coef = np.asarray(coefficients['coefficients'], dtype=float)
    intercept = np.asarray(coefficients['intercept'], dtype=float)
    labels = coefficients['labels']

    # Calculate scores for each class
    scores = np.dot(coef, np.asarray(sample, dtype=float)) + intercept

    # A two-class scikit-learn model stores a single weight row whose score
    # is the log-odds of the second label. Softmax over [0, score] equals
    # the sigmoid of that score, so both labels get a probability.
    if scores.shape[0] == 1 and len(labels) == 2:
        scores = np.array([0.0, scores[0]])

    # Softmax (subtract the max first for numerical stability)
    exp_scores = np.exp(scores - np.max(scores))
    probabilities = exp_scores / np.sum(exp_scores)

    predicted_idx = int(np.argmax(probabilities))
    return labels[predicted_idx], probabilities[predicted_idx]

# Reproduce a prediction by hand from the reloaded JSON, using the first
# test sample (the hourglass example)
sample = test_samples[0]
manual_result = predict_from_coefficients(sample, loaded_coefficients)
pred_label, pred_prob = manual_result
print(f"Manual prediction for {sample}: {pred_label} (confidence: {pred_prob:.3f})")

## Summary

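We have trained two models for body type classification and exported them for use in the browser (if the TensorFlow.js converter cannot be installed, that export is skipped and a placeholder metadata file is written instead):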

1. **Logistic Regression Model** → `coefficients.json` (lightweight, easy to implement)
2. **TensorFlow Model** → `tfjs_model/` (more complex but potentially more accurate)

Both models can classify users into 5 body types based on waist/shoulder and hip/shoulder ratios extracted from pose detection.

### Next Steps
- Use these models in the React app
- Test with real pose detection data
- Fine-tune thresholds based on real-world performance

### Files Generated
- `../public/model/coefficients.json` - Logistic regression weights
- `../public/model/tfjs_model/` - TensorFlow.js model files
- `../public/model/tfjs_metadata.json` - Model metadata