# Advanced Sleep Pattern Prediction Model Training
This notebook trains a sophisticated ML model using your sleep data and exports it for deployment.

In [None]:
# Install required packages
!pip install supabase tensorflow scikit-learn pandas numpy matplotlib seaborn joblib python-dotenv

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
from tensorflow import keras
import joblib
import json
from supabase import create_client
import os
from google.colab import files
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Configuration
# Credentials are taken from the SUPABASE_URL / SUPABASE_KEY environment variables
# when they are set, so real keys never have to be saved inside the notebook.
# The placeholder strings are only a fallback: replace them if you do not use
# environment variables.
SUPABASE_URL = os.environ.get("SUPABASE_URL", "YOUR_SUPABASE_URL_HERE")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY", "YOUR_SUPABASE_ANON_KEY_HERE")

# Initialize Supabase client
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

print("✅ Configuration loaded")

In [None]:
# Load sleep data from Supabase
def load_sleep_data():
    """Fetch all rows of the ``sleep_records`` table ordered by ``start_time``.

    Returns:
        A DataFrame built from the returned rows, or ``None`` when the query
        fails or yields no rows. Failures are printed rather than raised.
    """
    try:
        query = supabase.table('sleep_records').select('*').order('start_time')
        rows = query.execute().data

        # An empty result is reported through the same error path as a failed query
        if not rows:
            raise ValueError("No sleep data found")

        records = pd.DataFrame(rows)
        print(f"📊 Loaded {len(records)} sleep records")
    except Exception as err:
        print(f"❌ Error loading data: {err}")
        return None
    return records

# Load the data and, if that worked, print a short overview of it
sleep_df = load_sleep_data()
if sleep_df is not None:
    overview = (
        ("\n📋 Data Info:", sleep_df.info),
        ("\n🔍 First 5 records:", sleep_df.head),
    )
    for heading, make_preview in overview:
        print(heading)
        print(make_preview())

In [None]:
# Advanced Feature Engineering
def create_advanced_features(df):
    """Derive time, rolling and lag features from raw sleep records.

    Args:
        df: DataFrame with at least ``start_time`` and ``end_time`` columns
            (anything ``pd.to_datetime`` accepts). A ``sleep_duration`` column
            (hours) and the optional quality columns are used when present.

    Returns:
        A new DataFrame (the input is left untouched), sorted by
        ``start_time``, with the engineered columns added. Rows that lack a
        value in a required or engineered column are dropped; this always
        removes the earliest record because it has no previous night.
    """
    df = df.copy()
    raw_columns = set(df.columns)

    # Convert datetime columns
    # NOTE(review): hours are extracted in whatever timezone the timestamps
    # carry - confirm that this matches the user's local time.
    df['start_time'] = pd.to_datetime(df['start_time'])
    df['end_time'] = pd.to_datetime(df['end_time'])

    # Duration in hours: keep recorded values and only fill the gaps from the
    # timestamps, instead of overwriting every row when a single value is missing.
    computed_duration = (df['end_time'] - df['start_time']).dt.total_seconds() / 3600
    if 'sleep_duration' in df.columns:
        recorded_duration = pd.to_numeric(df['sleep_duration'], errors='coerce')
        df['sleep_duration'] = recorded_duration.fillna(computed_duration)
    else:
        df['sleep_duration'] = computed_duration

    # Extract time features
    df['start_hour'] = df['start_time'].dt.hour
    df['start_minute'] = df['start_time'].dt.minute
    df['day_of_week'] = df['start_time'].dt.dayofweek
    df['month'] = df['start_time'].dt.month
    df['is_weekend'] = (df['day_of_week'] >= 5).astype(int)

    # Cyclical encoding for time features
    df['start_hour_sin'] = np.sin(2 * np.pi * df['start_hour'] / 24)
    df['start_hour_cos'] = np.cos(2 * np.pi * df['start_hour'] / 24)
    df['day_sin'] = np.sin(2 * np.pi * df['day_of_week'] / 7)
    df['day_cos'] = np.cos(2 * np.pi * df['day_of_week'] / 7)
    df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)

    # Rolling statistics over the 7 records preceding each row. The window is
    # counted in records, not calendar days.
    df = df.sort_values('start_time')
    window = 7

    def _prior_window(series):
        # shift(1) excludes the row itself, so a feature never contains the
        # value the models are later asked to predict for that same row.
        return series.shift(1).rolling(window=window, min_periods=1)

    df['duration_7d_avg'] = _prior_window(df['sleep_duration']).mean()
    df['duration_7d_std'] = _prior_window(df['sleep_duration']).std().fillna(0)
    df['start_hour_7d_avg'] = _prior_window(df['start_hour']).mean()

    # Sleep debt calculation
    target_sleep = 8.0  # hours
    df['daily_sleep_debt'] = target_sleep - df['sleep_duration']
    df['cumulative_sleep_debt'] = _prior_window(df['daily_sleep_debt']).sum()

    # Sleep quality features (if available). A column without any value is
    # skipped, otherwise its all-NaN average would discard every row below.
    quality_cols = ['sleep_quality', 'stress_level', 'caffeine_intake', 'exercise_hours']
    for col in quality_cols:
        if col in df.columns and df[col].notna().any():
            df[f'{col}_7d_avg'] = _prior_window(df[col]).mean()

    # Previous day features
    df['prev_duration'] = df['sleep_duration'].shift(1)
    df['prev_start_hour'] = df['start_hour'].shift(1)

    # Drop rows that are incomplete for modeling. Only the timestamps, the
    # duration and the engineered columns are checked, so an unrelated nullable
    # column in the source table cannot wipe out the data set.
    engineered = [col for col in df.columns if col not in raw_columns]
    required = list(dict.fromkeys(['start_time', 'end_time', 'sleep_duration', *engineered]))
    df = df.dropna(subset=required)

    return df

# Create features
# load_sleep_data() returns None when loading failed; stop here with an
# actionable message instead of an AttributeError from inside the pipeline.
if sleep_df is None:
    raise RuntimeError(
        "Sleep data was not loaded - check the Supabase configuration and re-run the loading cell."
    )

featured_df = create_advanced_features(sleep_df)

# Every later cell needs at least some rows to plot and to split.
if featured_df.empty:
    raise RuntimeError("Feature engineering left no usable records - more sleep data is required.")

print(f"🔧 Created {featured_df.shape[1]} features from {featured_df.shape[0]} records")
print("\n📊 Feature columns:")
print(list(featured_df.columns))

In [None]:
# Data Visualization
# Six panels on one figure, drawn through explicit axes objects.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
(ax_start, ax_duration, ax_day_duration), (ax_day_start, ax_debt, ax_corr) = axes
day_labels = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# Sleep patterns over time
ax_start.plot(featured_df['start_time'], featured_df['start_hour'])
ax_start.set_title('Sleep Start Time Over Time')
ax_start.tick_params(axis='x', rotation=45)

ax_duration.plot(featured_df['start_time'], featured_df['sleep_duration'])
ax_duration.set_title('Sleep Duration Over Time')
ax_duration.tick_params(axis='x', rotation=45)

# Day of week patterns
# reindex(range(7)) guarantees one value per weekday: without it the bar call
# fails with a shape mismatch whenever a weekday is absent from the data.
# Missing weekdays are drawn as zero-height (i.e. invisible) bars.
day_avg = featured_df.groupby('day_of_week')['sleep_duration'].mean().reindex(range(7))
ax_day_duration.bar(range(7), day_avg.fillna(0).values)
ax_day_duration.set_title('Average Sleep Duration by Day')
ax_day_duration.set_xticks(range(7))
ax_day_duration.set_xticklabels(day_labels)

start_avg = featured_df.groupby('day_of_week')['start_hour'].mean().reindex(range(7))
ax_day_start.bar(range(7), start_avg.fillna(0).values)
ax_day_start.set_title('Average Sleep Start Time by Day')
ax_day_start.set_xticks(range(7))
ax_day_start.set_xticklabels(day_labels)

# Sleep debt
ax_debt.plot(featured_df['start_time'], featured_df['cumulative_sleep_debt'])
ax_debt.set_title('Cumulative Sleep Debt')
ax_debt.tick_params(axis='x', rotation=45)

# Correlation heatmap (first 10 numeric columns keep the annotations legible)
numeric_cols = featured_df.select_dtypes(include=[np.number]).columns[:10]
corr = featured_df[numeric_cols].corr()
sns.heatmap(corr, annot=True, cmap='coolwarm', center=0, fmt='.2f', ax=ax_corr)
ax_corr.set_title('Feature Correlation')

fig.tight_layout()
plt.show()

In [None]:
# Prepare data for modeling
def prepare_modeling_data(df, include_start_hour_encoding=False):
    """Build the feature matrix and the two target vectors.

    Args:
        df: Engineered DataFrame containing the feature columns plus the
            ``start_hour`` and ``sleep_duration`` target columns.
        include_start_hour_encoding: When True, ``start_hour_sin`` and
            ``start_hour_cos`` are used as features. They are a direct
            encoding of the ``start_hour`` target, so they are left out by
            default; enable this only for a model that predicts duration
            from an already known bedtime.

    Returns:
        Tuple ``(X, y_start, y_duration, feature_cols)``: the feature matrix,
        the ``start_hour`` values, the ``sleep_duration`` values and the list
        of column names, in the column order of ``X``.
    """
    # Select features for modeling
    feature_cols = [
        'day_sin', 'day_cos', 'month_sin', 'month_cos',
        'is_weekend', 'duration_7d_avg', 'duration_7d_std', 'start_hour_7d_avg',
        'cumulative_sleep_debt', 'prev_duration', 'prev_start_hour'
    ]
    if include_start_hour_encoding:
        feature_cols = ['start_hour_sin', 'start_hour_cos'] + feature_cols

    # Add quality features if available: every other "*_7d_avg" column
    quality_features = [
        col for col in df.columns
        if col.endswith('_7d_avg') and 'duration' not in col and 'start_hour' not in col
    ]
    feature_cols.extend(quality_features)

    # Filter existing columns
    feature_cols = [col for col in feature_cols if col in df.columns]

    X = df[feature_cols].to_numpy()

    # Targets: start_hour and duration
    y_start = df['start_hour'].to_numpy()
    y_duration = df['sleep_duration'].to_numpy()

    return X, y_start, y_duration, feature_cols

# Build the modeling arrays and report their sizes
X, y_start, y_duration, feature_names = prepare_modeling_data(featured_df)
modeling_summary = [
    f"📐 Features shape: {X.shape}",
    f"🎯 Targets: start_time({len(y_start)}), duration({len(y_duration)})",
    f"🔧 Using features: {feature_names}",
]
print("\n".join(modeling_summary))

In [None]:
# Train multiple models and compare performance
def train_and_evaluate_models(X, y_start, y_duration, test_size=0.2, random_state=42, shuffle=True):
    """Fit one start-time and one duration regressor per model family.

    Args:
        X: Feature matrix.
        y_start: Start-hour target values, aligned with the rows of ``X``.
        y_duration: Sleep-duration target values, aligned with the rows of ``X``.
        test_size: Share of the rows held out for evaluation.
        random_state: Seed used for the split and for every estimator.
        shuffle: Whether rows are shuffled before splitting. Pass False for a
            chronological hold-out when the rows are ordered in time; any
            other cell that compares against these results must then split
            the same way.

    Returns:
        Tuple ``(results, trained_models, scaler)``:
        ``results`` maps model name to its MAE / R² metrics on the test rows,
        ``trained_models`` maps model name to its fitted ``start_model``,
        ``duration_model`` and ``scaler`` (None unless the model was trained
        on scaled inputs), and ``scaler`` is the StandardScaler fitted on the
        training rows.
    """
    # Local import: the notebook's import cell does not provide clone.
    from sklearn.base import clone

    # Split data
    X_train, X_test, y_start_train, y_start_test, y_duration_train, y_duration_test = train_test_split(
        X, y_start, y_duration, test_size=test_size, random_state=random_state, shuffle=shuffle
    )

    # Scale features (statistics come from the training rows only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Unfitted templates. Both targets get their own clone, so hyperparameters
    # are written down exactly once per model family.
    models = {
        'Random Forest': RandomForestRegressor(n_estimators=100, random_state=random_state),
        'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=random_state),
        'Neural Network': MLPRegressor(hidden_layer_sizes=(100, 50), random_state=random_state, max_iter=1000)
    }
    # Only the neural network is trained on standardized inputs.
    scaled_models = {'Neural Network'}

    results = {}
    trained_models = {}

    for name, template in models.items():
        print(f"\n🔄 Training {name}...")

        uses_scaler = name in scaled_models
        train_inputs = X_train_scaled if uses_scaler else X_train
        test_inputs = X_test_scaled if uses_scaler else X_test

        # Train for start time prediction
        model_start = clone(template)
        model_start.fit(train_inputs, y_start_train)
        y_start_pred = model_start.predict(test_inputs)

        # Train for duration prediction
        model_duration = clone(template)
        model_duration.fit(train_inputs, y_duration_train)
        y_duration_pred = model_duration.predict(test_inputs)

        # Calculate metrics
        start_mae = mean_absolute_error(y_start_test, y_start_pred)
        start_r2 = r2_score(y_start_test, y_start_pred)
        duration_mae = mean_absolute_error(y_duration_test, y_duration_pred)
        duration_r2 = r2_score(y_duration_test, y_duration_pred)

        results[name] = {
            'start_mae': start_mae,
            'start_r2': start_r2,
            'duration_mae': duration_mae,
            'duration_r2': duration_r2
        }

        trained_models[name] = {
            'start_model': model_start,
            'duration_model': model_duration,
            'scaler': scaler if uses_scaler else None
        }

        print(f"  Start Time - MAE: {start_mae:.2f}h, R²: {start_r2:.3f}")
        print(f"  Duration - MAE: {duration_mae:.2f}h, R²: {duration_r2:.3f}")

    return results, trained_models, scaler

# Fit every candidate model on the prepared arrays
results, trained_models, scaler = train_and_evaluate_models(X, y_start, y_duration)

# One row per model, one column per metric
results_df = pd.DataFrame(results).transpose()
print("\n📊 Model Comparison:", results_df.round(3), sep="\n")

In [None]:
# Advanced Deep Learning Model
def create_deep_model(input_dim):
    """Build and compile the feed-forward network with two regression outputs.

    Args:
        input_dim: Number of features in each input row.

    Returns:
        A compiled ``keras.Sequential`` model (Adam optimizer, MSE loss, MAE
        metric) whose two outputs are ``[start_time, duration]``.
    """
    model = keras.Sequential([
        # Declaring the input with keras.Input is the form recommended for
        # Sequential models; passing input_shape to the first Dense layer
        # triggers a warning in current Keras releases.
        keras.Input(shape=(input_dim,)),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.Dense(2)  # Output: [start_time, duration]
    ])

    model.compile(
        optimizer='adam',
        loss='mse',
        metrics=['mae']
    )

    return model

# Prepare data for deep learning
# Same test_size and random_state as the scikit-learn comparison, so every
# model is scored on the same held-out rows.
X_train, X_test, y_start_train, y_start_test, y_duration_train, y_duration_test = train_test_split(
    X, y_start, y_duration, test_size=0.2, random_state=42
)

# Scale data
# The scaler returned by the previous cell is refitted on the training rows;
# this is the object the export cell saves together with the deep model.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Combine targets
y_train_combined = np.column_stack([y_start_train, y_duration_train])
y_test_combined = np.column_stack([y_start_test, y_duration_test])

# Seed Python, NumPy and the Keras backend before the model is created, so
# weight initialization, dropout and batch shuffling repeat on a fresh run.
# The scikit-learn models are seeded too, which keeps the comparison fair.
keras.utils.set_random_seed(42)

# Create and train deep model
print("🧠 Training Deep Learning Model...")
deep_model = create_deep_model(X_train_scaled.shape[1])

# Add callbacks
callbacks = [
    # Stop once the validation loss stalls and keep the best weights seen
    keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True),
    # Halve the learning rate when the validation loss plateaus
    keras.callbacks.ReduceLROnPlateau(patience=10, factor=0.5)
]

# Train the model
history = deep_model.fit(
    X_train_scaled, y_train_combined,
    validation_split=0.2,
    epochs=200,
    batch_size=32,
    callbacks=callbacks,
    verbose=1
)

# Evaluate deep model (column 0 = start time, column 1 = duration)
y_pred_combined = deep_model.predict(X_test_scaled)
y_start_pred_deep = y_pred_combined[:, 0]
y_duration_pred_deep = y_pred_combined[:, 1]

deep_start_mae = mean_absolute_error(y_start_test, y_start_pred_deep)
deep_start_r2 = r2_score(y_start_test, y_start_pred_deep)
deep_duration_mae = mean_absolute_error(y_duration_test, y_duration_pred_deep)
deep_duration_r2 = r2_score(y_duration_test, y_duration_pred_deep)

print(f"\n🧠 Deep Learning Results:")
print(f"  Start Time - MAE: {deep_start_mae:.2f}h, R²: {deep_start_r2:.3f}")
print(f"  Duration - MAE: {deep_duration_mae:.2f}h, R²: {deep_duration_r2:.3f}")

# Plot training history
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['mae'], label='Training MAE')
plt.plot(history.history['val_mae'], label='Validation MAE')
plt.title('Model MAE')
plt.xlabel('Epoch')
plt.legend()
plt.show()

In [None]:
# Select best model and prepare for export
# Metrics of the scikit-learn models plus those of the deep model
all_results = {
    **results,
    'Deep Learning': {
        'start_mae': deep_start_mae,
        'start_r2': deep_start_r2,
        'duration_mae': deep_duration_mae,
        'duration_r2': deep_duration_r2
    },
}

# Find best model based on combined performance:
# the mean of both R² values, where higher is better
model_scores = {
    model_name: (scores['start_r2'] + scores['duration_r2']) / 2
    for model_name, scores in all_results.items()
}

best_model_name = max(model_scores, key=model_scores.get)
print(f"🏆 Best Model: {best_model_name} (R² Score: {model_scores[best_model_name]:.3f})")

# Final results table, best model first
final_results_df = (
    pd.DataFrame(all_results).T
    .assign(combined_r2=lambda table: table[['start_r2', 'duration_r2']].mean(axis=1))
    .sort_values('combined_r2', ascending=False)
)

print("\n📊 Final Model Rankings:")
print(final_results_df.round(3))

In [None]:
# Export the best model for deployment
def export_model_for_deployment():
    """Write the best model, its scaler (if any) and a JSON description to disk.

    Reads the notebook globals ``best_model_name``, ``feature_names``,
    ``all_results``, ``featured_df``, ``deep_model``, ``scaler`` and
    ``trained_models``. All files are written to the current working directory.

    Returns:
        The metadata dict that was written to ``model_config.json``; its
        ``model_files`` entry lists the model and scaler files that were saved.
    """
    export_data = {
        'model_type': best_model_name,
        'feature_names': list(feature_names),
        # Plain floats: NumPy scalar types are not all accepted by json.dump
        'performance': {
            metric: float(value) for metric, value in all_results[best_model_name].items()
        },
        'training_date': datetime.now().isoformat(),
        'data_points': len(featured_df)
    }

    if best_model_name == 'Deep Learning':
        # Save TensorFlow model
        deep_model.save('sleep_prediction_model.h5')

        # Save scaler
        joblib.dump(scaler, 'feature_scaler.pkl')

        export_data['model_files'] = ['sleep_prediction_model.h5', 'feature_scaler.pkl']
        export_data['requires_tensorflow'] = True

    else:
        # Save scikit-learn models
        model_data = trained_models[best_model_name]
        joblib.dump(model_data['start_model'], 'start_time_model.pkl')
        joblib.dump(model_data['duration_model'], 'duration_model.pkl')
        export_data['model_files'] = ['start_time_model.pkl', 'duration_model.pkl']

        # Compare against None explicitly instead of relying on the
        # truthiness of an estimator object.
        if model_data['scaler'] is not None:
            joblib.dump(model_data['scaler'], 'feature_scaler.pkl')
            export_data['model_files'].append('feature_scaler.pkl')

        export_data['requires_tensorflow'] = False

    # Save model metadata
    with open('model_config.json', 'w') as f:
        json.dump(export_data, f, indent=2)

    print(f"✅ Model exported successfully!")
    print(f"📁 Files: {export_data['model_files']} + model_config.json")

    return export_data

# Run the export and keep its metadata for the download cell
export_info = export_model_for_deployment()

# Print a short recap of what was exported
exported_performance = export_info['performance']
export_summary = [
    "\n📋 Export Summary:",
    f"Model Type: {export_info['model_type']}",
    f"Performance - Start Time R²: {exported_performance['start_r2']:.3f}",
    f"Performance - Duration R²: {exported_performance['duration_r2']:.3f}",
    f"Training Data Points: {export_info['data_points']}",
    f"Features Used: {len(export_info['feature_names'])}",
]
print("\n".join(export_summary))

In [None]:
# Download all model files
print("📥 Downloading model files...")

# The config file goes through the same existence check as the model files,
# and missing files are collected so the closing message stays truthful.
missing_files = []
for file_name in ['model_config.json', *export_info['model_files']]:
    if os.path.exists(file_name):
        files.download(file_name)
        print(f"✅ Downloaded: {file_name}")
    else:
        missing_files.append(file_name)
        print(f"❌ File not found: {file_name}")

if missing_files:
    print(f"\n⚠️ {len(missing_files)} file(s) were not downloaded: {missing_files}. Re-run the export cell before deploying.")
else:
    print("\n🎉 All files downloaded! Upload these to your Vercel project.")

## 🚀 Next Steps for Vercel Deployment

1. **Upload the downloaded files** to your Vercel project in a `models/` directory
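2. **Install required packages** in your `package.json`:
   - For TensorFlow: `@tensorflow/tfjs-node`. Note that the exported `.h5` file must first be converted to the TensorFlow.js format (for example with `tensorflowjs_converter`) before it can be loaded in Node.js.
   - For scikit-learn models: `ml-matrix` or similar JS ML library. Note that the `.pkl` files are Python pickles and cannot be read directly from JavaScript; serve them from a Python runtime or re-export the model in a portable format.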
3. **Update your prediction API** to use the trained model
4. **Test the deployed model** with real predictions

The trained model should provide much better accuracy than the simple linear regression!