# Stage 13 Homework Starter — Productization

## Objective
Deploy your trained model as a **reusable, handoff-ready API or dashboard** and finalize your project for reproducibility and clarity.

## 1. Create a basic mock analysis

In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import pickle
import os
import sys

# Add src to path to import utils
sys.path.append('../src')
from utils import validate_inputs, calculate_confidence_interval, preprocess_features

# Read the options dataset written by the earlier stages
data_path = '../data/options_data_20250823_163952.csv'
df = pd.read_csv(data_path)

# Derived features are computed inline here:
#   moneyness = underlying price relative to the strike
#   vol_time  = implied volatility multiplied by time to expiry
df['moneyness'] = df['underlying_price'].div(df['strike'])
df['vol_time'] = df['implied_volatility'].mul(df['time_to_expiry'])

# Keep only the modelling columns and drop any row with a missing value
# (the drop-missing strategy selected in Stage 11)
features = ['implied_volatility', 'moneyness', 'vol_time']
target = 'market_price'
model_columns = [*features, target]
clean_df = df.loc[:, model_columns].dropna()

# Split the cleaned frame into the design matrix and the target vector
X = clean_df[features]
y = clean_df[target]

# Quick sanity summary of what will be fed to the model
row_count, feature_count = X.shape
print(f"Final dataset: {row_count} samples, {feature_count} features")
print(f"Target range: ${y.min():.2f} - ${y.max():.2f}")

Final dataset: 496 samples, 3 features
Target range: $0.01 - $495.83


## Model Training


In [3]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a linear regression on the training split (best scenario from Stage 11)
model = LinearRegression().fit(X_train, y_train)

# Score the held-out split
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(
    "Model Performance:",
    f"MAE: ${mae:.2f}",
    f"R²: {r2:.3f}",
    f"Features: {features}",
    sep="\n",
)

# Smoke-test the shared validation helper with one known-good input triple
test_vol, test_moneyness, test_vol_time = 0.25, 1.05, 0.05
validation_errors = validate_inputs(test_vol, test_moneyness, test_vol_time)
print(
    f"Validation errors: {validation_errors}"
    if validation_errors
    else "✓ Test inputs passed validation"
)

Model Performance:
MAE: $66.84
R²: 0.344
Features: ['implied_volatility', 'moneyness', 'vol_time']
✓ Test inputs passed validation


## Pickle / Save Final Model

In [4]:
# Single source of truth for where the model artifact lives
model_path = '../model/options_pricing_model.pkl'

# Ensure model directory exists
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Save the model together with the metadata a consumer needs to use it:
# the ordered feature list, the hold-out metrics, and the cleaned sample count
model_data = {
    'model': model,
    'features': features,
    'mae': mae,
    'r2': r2,
    'n_samples': len(X)
}

with open(model_path, 'wb') as f:
    pickle.dump(model_data, f)

# Round-trip check: reload the artifact we just wrote.
# NOTE: pickle.load can execute arbitrary code; this is only acceptable here
# because the file was produced by this notebook. Never unpickle untrusted files.
with open(model_path, 'rb') as f:
    loaded_data = pickle.load(f)
loaded_model = loaded_data['model']

# Test prediction with confidence interval using utils
test_features = [0.25, 1.05, 0.05]  # implied_vol, moneyness, vol_time

# The model was fitted on a DataFrame, so predict on a DataFrame carrying the
# same column names (taken from the persisted metadata). This avoids
# scikit-learn's "X does not have valid feature names" warning and makes the
# feature order explicit instead of positional.
test_input = pd.DataFrame([test_features], columns=loaded_data['features'])
test_pred = loaded_model.predict(test_input)[0]

# Use utils function for confidence interval (built from the prediction and the hold-out MAE)
confidence_result = calculate_confidence_interval(test_pred, mae)

print(f"Test prediction: ${test_pred:.2f}")
print(f"95% CI: ${confidence_result['lower_bound']:.2f} - ${confidence_result['upper_bound']:.2f}")
print(f"Model loaded successfully with {len(loaded_data['features'])} features")

Test prediction: $33.39
95% CI: $-97.61 - $164.40
Model loaded successfully with 3 features




## Flask API Starter

In [7]:
import requests
import time

# Test different input scenarios using utils functions
test_cases = [
    (0.25, 1.05, 0.05),  # Valid case
    (0.25, 1.05, 0.6),   # Invalid vol_time (too high)
    (3.0, 1.05, 0.05),   # Invalid implied_vol (too high)
    (0.25, 0.3, 0.05),   # Invalid moneyness (too low)
]

print("Testing input validation with utils:")
for vol, money, vol_time in test_cases:
    errors = validate_inputs(vol, money, vol_time)
    # Only the first validation message is shown to keep the table to one line per case
    status = "✓ VALID" if not errors else f"✗ INVALID: {errors[0]}"
    print(f"Vol={vol}, Money={money}, VolTime={vol_time} → {status}")

print("\nFlask API Testing:")
print("Start the API with: python app.py")
print("Then uncomment the code below to test endpoints:")

# Live endpoint checks. These need the Flask app to be running locally;
# failures are reported rather than raised so the notebook still runs end to end.
api_base_url = 'http://127.0.0.1:5000'
# Without a timeout, requests waits indefinitely and can hang the kernel
api_timeout_seconds = 5

try:
    # Test valid case
    response = requests.get(f'{api_base_url}/predict/0.25/1.05/0.05', timeout=api_timeout_seconds)
    print(f"✓ API prediction: {response.json()}")

    # Test invalid case
    response = requests.get(f'{api_base_url}/predict/3.0/1.05/0.05', timeout=api_timeout_seconds)
    print(f"✗ API validation: {response.json()}")

    # Test POST with utils validation
    test_data = {'implied_volatility': 0.25, 'moneyness': 1.05, 'vol_time': 0.05}
    response = requests.post(f'{api_base_url}/predict', json=test_data, timeout=api_timeout_seconds)
    print(f"✓ POST prediction: {response.json()}")
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
    # Server is unreachable or not answering in time
    print(f"API not running: {e}")
except (requests.exceptions.RequestException, ValueError) as e:
    # Server answered, but the request failed or the body was not valid JSON
    # (ValueError covers JSON decode errors raised by response.json())
    print(f"API request failed: {e}")

Testing input validation with utils:
Vol=0.25, Money=1.05, VolTime=0.05 → ✓ VALID
Vol=0.25, Money=1.05, VolTime=0.6 → ✗ INVALID: Vol-time interaction must be between 0.0 and 0.5
Vol=3.0, Money=1.05, VolTime=0.05 → ✗ INVALID: Implied volatility must be between 0.1 and 2.0
Vol=0.25, Money=0.3, VolTime=0.05 → ✗ INVALID: Moneyness must be between 0.5 and 2.0

Flask API Testing:
Start the API with: python app.py
Then uncomment the code below to test endpoints:
✓ API prediction: {'confidence': 0.95, 'inputs': {'implied_volatility': 0.25, 'moneyness': 1.05, 'vol_time': 0.05}, 'lower_bound': -97.61306294426569, 'prediction': 33.39144756133674, 'upper_bound': 164.3959580669392}
✗ API validation: {'details': ['Implied volatility must be between 0.1 and 2.0'], 'error': 'Validation failed'}
✓ POST prediction: {'confidence': 0.95, 'inputs': {'implied_volatility': 0.25, 'moneyness': 1.05, 'vol_time': 0.05}, 'lower_bound': -97.61306294426569, 'prediction': 33.39144756133674, 'upper_bound': 164.395958