# Win Rate Model Training

This notebook walks through the machine-learning (ML) training process for the pricing intelligence proof of concept (POC).
We'll train a random forest classifier to predict win probability (the `won_flag` column) from pricing, customer, and product features.

## Multi-Model Agent Architecture

This POC showcases how **multi-model agents** enhance pricing intelligence:
- **Rules Agent**: Business policy enforcement
- **WinRate Agent**: ML-based win probability prediction  
- **Elasticity Agent**: Economic price sensitivity modeling
- **Explainer Agent**: Natural language explanations
- **Orchestrator**: Coordinates all agents for optimal recommendations

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, roc_auc_score, roc_curve
import joblib
from pathlib import Path

# Set style for better plots
# Select matplotlib's 'default' style first, then seaborn's "husl" colour palette.
# NOTE(review): the order presumably matters (applying a matplotlib style after
# the palette may undo the palette) -- confirm before reordering.
plt.style.use('default')
sns.set_palette("husl")

# Confirmation message printed after the styling calls above have run.
print("Libraries imported successfully!")

## 1. Load and Explore Data

In [None]:
# Read the four sample CSV files that feed the win-rate model.
# The directory is relative to the notebook's working directory.
data_dir = Path('../data/sample_csv')

orders, customers, products, cogs = (
    pd.read_csv(data_dir / csv_name)
    for csv_name in ('orders.csv', 'customers.csv', 'products.csv', 'cogs.csv')
)

# Report (rows, columns) for every table as a quick sanity check.
print("Dataset shapes:")
print(f"Orders: {orders.shape}")
print(f"Customers: {customers.shape}")
print(f"Products: {products.shape}")
print(f"COGS: {cogs.shape}")

# Preview the first rows of the orders table; this stays the last expression
# so the notebook renders it as the cell output.
print("\nSample orders:")
orders.head()

## 2. Data Merging and Feature Engineering

In [None]:
# Merge datasets for feature engineering.
# Left joins keep every order row even when a customer, product or COGS lookup
# has no match; the unmatched attributes are NaN in that case.
df = orders.merge(customers, on='customer_id', how='left')
df = df.merge(products, on='product_id', how='left')
df = df.merge(cogs, on='product_id', how='left')

print(f"Merged dataset shape: {df.shape}")
print(f"Win rate: {df['won_flag'].mean():.1%}")

# Create pricing features
# NOTE(review): margin_pct is computed relative to cost (a markup over COGS),
# not relative to price -- confirm this is the intended definition.
df['margin_pct'] = (df['net_price'] - df['cogs']) / df['cogs']
df['price_vs_competitor'] = df['net_price'] / df['competitor_price']

# A zero denominator produces +/-inf, which scikit-learn estimators refuse to
# train on. Treat those rows as missing instead.
ratio_cols = ['margin_pct', 'price_vs_competitor']
df[ratio_cols] = df[ratio_cols].replace([np.inf, -np.inf], np.nan)

# The top bin of each bucketing is open-ended (np.inf) so that quantities above
# 100 and price ratios above 2.0 land in 'XLarge' / 'Above' instead of silently
# becoming NaN. Values inside the previous bounds are bucketed exactly as before.
df['volume_tier'] = pd.cut(
    df['quantity'],
    bins=[0, 5, 15, 30, np.inf],
    labels=['Small', 'Medium', 'Large', 'XLarge'],
)
df['price_position'] = pd.cut(
    df['price_vs_competitor'],
    bins=[0, 0.9, 1.1, np.inf],
    labels=['Below', 'Match', 'Above'],
)

print("\nKey features created:")
print(f"- Margin %: {df['margin_pct'].mean():.1%} average")
print(f"- Price vs Competitor: {df['price_vs_competitor'].mean():.2f} average ratio")
print(f"- Volume distribution: {df['volume_tier'].value_counts().to_dict()}")
print(f"- Price positioning: {df['price_position'].value_counts().to_dict()}")

## 3. Model Training

In [None]:
# Model inputs, in the order they will appear in the feature matrix.
feature_cols = ['margin_pct', 'discount_depth', 'price_vs_competitor', 'quantity',
               'segment', 'channel', 'region', 'family', 'volume_tier', 'price_position']

# The subset of inputs that is categorical and needs integer encoding.
categorical_cols = ['segment', 'channel', 'region', 'family', 'volume_tier', 'price_position']

# Fit one LabelEncoder per categorical column and keep it, keyed by column
# name, so the same mapping can be reused later.
encoders = {}
for name in categorical_cols:
    if name not in df.columns:
        continue  # column absent from the merged frame: nothing to encode
    df[name] = df[name].astype(str)  # cast to str first, so missing values become a label too
    encoder = LabelEncoder()
    df[name + '_encoded'] = encoder.fit_transform(df[name])
    encoders[name] = encoder

# Swap each categorical column name for its '<name>_encoded' counterpart;
# numeric columns keep their name.
feature_cols_encoded = [
    name + '_encoded' if name in categorical_cols else name
    for name in feature_cols
]

# Feature matrix (missing values replaced by 0) and binary target.
X = df.loc[:, feature_cols_encoded].fillna(0)
y = df['won_flag']

print(f"Feature matrix shape: {X.shape}")
print(f"Features: {feature_cols_encoded}")

# 70/30 split, stratified on the target, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)
print(f"\nTrain: {X_train.shape}, Test: {X_test.shape}")

In [None]:
# Fit a 100-tree random forest (depth capped at 10, fixed seed) on the training split.
rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    random_state=42,
).fit(X_train, y_train)

# Score the held-out split: hard labels for the classification report and the
# second predict_proba column as the score for ROC AUC.
y_pred = rf_model.predict(X_test)
y_pred_proba = rf_model.predict_proba(X_test)[:, 1]
auc = roc_auc_score(y_test, y_pred_proba)

print(f"Test AUC: {auc:.3f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Pair every feature name with its importance and rank from most to least important.
feature_importance = pd.DataFrame(
    {'feature': feature_cols_encoded, 'importance': rf_model.feature_importances_}
)
feature_importance = feature_importance.sort_values('importance', ascending=False)

print("\nTop 5 Feature Importances:")
for row in feature_importance.head(5).to_dict('records'):
    print(f"{row['feature']}: {row['importance']:.3f}")

## 4. Save Model and Test Multi-Agent System

In [None]:
# Persist the trained classifier and the fitted label encoders side by side
# in ../models (created on first run).
models_dir = Path('../models')
models_dir.mkdir(exist_ok=True)

for artifact, filename in (
    (rf_model, 'winrate_model.pkl'),
    (encoders, 'winrate_encoders.pkl'),
):
    joblib.dump(artifact, models_dir / filename)

print(f"✅ Model saved with AUC: {auc:.3f}")
print("Model training complete! Ready for production use.")

# Test multi-agent orchestrator
# Best-effort smoke test: request one recommendation and print it. Failures are
# reported but never raised, so the notebook still runs to the end.
import sys

# Make the parent directory importable so `agents` can be found. Guarded so
# that re-running the cell does not keep adding duplicate entries.
if '..' not in sys.path:
    sys.path.append('..')

try:
    from agents.orchestrator import AgentOrchestrator
except ImportError as e:
    # The agents package is not importable from this environment.
    print(f"Note: Multi-agent system will be available after running the API: {e}")
else:
    # Currency requested from the orchestrator; reused when printing so the
    # displayed prices are labelled with the currency that was asked for
    # (previously a hard-coded '$' was shown for a EUR request).
    currency = "EUR"

    try:
        # Initialize orchestrator
        orchestrator = AgentOrchestrator()

        # Test recommendation
        result = orchestrator.recommend_price(
            sku="SKU-001",
            customer_id="C-100",
            quantity=10,
            country="DE",
            channel="Direct",
            currency=currency,
        )

        print("\n🎯 Multi-Agent Recommendation Test:")
        print(f"Floor: {currency} {result['floor']}")
        print(f"Target: {currency} {result['target']}")
        print(f"Stretch: {currency} {result['stretch']}")
        print(f"Win Probability: {result['p_win_at_target']:.1%}")
        print(f"Confidence: {result['confidence_score']}")
    except Exception as e:
        # Broad on purpose: this is a demo boundary and must not abort the
        # notebook. The exception type is printed so that a real defect is not
        # mistaken for "the API is simply not running yet".
        print(f"Note: Multi-agent recommendation test failed ({type(e).__name__}): {e}")

print("\n🚀 Next steps: Run 'make run' to start the API with full multi-agent intelligence!")