In [2]:
import pandas as pd
import numpy as np
from datetime import datetime
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import os
import hashlib
import base64
import json

def domino_short_id(length=8):
    """Return a short, stable identifier for the current user and project.

    The user name and project id are read from the ``DOMINO_USER_NAME`` and
    ``DOMINO_PROJECT_ID`` environment variables, falling back to
    ``demo_user`` and ``demo_project``. The ``user/project`` pair is SHA-256
    hashed, URL-safe base64 encoded with padding stripped, and the first
    ``length`` characters of that encoding are appended to the user name.
    """
    env = os.environ
    user_name = env.get("DOMINO_USER_NAME", "demo_user")
    project_id = env.get("DOMINO_PROJECT_ID", "demo_project")

    fingerprint = hashlib.sha256(f"{user_name}/{project_id}".encode())
    token = base64.urlsafe_b64encode(fingerprint.digest()).decode("utf-8")
    suffix = token.rstrip("=")[:length]
    return f"{user_name}_{suffix}"

def generate_data(n=200, seed=42):
    """Generate synthetic regression data for model training.

    Args:
        n: Number of rows to generate.
        seed: Seed for the private random generator. The default of 42
            reproduces the values previously obtained by calling
            ``np.random.seed(42)`` before drawing.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a DataFrame with four
        standard-normal columns ``feature_1`` .. ``feature_4`` and ``y`` is
        a Series holding the row-wise sum of ``X`` plus Gaussian noise
        scaled by 0.1.
    """
    # A private RandomState yields the same stream as seeding the global
    # generator, but does not reset the process-wide NumPy RNG as a side
    # effect of merely generating data.
    rng = np.random.RandomState(seed)
    # Columns are drawn in order feature_1..feature_4, then the noise, so the
    # values match the original draw order exactly.
    X = pd.DataFrame({f"feature_{i}": rng.randn(n) for i in range(1, 5)})
    noise = rng.randn(n) * 0.1
    y = X.sum(axis=1) + noise
    return X, y

# Catalogue of models to train: each entry carries the short registry name
# ("name") and a human-readable description ("long_name").
MODELS = [
    {"name": short_name, "long_name": description}
    for short_name, description in (
        ("HelpBot", "Internal Helpdesk Chatbot"),
        ("PressFinder", "Customer Press Release Discovery & Summary Tool"),
        ("VoiceOverPro", "Internal Video Voice-Over Tool"),
        ("FINREP Extractor", "Internal Tool: Data Extractor from Financial Reports"),
        ("Fitch Portfolio Optimizer", "Portfolio Optimization Tool for Subscribers"),
        ("EntityTagger Pro", "Third-Party Press Release Tagging Tool"),
        ("ResumeRanker Pro", "Recruitment Selection Support Tool"),
    )
]

def train_model(model_info):
    """Train a linear regression on synthetic data and log the run to MLflow.

    Args:
        model_info: Mapping with a ``"name"`` (used for the run name, the
            ``model_name`` parameter and the registered model name) and a
            ``"long_name"`` (stored in the ``long-name`` tag).

    Returns:
        The MLflow run id of the training run.
    """
    # Use local file-based tracking
    mlflow.set_tracking_uri("file:///tmp/mlflow")

    try:
        # set_experiment creates the experiment when it does not exist yet,
        # so no separate lookup/create step is needed.
        mlflow.set_experiment(f"Model_Dashboard_{domino_short_id()}")
    except Exception as exc:
        # Best-effort fallback to the default experiment, but report the
        # failure instead of hiding it so runs do not silently land in the
        # wrong experiment.
        print(f"⚠️ Could not set up experiment ({exc}); falling back to 'Default'")
        mlflow.set_experiment("Default")

    with mlflow.start_run(run_name=f"{model_info['name']}_training") as run:
        X, y = generate_data()
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        model = LinearRegression()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Log parameters
        mlflow.log_params({
            "model_name": model_info["name"],
            "data_points": len(X_train),
            "features": list(X.columns)
        })

        # Log metrics: RMSE and R^2 computed on the held-out split
        residuals = y_test - y_pred
        rmse = np.sqrt((residuals ** 2).mean())
        r2 = 1 - (residuals ** 2).sum() / ((y_test - y_test.mean()) ** 2).sum()

        mlflow.log_metrics({
            "rmse": rmse,
            "r2_score": r2
        })

        # Log tags
        mlflow.set_tags({
            "long-name": model_info["long_name"],
            "timestamp": datetime.now().isoformat()
        })

        # Log model and register it under the model's short name
        mlflow.sklearn.log_model(model, "model", registered_model_name=model_info["name"])

        print(f"✅ {model_info['name']} trained successfully")
        return run.info.run_id

if __name__ == "__main__":
    print("🚀 Starting Model Training...")

    # Train every configured model; a failure is reported and skipped so the
    # remaining models still get trained.
    run_ids = []
    for spec in MODELS:
        try:
            trained_run_id = train_model(spec)
            run_ids.append({"name": spec["name"], "run_id": trained_run_id})
        except Exception as exc:
            print(f"❌ Error training {spec['name']}: {exc}")

    # Write the successful runs to a timestamped JSON summary file
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    summary_path = f"/tmp/model_summary_{stamp}.json"
    with open(summary_path, 'w') as fh:
        fh.write(json.dumps(run_ids, indent=2))

    print("\n✨ Training Complete!")
    print("📈 MLflow UI: http://localhost:5000")
    print(f"📄 Summary: {summary_path}")
    print(f"🔍 Total models: {len(run_ids)}")

🚀 Starting Model Training...


Successfully registered model 'HelpBot'.
Created version '1' of model 'HelpBot'.


✅ HelpBot trained successfully


Successfully registered model 'PressFinder'.
Created version '1' of model 'PressFinder'.


✅ PressFinder trained successfully


Successfully registered model 'VoiceOverPro'.
Created version '1' of model 'VoiceOverPro'.


✅ VoiceOverPro trained successfully


Successfully registered model 'FINREP Extractor'.
Created version '1' of model 'FINREP Extractor'.


✅ FINREP Extractor trained successfully


Successfully registered model 'Fitch Portfolio Optimizer'.
Created version '1' of model 'Fitch Portfolio Optimizer'.


✅ Fitch Portfolio Optimizer trained successfully


Successfully registered model 'EntityTagger Pro'.
Created version '1' of model 'EntityTagger Pro'.


✅ EntityTagger Pro trained successfully




✅ ResumeRanker Pro trained successfully

✨ Training Complete!
📈 MLflow UI: http://localhost:5000
📄 Summary: /tmp/model_summary_20250911_201358.json
🔍 Total models: 7


Successfully registered model 'ResumeRanker Pro'.
Created version '1' of model 'ResumeRanker Pro'.
