In [249]:
import pandas as pd
import joblib
import gradio as gr
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

# Load the match-level dataset used both for training and for the
# per-team / per-venue feature lookups performed at prediction time.
df = pd.read_csv("ipl_dataset_final_enhanced_v2.csv")

# Columns the rest of this script reads: three categorical inputs, four
# numeric inputs and the `promo_hit` target.
required_columns = [
    'team1', 'team2', 'venue', 'promo_hit',
    'team1_batsmen_avg_six_pct', 'team2_batsmen_avg_six_pct',
    'venue_six_rate', 'venue_promo_rate'
]

# Fail fast, and say which columns are absent so the CSV can be fixed
# without guessing.
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise ValueError(
        "Dataset structure mismatch. Check column names. "
        f"Missing: {missing_columns}"
    )

# Feature groups consumed by the preprocessing step below.
categorical_features = ['team1', 'team2', 'venue']
numerical_features = [
    'team1_batsmen_avg_six_pct', 'team2_batsmen_avg_six_pct',
    'venue_six_rate', 'venue_promo_rate'
]
# NOTE(review): `venue_promo_rate` looks like it may be derived from the
# target itself -- confirm it does not leak `promo_hit` into the features.

# One-hot encode the categorical columns (categories unseen at fit time are
# encoded as all zeros instead of raising) and standardise the numeric ones.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ('num', StandardScaler(), numerical_features)
    ]
)

# Preprocessing + classifier in a single estimator so that raw DataFrames
# can be passed straight to `predict_proba`.
model = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(
        n_estimators=200,
        class_weight='balanced',
        random_state=42
    ))
])

# Train on the full dataset (no hold-out split is made in this script).
X = df[categorical_features + numerical_features]
y = df['promo_hit']
model.fit(X, y)

# Selectable options for the UI. Missing values are dropped first because
# sorting a mix of NaN (float) and team/venue names (str) raises TypeError.
valid_teams = sorted(pd.concat([df['team1'], df['team2']]).dropna().unique())
valid_venues = sorted(df['venue'].dropna().unique())

# Prediction Function with Validation
def predict_promo(team1, team2, venue):
    """Return a formatted promo-hit probability for a single matchup.

    The numeric model inputs are not supplied by the caller; they are looked
    up as the mean of the matching rows in ``df`` (rows where ``team1`` /
    ``team2`` / ``venue`` equal the given values). Any lookup that yields NaN
    (no matching rows) falls back to the dataset-wide mean of that feature.

    Args:
        team1: Name of the first team; must be in ``valid_teams``.
        team2: Name of the second team; must be in ``valid_teams`` and
            different from ``team1``.
        venue: Venue name; must be in ``valid_venues``.

    Returns:
        A human-readable string: the probability on success, or a message
        describing the invalid input or the prediction failure. The function
        never raises, because its result is displayed directly in the UI.
    """
    # Validate inputs
    if team1 not in valid_teams:
        return f"Invalid team1: {team1}. Valid options: {valid_teams}"
    if team2 not in valid_teams:
        return f"Invalid team2: {team2}. Valid options: {valid_teams}"
    if venue not in valid_venues:
        return f"Invalid venue: {venue}. Valid options: {valid_venues}"
    # A team cannot play itself; analyze_matchups skips such pairs as well.
    if team1 == team2:
        return "Invalid matchup: team1 and team2 must be different teams."

    try:
        # Filter the venue rows once; both venue features come from them.
        venue_rows = df[df['venue'] == venue]

        # Create input DataFrame
        input_data = pd.DataFrame([{
            'team1': team1,
            'team2': team2,
            'venue': venue,
            'team1_batsmen_avg_six_pct': df[df['team1'] == team1]['team1_batsmen_avg_six_pct'].mean(),
            'team2_batsmen_avg_six_pct': df[df['team2'] == team2]['team2_batsmen_avg_six_pct'].mean(),
            'venue_six_rate': venue_rows['venue_six_rate'].mean(),
            'venue_promo_rate': venue_rows['venue_promo_rate'].mean()
        }])

        # Handle missing values: e.g. a team that only ever appears in the
        # other team column has no rows to average, giving NaN.
        input_data = input_data.fillna(df.mean(numeric_only=True))

        # Column 1 of predict_proba is taken as the "promo hit" class.
        prob = model.predict_proba(input_data)[0][1]
    except Exception as e:
        # UI boundary: report the problem as text instead of raising.
        return f"Prediction failed: {e}"

    return f"Promo Hit Probability: {prob*100:.1f}%"

# Matchup analysis used by the "Analyze Top/Bottom Matchups" button
def analyze_matchups(top_n=5):
    """Rank every ordered team pairing at a few venues by promo-hit probability.

    Only the first three entries of ``valid_venues`` are analysed; that list
    is sorted by name, so these are the alphabetically first venues, not the
    "best" ones. For each of them every ordered pair of distinct teams is
    scored. Numeric features are the per-team / per-venue means from ``df``,
    with the dataset-wide mean as a fallback when a lookup has no rows (the
    same rule ``predict_promo`` applies).

    All matchups are scored with a single ``predict_proba`` call rather than
    one call per pairing.

    Args:
        top_n: How many matchups to list in each of the top and bottom
            sections. Values below 1 produce empty sections.

    Returns:
        A formatted multi-line string with the top and bottom matchups, or
        an "Analysis failed: ..." message if scoring raised.
    """
    # Slicing needs an int, and -0 would otherwise select the whole list in
    # the "bottom" section.
    top_n = max(int(top_n), 0)
    nan = float('nan')

    # Compute each lookup table once instead of re-filtering `df` inside
    # the triple loop.
    team1_six = df.groupby('team1')['team1_batsmen_avg_six_pct'].mean()
    team2_six = df.groupby('team2')['team2_batsmen_avg_six_pct'].mean()
    venue_six = df.groupby('venue')['venue_six_rate'].mean()
    venue_promo = df.groupby('venue')['venue_promo_rate'].mean()

    labels = []
    rows = []
    for venue in valid_venues[:3]:  # first three venues in sorted order
        for team1 in valid_teams:
            for team2 in valid_teams:
                if team1 == team2:
                    continue
                labels.append(f"{team1} vs {team2} at {venue}")
                rows.append({
                    'team1': team1,
                    'team2': team2,
                    'venue': venue,
                    'team1_batsmen_avg_six_pct': team1_six.get(team1, nan),
                    'team2_batsmen_avg_six_pct': team2_six.get(team2, nan),
                    'venue_six_rate': venue_six.get(venue, nan),
                    'venue_promo_rate': venue_promo.get(venue, nan)
                })

    results = []
    if rows:
        try:
            features = pd.DataFrame(rows).fillna(df.mean(numeric_only=True))
            # Column 1 of predict_proba is taken as the "promo hit" class.
            probs = model.predict_proba(features)[:, 1]
        except Exception as exc:
            # Surface the failure instead of silently returning an empty
            # or partial ranking.
            return f"Analysis failed: {exc}"
        results = list(zip(labels, probs))

    # Sort and format results (highest probability first)
    results.sort(key=lambda x: x[1], reverse=True)

    top_slice = results[:top_n]
    bottom_slice = results[-top_n:] if top_n else []

    best = "\n".join(f"{i+1}. {m[0]}: {m[1]*100:.1f}%" for i, m in enumerate(top_slice))
    worst = "\n".join(f"{i+1}. {m[0]}: {m[1]*100:.1f}%" for i, m in enumerate(reversed(bottom_slice)))

    return f"🏆 Top {top_n} Matchups:\n{best}\n\n⚡️ Bottom {top_n} Matchups:\n{worst}"

# Custom CSS for the app. It is defined before the Blocks context and passed
# to the constructor in one piece, so the container width rule and the
# component styles are applied together (assigning `app.css` after
# construction would replace whatever was passed to the constructor).
custom_css = """
.gradio-container {
    max-width: 800px !important;
}

.result-box {
    padding: 20px;
    border-radius: 10px;
    border: 2px solid #4CAF50;
    font-size: 1.2em;
}

.analysis-box {
    padding: 15px;
    border-radius: 8px;
    border: 1px solid #ddd;
}

h1 {
    text-align: center;
    color: #2c3e50;
}

.markdown {
    margin-bottom: 15px !important;
}

button {
    transition: all 0.3s ease !important;
}

button:hover {
    transform: scale(1.02);
}
"""

# Create Gradio Interface
with gr.Blocks(title="IPL Promo Predictor Pro", theme=gr.themes.Soft(), css=custom_css) as app:
    gr.Markdown("""
    # 🏏 IPL Promo Hit Predictor 
    *Predict the probability of a promo hit (six in first 2 overs + team loses)*
    """)

    # Inputs: two team dropdowns, a venue dropdown and the predict button.
    with gr.Row():
        with gr.Column(scale=1, min_width=300):
            gr.Markdown("### 🏟️ Match Details")
            with gr.Row():
                team1 = gr.Dropdown(choices=valid_teams, label="Home Team", info="Select first team")
                team2 = gr.Dropdown(choices=valid_teams, label="Away Team", info="Select second team")
            venue = gr.Dropdown(choices=valid_venues, label="Stadium", info="Select match venue")
            predict_btn = gr.Button("Calculate Probability", variant="primary")

    # Single-prediction output.
    with gr.Row():
        with gr.Column(scale=1, min_width=300):
            gr.Markdown("### 📊 Prediction Result")
            output = gr.Textbox(label="Result", elem_classes="result-box")

    # Bulk matchup analysis trigger and output.
    with gr.Row():
        with gr.Column():
            gr.Markdown("---")
            with gr.Row():
                analysis_btn = gr.Button("🏆 Analyze Top/Bottom Matchups", variant="secondary")
            analysis_output = gr.Textbox(label="Analysis Results", lines=8, elem_classes="analysis-box")

    # Dynamic venue update
    def update_venues(team1, team2):
        """Restrict the venue dropdown to venues seen with the chosen teams.

        The choices are the union of venues from rows where ``team1`` is the
        first team and rows where ``team2`` is the second team. When that
        union is empty (for example nothing is selected yet, or a team only
        appears in the other column) the full venue list is offered instead,
        so the user is never left without a selectable venue.
        """
        first_team_venues = set(df.loc[df['team1'] == team1, 'venue'])
        second_team_venues = set(df.loc[df['team2'] == team2, 'venue'])
        venue_choices = sorted(first_team_venues | second_team_venues) or valid_venues
        return gr.Dropdown(
            choices=venue_choices,
            value=venue_choices[0] if venue_choices else None,
        )

    # Refresh the venue list whenever either team selection changes.
    team1.change(update_venues, [team1, team2], venue)
    team2.change(update_venues, [team1, team2], venue)

    predict_btn.click(
        predict_promo,
        inputs=[team1, team2, venue],
        outputs=output
    )

    # The analysis takes no UI inputs; it always lists the top/bottom 5.
    analysis_btn.click(
        lambda: analyze_matchups(5),
        outputs=analysis_output
    )

# Launch the Gradio app when this file is executed as a script.
# NOTE: the trained model is not saved anywhere in this script.
if __name__ == "__main__":
    app.launch()

* Running on local URL:  http://127.0.0.1:7871

To create a public link, set `share=True` in `launch()`.
