In [5]:
# Install required libraries
!pip install gradio plotly seaborn xgboost lightgbm scikit-learn -q

import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

print("‚úÖ All libraries installed successfully!")


‚úÖ All libraries installed successfully!


In [6]:
# Load the mandi daily-price CSV directly from its raw GitHub URL
# (nothing is uploaded by hand).
github_url = "https://raw.githubusercontent.com/ak-rahul/datasets/main/mandi-daily-price.csv"
df = pd.read_csv(github_url)

print(f"‚úÖ Dataset loaded from GitHub: {df.shape[0]} rows, {df.shape[1]} columns")

# Replace the "_x0020_" token in the headers with a plain underscore so the
# columns can be addressed by names such as 'Min_Price' in the cells below.
df = df.rename(columns=lambda header: header.replace('_x0020_', '_'))

# Feature engineering
# Absolute gap between the highest and the lowest price of each record.
df['Price_Spread'] = df['Max_Price'] - df['Min_Price']

# Spread expressed as a percentage of the modal price. A modal price of 0
# would produce +/-inf, and a single inf makes every mean computed over this
# column inf as well, so zero denominators are mapped to NaN first; pandas
# aggregations skip NaN by default.
safe_modal_price = df['Modal_Price'].replace(0, np.nan)
df['Price_Volatility_Pct'] = (df['Price_Spread'] / safe_modal_price) * 100

# Add categories
# Hand-maintained commodity groups; any commodity not listed here is 'Others'.
vegetables = ['Potato', 'Tomato', 'Onion', 'Cauliflower', 'Cabbage', 'Brinjal',
              'Green Chilli', 'Carrot', 'Raddish', 'Pumpkin', 'Capsicum', 'Beans']
fruits = ['Mango', 'Apple', 'Banana', 'Orange', 'Grapes', 'Kiwi Fruit', 'Pineapple', 'Kinnow']
spices = ['Cummin Seed(Jeera)', 'Dry Chillies', 'Chili Red', 'Turmeric', 'Coriander']

# Commodity -> category lookup. Groups are inserted lowest priority first, so
# if a name were ever listed in two groups the precedence would be
# Vegetables, then Fruits, then Spices.
category_by_commodity = {}
for group_label, group_members in (('Spices', spices),
                                   ('Fruits', fruits),
                                   ('Vegetables', vegetables)):
    category_by_commodity.update(dict.fromkeys(group_members, group_label))

# Names absent from the lookup come back as NaN from map() and become 'Others'.
df['Category'] = df['Commodity'].map(category_by_commodity).fillna('Others')

print(f"‚úÖ Dataset loaded: {df.shape[0]} rows, {df.shape[1]} columns")


‚úÖ Dataset loaded from GitHub: 2000 rows, 10 columns
‚úÖ Dataset loaded: 2000 rows, 13 columns


In [7]:
# Prepare ML dataset
categorical_cols = ['State', 'District', 'Market', 'Commodity', 'Variety', 'Grade', 'Category']
numeric_cols = ['Min_Price', 'Max_Price']

# Work on a copy so the frame used by the dashboard keeps its original columns.
df_ml = df.copy()

# One fitted LabelEncoder per categorical column. The encoders are kept in a
# dict so the same label -> integer mapping can be re-applied to new input.
encoders = {col: LabelEncoder().fit(df_ml[col]) for col in categorical_cols}
for col, encoder in encoders.items():
    df_ml[f'{col}_Encoded'] = encoder.transform(df_ml[col])

# Features and target: the seven encoded categoricals plus the two raw prices.
feature_cols = [f'{col}_Encoded' for col in categorical_cols] + numeric_cols
X = df_ml[feature_cols]
y = df_ml['Modal_Price']

# Split data: 80 % train / 20 % test, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Training models...")

# Configure the three regressors first; all share the same seed.
rf_model = RandomForestRegressor(n_estimators=100, max_depth=15, random_state=42, n_jobs=-1)
xgb_model = XGBRegressor(n_estimators=100, max_depth=7, learning_rate=0.1, random_state=42)
lgbm_model = LGBMRegressor(n_estimators=100, max_depth=7, learning_rate=0.1, random_state=42, verbose=-1)

# Display name -> estimator. The insertion order is also the training order.
models = {
    'Random Forest': rf_model,
    'XGBoost': xgb_model,
    'LightGBM': lgbm_model,
}

# Fit every model on the same training split.
for estimator in models.values():
    estimator.fit(X_train, y_train)

# Evaluate models
def _score_model(name, model):
    """Return one results row (name, RMSE, MAE, R2) for `model` on the test split."""
    predicted = model.predict(X_test)
    return {
        'Model': name,
        'RMSE': np.sqrt(mean_squared_error(y_test, predicted)),
        'MAE': mean_absolute_error(y_test, predicted),
        'R2': r2_score(y_test, predicted),
    }


results = [_score_model(name, model) for name, model in models.items()]

# Rank by R2, best first, so the first row identifies the best model.
results_df = pd.DataFrame(results).sort_values('R2', ascending=False)

print("‚úÖ Models trained successfully!")
print(results_df.to_string(index=False))

# Best model: the top-ranked row after sorting.
best_model_name = results_df['Model'].iloc[0]
best_model = models[best_model_name]


Training models...
‚úÖ Models trained successfully!
        Model        RMSE        MAE       R2
Random Forest  728.040616 148.739490 0.989154
      XGBoost 2756.333488 320.560292 0.844542
     LightGBM 3308.553237 500.445315 0.776012


In [9]:
# ==================== TAB 1: DATA OVERVIEW ====================
def show_data_overview():
    """Build the content of the "Data Overview" tab.

    Reads the module-level ``df``.

    Returns:
        tuple: ``(overview, sample_df)`` where ``overview`` is a Markdown
        string with the dataset shape, the arrival date of the first row,
        distinct-value counts and modal-price statistics, and ``sample_df``
        holds the first 10 rows restricted to the location, commodity and
        price columns.
    """
    n_rows, n_cols = df.shape
    first_date = df['Arrival_Date'].iloc[0]  # only the first row's date is shown
    modal = df['Modal_Price']

    overview = f"""
    # üìä Mandi Price Dataset Overview

    **Dataset Shape:** {n_rows} rows √ó {n_cols} columns

    **Date:** {first_date}

    **Key Statistics:**
    - **Total Commodities:** {df['Commodity'].nunique()}
    - **Total Markets:** {df['Market'].nunique()}
    - **States Covered:** {df['State'].nunique()}
    - **Districts:** {df['District'].nunique()}

    **Price Range:**
    - **Minimum Price:** ‚Çπ{modal.min():.2f}
    - **Maximum Price:** ‚Çπ{modal.max():.2f}
    - **Average Price:** ‚Çπ{modal.mean():.2f}
    - **Median Price:** ‚Çπ{modal.median():.2f}
    """

    # Preview table: first ten records, key columns only.
    preview_cols = ['State', 'Market', 'Commodity', 'Variety',
                    'Min_Price', 'Max_Price', 'Modal_Price']
    sample_df = df.head(10)[preview_cols]

    return overview, sample_df

# ==================== TAB 2: VISUALIZATIONS ====================
def _viz_top_commodities():
    """Horizontal bar chart of the 20 commodities with the highest mean modal price."""
    mean_by_commodity = df.groupby('Commodity')['Modal_Price'].mean()
    top_commodities = mean_by_commodity.sort_values(ascending=False).head(20)
    fig = px.bar(
        x=top_commodities.values,
        y=top_commodities.index,
        orientation='h',
        title='Top 20 Commodities by Average Price',
        labels={'x': 'Average Price (‚Çπ)', 'y': 'Commodity'},
        color=top_commodities.values,
        color_continuous_scale='Viridis',
    )
    fig.update_layout(height=600, showlegend=False)
    return fig


def _viz_state_prices():
    """Bar chart of the 15 states with the highest mean modal price."""
    mean_by_state = df.groupby('State')['Modal_Price'].mean()
    state_prices = mean_by_state.sort_values(ascending=False).head(15)
    fig = px.bar(
        x=state_prices.index,
        y=state_prices.values,
        title='Top 15 States by Average Price',
        labels={'x': 'State', 'y': 'Average Price (‚Çπ)'},
        color=state_prices.values,
        color_continuous_scale='RdYlGn_r',
    )
    fig.update_layout(xaxis_tickangle=-45, height=500)
    return fig


def _viz_category_boxplot():
    """Box plot of modal price per commodity category."""
    fig = px.box(
        df,
        x='Category',
        y='Modal_Price',
        title='Price Distribution by Commodity Category',
        labels={'Modal_Price': 'Modal Price (‚Çπ)'},
        color='Category',
    )
    fig.update_layout(height=500)
    return fig


def _viz_volatility():
    """Bar chart of the 15 most volatile commodities among those with >= 5 records."""
    per_commodity = df.groupby('Commodity').agg({
        'Price_Volatility_Pct': 'mean',
        'Modal_Price': 'count',
    })
    per_commodity = per_commodity.rename(columns={'Modal_Price': 'Count'})
    # Commodities with fewer than five records are left out of the ranking.
    frequent = per_commodity[per_commodity['Count'] >= 5]
    volatility = frequent.sort_values('Price_Volatility_Pct', ascending=False).head(15)

    bars = go.Bar(x=volatility.index, y=volatility['Price_Volatility_Pct'],
                  marker_color='indianred')
    fig = go.Figure(data=[bars])
    fig.update_layout(
        title='Top 15 Most Volatile Commodities',
        xaxis_title='Commodity',
        yaxis_title='Average Volatility (%)',
        xaxis_tickangle=-45,
        height=500,
    )
    return fig


def _viz_spread_scatter():
    """Scatter plot of price spread against modal price, coloured by category."""
    fig = px.scatter(
        df,
        x='Modal_Price',
        y='Price_Spread',
        color='Category',
        hover_data=['Commodity', 'State'],
        title='Price Spread vs Modal Price',
        labels={'Modal_Price': 'Modal Price (‚Çπ)',
                'Price_Spread': 'Price Spread (‚Çπ)'},
        opacity=0.6,
    )
    fig.update_layout(height=500)
    return fig


def _viz_top_markets():
    """Bar chart of the 10 markets with the most records."""
    market_counts = df['Market'].value_counts().head(10)
    fig = px.bar(
        x=market_counts.index,
        y=market_counts.values,
        title='Top 10 Markets by Number of Items',
        labels={'x': 'Market', 'y': 'Number of Items'},
        color=market_counts.values,
        color_continuous_scale='Blues',
    )
    fig.update_layout(xaxis_tickangle=-45, height=500)
    return fig


# Dropdown label -> figure builder, checked in this order.
_VIZ_BUILDERS = (
    ("Top 20 Commodities by Price", _viz_top_commodities),
    ("State-wise Price Distribution", _viz_state_prices),
    ("Price Distribution by Category", _viz_category_boxplot),
    ("Price Volatility Analysis", _viz_volatility),
    ("Price Spread vs Modal Price", _viz_spread_scatter),
    ("Top 10 Markets by Items", _viz_top_markets),
)


def create_visualization(viz_type):
    """Return the Plotly figure for the chart named by ``viz_type``.

    Args:
        viz_type: One of the labels listed in ``_VIZ_BUILDERS``.

    Returns:
        The figure built from the module-level ``df``, or ``None`` when
        ``viz_type`` matches none of the known labels.
    """
    for label, build_figure in _VIZ_BUILDERS:
        if viz_type == label:
            return build_figure()
    return None

# ==================== TAB 3: PRICE PREDICTION ====================
def _commodity_category(commodity):
    """Map a commodity name to its dashboard category ('Others' when unlisted)."""
    if commodity in vegetables:
        return 'Vegetables'
    if commodity in fruits:
        return 'Fruits'
    if commodity in spices:
        return 'Spices'
    return 'Others'


def predict_price(state, district, market, commodity, variety, grade, min_price, max_price):
    """Predict the modal price of one commodity listing with every trained model.

    Args:
        state, district, market, commodity, variety, grade: Categorical
            selections; each must be a label the matching entry of
            ``encoders`` was fitted on.
        min_price, max_price: Numeric minimum / maximum price of the listing.

    Returns:
        tuple: ``(markdown, figure)``. On success ``markdown`` summarises the
        input and lists one prediction per model, and ``figure`` is a bar
        chart of the predictions with the min/max prices drawn as reference
        lines. On any failure ``markdown`` is an error message and ``figure``
        is ``None``; the function does not raise for ordinary errors.
    """
    try:
        selections = {
            'State': state,
            'District': district,
            'Market': market,
            'Commodity': commodity,
            'Variety': variety,
            'Grade': grade,
        }

        # Validate up front so an empty field is reported as such instead of
        # as "'None' not found in training data" or a raw exception text.
        missing = [label for label, value in selections.items()
                   if value is None or value == '']
        if missing:
            return f"‚ùå Error: Please select a value for: {', '.join(missing)}.", None
        if min_price is None or max_price is None:
            return "‚ùå Error: Please enter both a minimum and a maximum price.", None
        if min_price > max_price:
            return "‚ùå Error: Minimum price cannot be greater than maximum price.", None

        # Single-row frame holding the raw categorical inputs.
        input_data = pd.DataFrame({label: [value] for label, value in selections.items()})
        input_data['Category'] = [_commodity_category(commodity)]

        # Encode features with the encoders fitted at training time.
        for col in ['State', 'District', 'Market', 'Commodity', 'Variety', 'Grade', 'Category']:
            try:
                input_data[f'{col}_Encoded'] = encoders[col].transform(input_data[col])
            except ValueError:
                # LabelEncoder.transform raises ValueError for unseen labels.
                # Anything else is a genuine fault and is left to the outer handler.
                return f"‚ùå Error: '{input_data[col].iloc[0]}' not found in training data for {col}. Please select valid options.", None

        input_data['Min_Price'] = min_price
        input_data['Max_Price'] = max_price

        # Column order must match the order used for training.
        X_input = input_data[feature_cols]

        # One prediction per trained model, keyed by display name.
        predictions = {name: model.predict(X_input)[0] for name, model in models.items()}

        # The Markdown is assembled line by line with no leading indentation.
        # An indented template followed by unindented appended lines has no
        # common indent to strip, and Markdown can render lines indented by
        # four or more spaces as a code block.
        lines = [
            "## üéØ Price Prediction Results",
            "",
            "**Input Details:**",
            f"- **Location:** {market}, {district}, {state}",
            f"- **Commodity:** {commodity} ({variety})",
            f"- **Grade:** {grade}",
            f"- **Min Price:** ‚Çπ{min_price:.2f}",
            f"- **Max Price:** ‚Çπ{max_price:.2f}",
            "",
            "**Predicted Modal Prices:**",
            "",
        ]
        lines.extend(f"- **{name}:** ‚Çπ{pred:.2f}" for name, pred in predictions.items())
        lines.extend([
            "",
            f"**üìä Best Model ({best_model_name}):** ‚Çπ{predictions[best_model_name]:.2f}",
            "",
            f"**Expected Price Range:** ‚Çπ{min_price:.2f} - ‚Çπ{max_price:.2f}",
        ])
        result = "\n".join(lines)

        # Bar chart of the predictions, with the entered min/max prices as
        # dashed reference lines.
        pred_df = pd.DataFrame({
            'Model': list(predictions.keys()),
            'Predicted Price': list(predictions.values())
        })

        fig = px.bar(pred_df, x='Model', y='Predicted Price',
                     title='Price Predictions by Model',
                     labels={'Predicted Price': 'Price (‚Çπ)'},
                     color='Predicted Price',
                     color_continuous_scale='Viridis')
        fig.add_hline(y=min_price, line_dash="dash", line_color="red",
                      annotation_text="Min Price")
        fig.add_hline(y=max_price, line_dash="dash", line_color="green",
                      annotation_text="Max Price")
        fig.update_layout(height=400)

        return result, fig

    except Exception as e:
        # Last-resort guard so the UI shows a message instead of a stack trace.
        return f"‚ùå Error: {str(e)}", None

# ==================== TAB 4: MODEL COMPARISON ====================
def show_model_comparison():
    """Build the content of the "Model Performance" tab.

    Reads the module-level split (``X_train``, ``X_test``), ``feature_cols``,
    ``results_df``, ``best_model`` and ``best_model_name``.

    Returns:
        tuple: ``(comparison_text, results_df, fig, fig2)``, i.e. a Markdown
        summary of the split sizes and feature count, the metrics table, a
        three-panel bar chart (RMSE, MAE, R2 per model) and a horizontal bar
        chart of the best model's feature importances.
    """
    n_train = X_train.shape[0]
    n_test = X_test.shape[0]
    n_features = len(feature_cols)

    # Summary header shown above the metrics table.
    comparison_text = f"""
    # ü§ñ Model Performance Comparison

    **Training Data:** {n_train} samples
    **Test Data:** {n_test} samples
    **Features Used:** {n_features}

    """

    # One subplot per metric: (results_df column, trace name, bar colour).
    metric_panels = (
        ('RMSE', 'RMSE', 'indianred'),
        ('MAE', 'MAE', 'lightblue'),
        ('R2', 'R¬≤', 'lightgreen'),
    )
    fig = make_subplots(rows=1, cols=3,
                        subplot_titles=('RMSE', 'MAE', 'R¬≤ Score'))
    for position, (column, trace_name, colour) in enumerate(metric_panels, start=1):
        bars = go.Bar(x=results_df['Model'], y=results_df[column],
                      name=trace_name, marker_color=colour)
        fig.add_trace(bars, row=1, col=position)

    fig.update_layout(height=400, showlegend=False, title_text="Model Metrics Comparison")

    # Feature importances of the best-ranked model, most important first.
    importance_table = pd.DataFrame({
        'Feature': feature_cols,
        'Importance': best_model.feature_importances_,
    })
    importance_table = importance_table.sort_values('Importance', ascending=False)

    fig2 = px.bar(importance_table, x='Importance', y='Feature',
                  orientation='h',
                  title=f'Feature Importance - {best_model_name}',
                  color='Importance',
                  color_continuous_scale='Viridis')
    fig2.update_layout(height=400)

    return comparison_text, results_df, fig, fig2

# ==================== BUILD GRADIO INTERFACE ====================
# Declarative layout of the four-tab dashboard. Components are created in the
# order they appear inside the nested `with` contexts, so statement order here
# is the on-screen order. Nothing is computed when the page loads: every tab
# fills its outputs only when its button is clicked.
with gr.Blocks(theme=gr.themes.Soft(), title="üåæ Mandi Price Analysis Dashboard") as dashboard:

    # Page header shown above the tabs.
    gr.Markdown("""
    # üåæ Agricultural Commodity Price Analysis & Prediction Dashboard
    ## Interactive ML-Powered Mandi Price Analytics

    Analyze commodity prices across Indian markets and predict modal prices using advanced ML models.
    """)

    with gr.Tabs():
        # TAB 1: Data Overview
        with gr.Tab("üìä Data Overview"):
            gr.Markdown("### Explore the dataset statistics and sample data")
            overview_btn = gr.Button("üîÑ Load Data Overview", variant="primary")
            overview_text = gr.Markdown()
            overview_table = gr.Dataframe()

            # show_data_overview takes no inputs and returns (markdown, dataframe),
            # matching the two output components in order.
            overview_btn.click(
                fn=show_data_overview,
                outputs=[overview_text, overview_table]
            )

        # TAB 2: Visualizations
        with gr.Tab("üìà Visualizations"):
            gr.Markdown("### Interactive Data Visualizations")
            # These labels must match, character for character, the strings
            # create_visualization compares against; an unmatched label makes
            # it return None.
            viz_dropdown = gr.Dropdown(
                choices=[
                    "Top 20 Commodities by Price",
                    "State-wise Price Distribution",
                    "Price Distribution by Category",
                    "Price Volatility Analysis",
                    "Price Spread vs Modal Price",
                    "Top 10 Markets by Items"
                ],
                value="Top 20 Commodities by Price",
                label="Select Visualization"
            )
            viz_btn = gr.Button("üìä Generate Visualization", variant="primary")
            viz_plot = gr.Plot()

            # The selected label is passed to create_visualization; the returned
            # figure is shown in viz_plot.
            viz_btn.click(
                fn=create_visualization,
                inputs=viz_dropdown,
                outputs=viz_plot
            )

        # TAB 3: Price Prediction
        with gr.Tab("üéØ Price Prediction"):
            gr.Markdown("### Predict Modal Price for Any Commodity")

            # Each dropdown lists the sorted distinct values of one column of the
            # whole dataset. The lists are independent of each other: choosing a
            # state does not narrow the district or market choices. No dropdown
            # has a default value.
            with gr.Row():
                # Left column: where the commodity is sold.
                with gr.Column():
                    state_input = gr.Dropdown(
                        choices=sorted(df['State'].unique().tolist()),
                        label="Select State"
                    )
                    district_input = gr.Dropdown(
                        choices=sorted(df['District'].unique().tolist()),
                        label="Select District"
                    )
                    market_input = gr.Dropdown(
                        choices=sorted(df['Market'].unique().tolist()),
                        label="Select Market"
                    )

                # Right column: what is being sold.
                with gr.Column():
                    commodity_input = gr.Dropdown(
                        choices=sorted(df['Commodity'].unique().tolist()),
                        label="Select Commodity"
                    )
                    variety_input = gr.Dropdown(
                        choices=sorted(df['Variety'].unique().tolist()),
                        label="Select Variety"
                    )
                    grade_input = gr.Dropdown(
                        choices=sorted(df['Grade'].unique().tolist()),
                        label="Select Grade"
                    )

            # Price bounds fed to the models as the Min_Price / Max_Price features;
            # pre-filled with 1000 and 1500.
            with gr.Row():
                min_price_input = gr.Number(label="Minimum Price (‚Çπ)", value=1000)
                max_price_input = gr.Number(label="Maximum Price (‚Çπ)", value=1500)

            predict_btn = gr.Button("üîÆ Predict Price", variant="primary", size="lg")

            prediction_output = gr.Markdown()
            prediction_plot = gr.Plot()

            # The order of `inputs` must match predict_price's positional
            # parameters: state, district, market, commodity, variety, grade,
            # min_price, max_price.
            predict_btn.click(
                fn=predict_price,
                inputs=[state_input, district_input, market_input, commodity_input,
                       variety_input, grade_input, min_price_input, max_price_input],
                outputs=[prediction_output, prediction_plot]
            )

        # TAB 4: Model Comparison
        with gr.Tab("ü§ñ Model Performance"):
            gr.Markdown("### ML Model Evaluation & Comparison")
            model_btn = gr.Button("üìä Show Model Analysis", variant="primary")
            model_text = gr.Markdown()
            model_table = gr.Dataframe()
            model_plot1 = gr.Plot()
            model_plot2 = gr.Plot()

            # show_model_comparison returns (markdown, metrics table, metrics
            # figure, feature-importance figure), in the order of `outputs`.
            model_btn.click(
                fn=show_model_comparison,
                outputs=[model_text, model_table, model_plot1, model_plot2]
            )

    # Footer shown below the tabs.
    gr.Markdown("""
    ---
    **Built with:** Gradio + Plotly + XGBoost + LightGBM + Random Forest

    **Data Source:** AGMARKNET Portal - Government of India
    """)

# Start the dashboard server with the options collected in one place.
launch_options = {
    "share": True,       # Creates public shareable link
    "debug": True,       # Shows debug info
    "inline": False,     # Don't display inline in Colab
    "inbrowser": True,   # Auto-open in new tab
}
dashboard.launch(**launch_options)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://c6c739abf2c5817c77.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://c6c739abf2c5817c77.gradio.live


