In [6]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from datetime import datetime, timedelta

# Read the processed feature CSV for the Pune tomato market; the 'date'
# column is converted to datetimes while loading.
data = pd.read_csv(
    '../data/processed/tomato_MAH_Pune_features.csv',
    parse_dates=['date'],
)

# Show the column names so the cells below can be checked against them
print("Available columns:", data.columns.tolist(), sep="\n")

Available columns:
['date', 'market_id', 'market_name', 'price', 'price_lag_1', 'price_lag_7', 'price_lag_14', 'price_lag_30', 'price_ma_7', 'price_ma_30', 'precipitation', 'temp_max', 'temp_min', 'humidity']


In [5]:
# Price-trend chart: the price column overlaid with the precomputed
# price_ma_7 and price_ma_30 columns.
trend_series = [
    # (column, legend label, line style)
    ('price', 'Actual Price', dict(color='blue')),
    ('price_ma_7', '7-Day Moving Average', dict(color='red', dash='dash')),
    ('price_ma_30', '30-Day Moving Average', dict(color='green', dash='dot')),
]

fig = go.Figure()
for column, label, line_style in trend_series:
    # One line per series, all plotted against the shared date axis
    fig.add_trace(
        go.Scatter(x=data['date'], y=data[column], name=label, line=line_style)
    )

# Titles, axis labels and theme
fig.update_layout(
    title='Tomato Price Trends in Pune',
    xaxis_title='Date',
    yaxis_title='Price (₹/Quintal)',
    template='plotly_white',
    showlegend=True,
)

fig.show()

In [7]:
# Four stacked panels in one figure: temperature, precipitation, humidity, price
panel_titles = ('Temperature', 'Precipitation', 'Humidity', 'Price')
fig = make_subplots(
    rows=4,
    cols=1,
    subplot_titles=panel_titles,
    vertical_spacing=0.1,
    row_heights=[0.25] * 4,
)

dates = data['date']

# (panel row, trace) pairs, listed in the order they are added to the figure
panel_traces = [
    # Temperature panel: daily maximum and minimum
    (1, go.Scatter(x=dates, y=data['temp_max'], name='Max Temp',
                   line=dict(color='red'))),
    (1, go.Scatter(x=dates, y=data['temp_min'], name='Min Temp',
                   line=dict(color='blue'))),
    # Precipitation panel (bars)
    (2, go.Bar(x=dates, y=data['precipitation'], name='Precipitation',
               marker_color='blue')),
    # Humidity panel
    (3, go.Scatter(x=dates, y=data['humidity'], name='Humidity',
                   line=dict(color='green'))),
    # Price panel: price plus the price_ma_30 column
    (4, go.Scatter(x=dates, y=data['price'], name='Price',
                   line=dict(color='purple'))),
    (4, go.Scatter(x=dates, y=data['price_ma_30'], name='30-Day MA',
                   line=dict(color='orange', dash='dash'))),
]

for panel_row, trace in panel_traces:
    fig.add_trace(trace, row=panel_row, col=1)

# Overall figure size and title
fig.update_layout(height=1000, title_text='Weather Factors and Price Trends')
fig.show()

In [8]:
# Correlation of price with each weather column (DataFrame.corr defaults)
weather_cols = ['temp_max', 'temp_min', 'precipitation', 'humidity']

# The matrix is built once and reused for both the ranking and the heatmap
corr_matrix = data[['price', *weather_cols]].corr()
correlations = corr_matrix['price'].sort_values(ascending=False)

print("\nCorrelations with price:")
print(correlations)

# Heatmap of the full correlation matrix
fig = px.imshow(
    corr_matrix,
    labels=dict(color="Correlation"),
    title="Correlation Heatmap of Price and Weather Factors",
)
fig.show()


Correlations with price:
price            1.000000
precipitation    0.038041
humidity         0.024843
temp_min        -0.294672
temp_max        -0.294672
Name: price, dtype: float64


In [11]:
# Read the saved 30-day forecast; it is used below through its
# 'date', 'predicted', 'lower' and 'upper' columns.
forecast_data = pd.read_csv(
    '../data/processed/forecast_tomato_MAH_Pune_30d.csv',
    parse_dates=['date'],
)

# Outline of the confidence band: walk forward along the upper bound, then
# back along the lower bound, so that fill='toself' shades the area between.
forecast_dates = forecast_data['date'].tolist()
band_x = forecast_dates + forecast_dates[::-1]
band_y = forecast_data['upper'].tolist() + forecast_data['lower'].tolist()[::-1]

fig = go.Figure()

# Observed prices
fig.add_trace(go.Scatter(
    x=data['date'],
    y=data['price'],
    name='Historical Price',
    line=dict(color='blue'),
))

# Point forecast
fig.add_trace(go.Scatter(
    x=forecast_data['date'],
    y=forecast_data['predicted'],
    name='30-Day Forecast',
    line=dict(color='red'),
))

# Shaded band between the lower and upper bounds (outline fully transparent)
fig.add_trace(go.Scatter(
    x=band_x,
    y=band_y,
    fill='toself',
    fillcolor='rgba(255,0,0,0.1)',
    line=dict(color='rgba(255,0,0,0)'),
    name='Confidence Interval',
))

# Titles, theme and a single hover box per x position
fig.update_layout(
    title='Tomato Price Forecast for Pune Market',
    xaxis_title='Date',
    yaxis_title='Price (₹/Quintal)',
    template='plotly_white',
    showlegend=True,
    hovermode='x unified',
)

fig.show()

In [12]:
# Calculate forecast statistics relative to the most recent observed price
latest_price = data['price'].iloc[-1]
avg_forecast = forecast_data['predicted'].mean()
min_forecast = forecast_data['predicted'].min()
max_forecast = forecast_data['predicted'].max()

# The overall trend is judged from the mean forecast. Comparing the forecast
# maximum with the latest price would report "upward" as soon as a single
# forecast point exceeds the current price, even if the horizon as a whole
# sits below it.
forecast_trend = 'upward' if avg_forecast > latest_price else 'downward'

print("Forecast Analysis:")
print(f"Current Price: ₹{latest_price:.2f} per quintal")
print(f"Average Forecasted Price: ₹{avg_forecast:.2f} per quintal")
print(f"Forecasted Price Range: ₹{min_forecast:.2f} - ₹{max_forecast:.2f} per quintal")
print(f"Overall Trend: {forecast_trend.capitalize()} trending\n")

# Spread of the point forecasts over the horizon (standard deviation)
volatility = forecast_data['predicted'].std()
print(f"Forecast Volatility: ₹{volatility:.2f} per quintal")

# Mean distance between the upper and lower bounds
ci_width = (forecast_data['upper'] - forecast_data['lower']).mean()
print(f"Average Confidence Interval Width: ₹{ci_width:.2f} per quintal")

Forecast Analysis:
Current Price: ₹59.50 per quintal
Average Forecasted Price: ₹59.70 per quintal
Forecasted Price Range: ₹58.65 - ₹60.78 per quintal
Overall Trend: Upward trending

Forecast Volatility: ₹0.52 per quintal
Average Confidence Interval Width: ₹30.71 per quintal


In [None]:
# Create price volatility visualization
fig = go.Figure()

# Row-over-row percentage change of the price. The features file loaded at
# the top of this notebook exposes the price as 'price'; it has no
# 'modal_price' column, so that name raised a KeyError here.
data['price_change'] = data['price'].pct_change() * 100

# Add price volatility
fig.add_trace(go.Scatter(
    x=data['date'],
    y=data['price_change'],
    name='Daily Price Change (%)',
    line=dict(color='red')
))

# Update layout
fig.update_layout(
    title='Daily Price Volatility',
    xaxis_title='Date',
    yaxis_title='Price Change (%)',
    height=400,
    showlegend=True
)

fig.show()

# Summary statistics of the percentage changes
print("\nVolatility Statistics:")
print("--------------------")
volatility_stats = data['price_change'].describe()
print(volatility_stats)

# High-volatility rows: absolute change larger than two standard deviations
# of the change series
high_volatility = data[data['price_change'].abs() > data['price_change'].std() * 2]
print("\nHigh Volatility Days:")
print("-------------------")
if len(high_volatility) > 0:
    for idx, row in high_volatility.iterrows():
        print(f"Date: {row['date'].strftime('%Y-%m-%d')}, Change: {row['price_change']:.2f}%")

In [None]:
# Create seasonal analysis visualization
# Mean price per calendar month. The features file loaded at the top of this
# notebook names the price column 'price' (there is no 'modal_price' column).
# NOTE(review): pandas >= 2.2 deprecates the 'M' alias in favour of 'ME';
# 'M' is kept so the cell also runs on older pandas versions.
monthly_avg = data.set_index('date').resample('M')['price'].mean()

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=monthly_avg.index,
    y=monthly_avg.values,
    name='Monthly Average Price',
    line=dict(color='purple')
))

# Update layout
fig.update_layout(
    title='Monthly Price Trends',
    xaxis_title='Date',
    yaxis_title='Average Price (₹/kg)',
    height=400,
    showlegend=True
)

fig.show()

# Summary statistics of the monthly means
print("\nMonthly Price Statistics:")
print("-----------------------")
monthly_stats = monthly_avg.describe()
print(monthly_stats)

# Identify seasonal patterns
def get_month_name(month_num):
    """Return the full month name for a month number (1 -> 'January').

    The name is produced by ``strftime('%B')`` on the first day of that month
    in the year 2000, so it follows the active locale. A month number outside
    1-12 raises ``ValueError`` from the ``datetime`` constructor.
    """
    reference_day = datetime(year=2000, month=month_num, day=1)
    return reference_day.strftime('%B')

# Mean and standard deviation of the price for each calendar month (1-12),
# pooled across years.
# The frame is grouped directly, without set_index('date'): a Series used as a
# group key is aligned on the index, so grouping a date-indexed frame by a key
# that still carries the original row index produces all-NaN keys and an empty
# result. The price column in this notebook's data is 'price'.
monthly_pattern = data.groupby(data['date'].dt.month)['price'].agg(['mean', 'std'])
# Replace the month numbers with month names for display
monthly_pattern.index = monthly_pattern.index.map(get_month_name)

print("\nSeasonal Price Patterns:")
print("----------------------")
print(monthly_pattern)

# Summary and Recommendations

Based on the analysis above, we can provide the following insights and recommendations:

1. **Price Trends**:
   - Track historical price patterns
   - Identify support and resistance levels
   - Monitor moving averages for trend changes

2. **Volatility Analysis**:
   - Watch for periods of high volatility
   - Use volatility as an early warning system
   - Consider market stabilization during volatile periods

3. **Seasonal Patterns**:
   - Plan harvesting around seasonal price peaks
   - Prepare for seasonal price fluctuations
   - Consider storage during low-price seasons

4. **Risk Management**:
   - Monitor glut indicators
   - Diversify market distribution
   - Use price forecasts for planning

# Model Performance Visualization and Comparison

This notebook visualizes the performance of our trained models and compares their predictions. We'll analyze:
1. Individual model predictions and accuracy
2. Ensemble model performance
3. Confidence intervals and uncertainty
4. Feature importance and model insights
5. Market risk signals and recommendations

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pickle
from datetime import datetime, timedelta
import json

# Market data with engineered features; 'date' is parsed to datetimes on load
data = pd.read_csv(
    '../data/processed/market_data_with_features.csv',
    parse_dates=['date'],
)


def _load_pickle(path):
    """Deserialize and return the object stored in the pickle file at ``path``."""
    # NOTE(review): unpickling executes code embedded in the file; only load
    # model files that come from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Previously trained forecasting models
sarimax_model = _load_pickle('../models/pkl/sarimax_model.pkl')
prophet_model = _load_pickle('../models/pkl/prophet_model.pkl')
xgb_model = _load_pickle('../models/pkl/xgboost_model.pkl')

# Saved analysis results (JSON)
with open('../data/processed/analysis_results.json', 'r') as results_file:
    analysis_results = json.load(results_file)

In [None]:
# Generate predictions for all models
def generate_predictions(data, forecast_days=30, weights=(0.4, 0.4, 0.2)):
    """Forecast ``forecast_days`` periods ahead with each model and blend them.

    Uses the notebook-level ``sarimax_model``, ``prophet_model`` and
    ``xgb_model`` objects loaded in an earlier cell.

    Args:
        data: DataFrame with a 'date' column and the XGBoost feature columns
            'MA_7', 'MA_30', 'RSI', 'price_volatility_30d', 'temp_max',
            'temp_min', 'precipitation' and 'humidity'.
        forecast_days: Number of future periods to forecast. Future dates
            start one day after the latest 'date' in ``data``.
        weights: ``(SARIMAX, Prophet, XGBoost)`` weights of the ensemble
            average. The default reproduces the previous fixed 0.4/0.4/0.2.

    Returns:
        DataFrame with one row per future date and the columns 'date',
        'SARIMAX', 'SARIMAX_lower', 'SARIMAX_upper', 'Prophet',
        'Prophet_lower', 'Prophet_upper', 'XGBoost' and 'Ensemble'.

    Raises:
        ValueError: If ``weights`` does not contain exactly three values.
    """
    if len(weights) != 3:
        raise ValueError("weights must contain three values: (SARIMAX, Prophet, XGBoost)")
    w_sarimax, w_prophet, w_xgb = weights

    end_date = data['date'].max()
    future_dates = pd.date_range(start=end_date + timedelta(days=1), periods=forecast_days)

    # SARIMAX: point forecast plus confidence bounds. conf_int() is evaluated
    # once and its columns are taken by position (lower, then upper), so the
    # lookup does not depend on the name of the series the model was fit on.
    sarimax_forecast = sarimax_model.get_forecast(steps=forecast_days)
    sarimax_ci = sarimax_forecast.conf_int()

    # Prophet: predict over history + horizon and keep the last forecast_days rows
    future_prophet = prophet_model.make_future_dataframe(periods=forecast_days)
    prophet_tail = prophet_model.predict(future_prophet).tail(forecast_days)

    # XGBoost: a single prediction from the most recent feature row, repeated
    # for every future date (the features are not rolled forward).
    last_features = data.iloc[-1:][['MA_7', 'MA_30', 'RSI', 'price_volatility_30d',
                                   'temp_max', 'temp_min', 'precipitation', 'humidity']]

    # Values are passed as plain arrays so each model's own index (dates or
    # row numbers) cannot interfere; rows are matched to future_dates by position.
    predictions = pd.DataFrame({
        'date': future_dates,
        'SARIMAX': np.asarray(sarimax_forecast.predicted_mean),
        'SARIMAX_lower': np.asarray(sarimax_ci.iloc[:, 0]),
        'SARIMAX_upper': np.asarray(sarimax_ci.iloc[:, 1]),
        'Prophet': np.asarray(prophet_tail['yhat']),
        'Prophet_lower': np.asarray(prophet_tail['yhat_lower']),
        'Prophet_upper': np.asarray(prophet_tail['yhat_upper']),
        'XGBoost': [xgb_model.predict(last_features)[0]] * forecast_days,
    })

    # Ensemble prediction: weighted average of the three point forecasts
    predictions['Ensemble'] = (
        predictions['SARIMAX'] * w_sarimax +
        predictions['Prophet'] * w_prophet +
        predictions['XGBoost'] * w_xgb
    )

    return predictions

# Generate predictions
forecast_days = 30
predictions = generate_predictions(data, forecast_days)

# Plot historical data and predictions
fig = go.Figure()

# Historical data
fig.add_trace(go.Scatter(
    x=data['date'],
    y=data['modal_price'],
    name='Historical Price',
    line=dict(color='black')
))

# Add predictions for each model
models = ['SARIMAX', 'Prophet', 'XGBoost', 'Ensemble']
colors = ['blue', 'red', 'green', 'purple']

# Translucent band colour for each model that provides confidence bounds,
# written as explicit rgba strings matching the model's line colour. (The
# earlier plt.hex2color call failed: `plt` is not imported in this notebook
# and named colours such as 'blue' are not hex strings.)
ci_fill = {
    'SARIMAX': 'rgba(0,0,255,0.2)',
    'Prophet': 'rgba(255,0,0,0.2)',
}

for model, color in zip(models, colors):
    fig.add_trace(go.Scatter(
        x=predictions['date'],
        y=predictions[model],
        name=f'{model} Forecast',
        line=dict(color=color, dash='dash')
    ))

    # Add confidence intervals for SARIMAX and Prophet: the outline runs
    # forward along the upper bound and back along the lower bound so that
    # fill='toself' shades the area in between.
    if model in ci_fill:
        fig.add_trace(go.Scatter(
            x=predictions['date'].tolist() + predictions['date'].tolist()[::-1],
            y=predictions[f'{model}_upper'].tolist() + predictions[f'{model}_lower'].tolist()[::-1],
            fill='toself',
            fillcolor=ci_fill[model],
            line=dict(color='rgba(255,255,255,0)'),
            name=f'{model} Confidence Interval'
        ))

fig.update_layout(
    title='Model Predictions Comparison',
    xaxis_title='Date',
    yaxis_title='Price',
    height=600,
    showlegend=True,
    hovermode='x unified'
)

fig.show()

# Calculate and display performance metrics
# NOTE(review): `predictions` holds forecasts for the days AFTER the data
# ends, while the actual values below are the LAST observed days. The numbers
# therefore measure how far each forecast sits from the recent price level;
# they are not an out-of-sample backtest. A true evaluation needs forecasts
# produced for a held-out window.
eval_window = min(30, len(data), len(predictions))
last_30_days = data.tail(eval_window)
# Plain arrays are used so values are compared by position; subtracting the
# pandas Series directly would align on their (different) row labels.
actual_prices = last_30_days['modal_price'].to_numpy(dtype=float)

print("\nModel Performance Metrics (Last 30 Days):")
print("----------------------------------------")
for model in models:
    if model != 'Ensemble':  # Only the individual models are reported
        forecast_values = predictions[model].head(eval_window).to_numpy(dtype=float)
        errors = actual_prices - forecast_values
        # Computed with numpy: the mean_squared_error / mean_absolute_error
        # helpers are never imported in this notebook.
        mse = np.mean(errors ** 2)
        mae = np.mean(np.abs(errors))
        print(f"\n{model}:")
        print(f"RMSE: {np.sqrt(mse):.2f}")
        print(f"MAE:  {mae:.2f}")

In [None]:
# Analyze prediction uncertainty and risk levels
def analyze_risk_levels(predictions):
    """Derive a per-day risk score from forecast uncertainty and price movement.

    Args:
        predictions: DataFrame with the columns 'date', 'SARIMAX_lower',
            'SARIMAX_upper', 'Prophet_lower', 'Prophet_upper' and 'Ensemble'.

    Returns:
        DataFrame with the columns:
            'date'        - copied from ``predictions``;
            'uncertainty' - sum of the two interval widths divided by twice
                            the largest single width (0 when every width is 0);
            'price_trend' - fractional change of 'Ensemble' from the previous
                            row (0 for the first row, which has no predecessor);
            'risk_score'  - ``uncertainty * (1 + |price_trend|)``.
    """
    # Width of each model's confidence interval
    uncertainty_sarimax = predictions['SARIMAX_upper'] - predictions['SARIMAX_lower']
    uncertainty_prophet = predictions['Prophet_upper'] - predictions['Prophet_lower']

    # Normalize by the largest width seen in either model
    max_uncertainty = max(uncertainty_sarimax.max(), uncertainty_prophet.max())
    combined_uncertainty = uncertainty_sarimax + uncertainty_prophet
    if max_uncertainty > 0:
        uncertainty_normalized = combined_uncertainty / (2 * max_uncertainty)
    else:
        # Every interval has zero width (or no usable width exists): report
        # zero uncertainty rather than dividing by zero.
        uncertainty_normalized = combined_uncertainty * 0.0

    # Row-over-row change of the ensemble forecast. The first row has no
    # predecessor; it is set to 0 so that day keeps a finite risk score and
    # is not silently dropped by later binning.
    price_trend = predictions['Ensemble'].pct_change()
    if len(price_trend) > 0:
        price_trend.iloc[0] = 0.0

    # Define risk levels
    risk_levels = pd.DataFrame({
        'date': predictions['date'],
        'uncertainty': uncertainty_normalized,
        'price_trend': price_trend,
        'risk_score': uncertainty_normalized * (1 + abs(price_trend))
    })

    return risk_levels

# Per-day risk table for the forecast horizon
risk_levels = analyze_risk_levels(predictions)

# Two stacked panels: prices on top, risk score underneath
panel_titles = ('Price Forecasts with Uncertainty', 'Market Risk Levels')
fig = make_subplots(rows=2, cols=1, subplot_titles=panel_titles)

historical_trace = go.Scatter(
    x=data['date'], y=data['modal_price'],
    name='Historical', line=dict(color='black'),
)
ensemble_trace = go.Scatter(
    x=predictions['date'], y=predictions['Ensemble'],
    name='Ensemble Forecast', line=dict(color='blue', dash='dash'),
)
# Risk score drawn as an area filled down to zero
risk_trace = go.Scatter(
    x=risk_levels['date'], y=risk_levels['risk_score'],
    name='Risk Score', fill='tozeroy',
    line=dict(color='red'),
)

for panel_row, trace in ((1, historical_trace), (1, ensemble_trace), (2, risk_trace)):
    fig.add_trace(trace, row=panel_row, col=1)

fig.update_layout(
    height=800,
    title_text="Price Forecast and Market Risk Analysis",
    showlegend=True,
)

# Axis titles for the individual panels
fig.update_xaxes(title_text="Date", row=2, col=1)
fig.update_yaxes(title_text="Price", row=1, col=1)
fig.update_yaxes(title_text="Risk Score", row=2, col=1)

fig.show()

# Text summary: risk scores are split into three quantile bins, so the
# Low/Medium/High labels are relative to this forecast window.
print("\nRisk Analysis Summary:")
print("---------------------")
risk_categories = pd.qcut(
    risk_levels['risk_score'],
    q=3,
    labels=['Low', 'Medium', 'High'],
)
risk_summary = risk_categories.value_counts()
print("\nRisk Distribution (next 30 days):")
print(risk_summary)

# List the days that fall in the top bin
high_risk_days = risk_levels[risk_categories == 'High']
if len(high_risk_days) > 0:
    print("\nHigh Risk Periods:")
    for risk_date, risk_score in zip(high_risk_days['date'], high_risk_days['risk_score']):
        print(f"- {risk_date.strftime('%Y-%m-%d')}: Risk Score = {risk_score:.2f}")

In [None]:
# Generate market recommendations
def generate_recommendations(predictions, risk_levels, market_data=None):
    """Turn the forecast, risk scores and moving averages into advice strings.

    Args:
        predictions: DataFrame with an 'Ensemble' forecast column.
        risk_levels: DataFrame with a 'risk_score' column.
        market_data: Optional DataFrame with 'modal_price', 'MA_7' and 'MA_30'
            columns; its last row is taken as the current market state. When
            omitted, the notebook-level ``data`` frame is used, as before.

    Returns:
        List of recommendation strings, in this order:
        price outlook (only when the mean forecast differs from the current
        price by more than 10%), risk notice (only when the maximum risk score
        exceeds 0.4), and a moving-average trend message (always present).
    """
    if market_data is None:
        # Fall back to the frame loaded at the top of the notebook
        market_data = data

    current_price = market_data['modal_price'].iloc[-1]
    avg_forecast = predictions['Ensemble'].mean()
    max_risk = risk_levels['risk_score'].max()

    recommendations = []

    # Price trend analysis: mean forecast more than 10% above / below current price
    if avg_forecast > current_price * 1.1:
        recommendations.append("HOLD: Prices expected to rise significantly. Consider storing produce if possible.")
    elif avg_forecast < current_price * 0.9:
        recommendations.append("SELL: Prices expected to decline. Consider immediate market distribution.")

    # Risk level recommendations, based on the highest risk score in the horizon
    if max_risk > 0.7:
        recommendations.append("CAUTION: High market volatility expected. Diversify distribution across markets.")
    elif max_risk > 0.4:
        recommendations.append("MONITOR: Moderate market uncertainty. Keep track of daily price movements.")

    # Market strategy: latest short-term average versus latest long-term average
    ma7 = market_data['MA_7'].iloc[-1]
    ma30 = market_data['MA_30'].iloc[-1]
    if ma7 > ma30:
        recommendations.append("POSITIVE TREND: Short-term average above long-term. Favorable market conditions.")
    else:
        recommendations.append("NEGATIVE TREND: Short-term average below long-term. Exercise caution.")

    return recommendations

# Build the recommendation list and print it as a numbered list
recommendations = generate_recommendations(predictions, risk_levels)

print("\nMarket Recommendations:")
print("----------------------")
for position, recommendation in enumerate(recommendations, start=1):
    print(f"{position}. {recommendation}")

# Summary chart: observed price, ensemble forecast and both moving averages
summary_traces = [
    # (x values, y values, legend label, line style)
    (data['date'], data['modal_price'], 'Historical Price', dict(color='black')),
    (predictions['date'], predictions['Ensemble'], 'Forecast', dict(color='blue', dash='dash')),
    (data['date'], data['MA_7'], '7-day MA', dict(color='green', width=1)),
    (data['date'], data['MA_30'], '30-day MA', dict(color='red', width=1)),
]

fig = go.Figure()
for x_values, y_values, label, line_style in summary_traces:
    fig.add_trace(go.Scatter(x=x_values, y=y_values, name=label, line=line_style))

fig.update_layout(
    title='Market Summary with Moving Averages',
    xaxis_title='Date',
    yaxis_title='Price',
    height=500,
    showlegend=True,
)

fig.show()