In [1]:
import os
import sys
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.ensemble import RandomForestClassifier
from meteostat import Point, Monthly, Stations
import warnings
warnings.filterwarnings('ignore')

# Check for conflicting files in sys.path
def check_conflicting_files():
    conflicting_files = ['pandas.py', 'prophet.py', 'sklearn.py', 'numpy.py', 'meteostat.py', 'matplotlib.py', 'plotly.py']
    for path in sys.path:
        for f in conflicting_files:
            if os.path.exists(os.path.join(path, f)):
                print(f"Error: Conflicting file {f} found in {path}. Rename it and try again.")
                sys.exit(1)

# Verify imports
try:
    import pandas as pd
    from prophet import Prophet
except ImportError as e:
    print(f"Import error: {e}. Run 'pip install -r requirements.txt'")
    sys.exit(1)

def get_nearest_weather_station(lat, lon, max_distance=100):
    """
    Find the nearest weather station to the given coordinates
    """
    try:
        stations = Stations()
        stations = stations.nearby(lat, lon)
        station = stations.fetch(1)
        if not station.empty:
            return station.index[0]
        return None
    except:
        return None

# Weather Prediction Function
def fetch_weather_data(lat, lon, start='2018-01-01', end='2024-12-31'):
    """
    Fetch historical weather data with enhanced error handling and seasonal patterns
    """
    try:
        # Try to get data from nearest station first
        station_id = get_nearest_weather_station(lat, lon)
        if station_id:
            location = Point(lat, lon, station_id)
        else:
            location = Point(lat, lon)
            
        start = pd.to_datetime(start)
        end = pd.to_datetime(end)
        
        # Try to fetch data with different parameters
        data = Monthly(location, start, end)
        data = data.fetch().reset_index()
        
        if data.empty:
            raise ValueError("No data returned from Meteostat")
        
        # Check for required columns and handle missing ones
        available_columns = data.columns
        required_columns = ['time', 'prcp', 'tavg', 'rhum']
        
        # If we don't have the required data, generate realistic synthetic data
        if not all(col in available_columns for col in required_columns):
            raise ValueError("Incomplete data from Meteostat")
        
        data = data[['time', 'prcp', 'tavg', 'rhum']].rename(
            columns={'time': 'ds', 'prcp': 'y', 'tavg': 'temperature', 'rhum': 'humidity'}
        )
        
        # Handle missing values with more sophisticated imputation
        data['y'] = pd.to_numeric(data['y'], errors='coerce')
        data['temperature'] = pd.to_numeric(data['temperature'], errors='coerce')
        data['humidity'] = pd.to_numeric(data['humidity'], errors='coerce')
        
        return data
        
    except Exception as e:
        print(f"Using enhanced synthetic data due to: {e}")
        # Create more realistic synthetic data based on location and climate zone
        dates = pd.date_range(start=start, end=end, freq='ME')
        n_periods = len(dates)
        
        # Adjust base values based on latitude (tropical vs temperate)
        if lat < 23.5:  # Tropical region
            base_rainfall = 150
            base_temp = 28
            rainfall_seasonality = 80 * np.sin(2 * np.pi * np.arange(n_periods) / 12 + np.pi/2)
            temp_seasonality = 2 * np.sin(2 * np.pi * np.arange(n_periods) / 12)
        else:  # Temperate region
            base_rainfall = 100
            base_temp = 22
            rainfall_seasonality = 40 * np.sin(2 * np.pi * np.arange(n_periods) / 12 + np.pi/2)
            temp_seasonality = 8 * np.sin(2 * np.pi * np.arange(n_periods) / 12)
        
        base_humidity = 70
        humidity_seasonality = 15 * np.sin(2 * np.pi * np.arange(n_periods) / 12 + np.pi/4)
        
        # Add some randomness
        np.random.seed(int(lat * lon))  # Seed based on location for consistency
        rainfall_noise = np.random.normal(0, 20, n_periods)
        temp_noise = np.random.normal(0, 3, n_periods)
        humidity_noise = np.random.normal(0, 8, n_periods)
        
        return pd.DataFrame({
            'ds': dates,
            'y': np.clip(base_rainfall + rainfall_seasonality + rainfall_noise, 30, 300),
            'temperature': np.clip(base_temp + temp_seasonality + temp_noise, 10, 40),
            'humidity': np.clip(base_humidity + humidity_seasonality + humidity_noise, 40, 95)
        })

def train_prophet_model(data, target, regressors):
    try:
        model = Prophet(
            yearly_seasonality=True, 
            weekly_seasonality=False, 
            daily_seasonality=False,
            seasonality_mode='multiplicative',
            changepoint_prior_scale=0.05,
            interval_width=0.8  # 80% confidence interval
        )
        for regressor in regressors:
            model.add_regressor(regressor)
        df = data.copy().rename(columns={target: 'y'})
        model.fit(df)
        return model
    except Exception as e:
        print(f"Error training Prophet model for {target}: {e}")
        raise

def detect_growing_seasons(temperatures, rainfall, months_ahead=12):
    """
    Detect suitable growing seasons based on temperature and rainfall patterns
    """
    # More specific optimal conditions
    optimal_temp_range = (20, 30)  # Narrower range for better crops
    min_rainfall = 80  # Higher minimum rainfall
    max_rainfall = 250  # Maximum rainfall limit
    
    suitable_months = []
    for i, (temp, rain) in enumerate(zip(temperatures, rainfall)):
        if (optimal_temp_range[0] <= temp <= optimal_temp_range[1] and 
            min_rainfall <= rain <= max_rainfall):
            suitable_months.append(i)
    
    # Group consecutive suitable months into seasons
    seasons = []
    if suitable_months:
        current_season = [suitable_months[0]]
        for i in range(1, len(suitable_months)):
            if suitable_months[i] == suitable_months[i-1] + 1:
                current_season.append(suitable_months[i])
            else:
                if len(current_season) >= 2:  # Only consider seasons with at least 2 months
                    seasons.append(current_season)
                current_season = [suitable_months[i]]
        if len(current_season) >= 2:
            seasons.append(current_season)
    
    return seasons

def predict_weather(lat, lon, months=6, crop_type=None):
    """
    Predict weather conditions for the specified location with crop-specific insights
    """
    try:
        # Fetch historical data with a longer timeframe for better seasonality detection
        data = fetch_weather_data(lat, lon, start='2018-01-01')
        data['latitude'] = lat
        data['longitude'] = lon
        
        # Ensure numerical data types to avoid the ufunc warning
        data['y'] = pd.to_numeric(data['y'], errors='coerce')
        data['temperature'] = pd.to_numeric(data['temperature'], errors='coerce')
        data['humidity'] = pd.to_numeric(data['humidity'], errors='coerce')
        
        # Enhanced water scarcity calculation
        data['water_scarcity'] = data.apply(
            lambda row: 'Low' if row['y'] > 150 and row['humidity'] > 60 
            else 'Medium' if row['y'] > 100 
            else 'High', axis=1
        )

        # Train models
        rainfall_model = train_prophet_model(data, 'y', ['temperature', 'humidity'])
        
        temp_data = data.copy()
        temp_data = temp_data.rename(columns={'temperature': 'y', 'y': 'rainfall'})
        temp_model = train_prophet_model(temp_data, 'y', ['rainfall', 'humidity'])
        
        hum_data = data.copy()
        hum_data = hum_data.rename(columns={'humidity': 'y', 'y': 'rainfall'})
        hum_model = train_prophet_model(hum_data, 'y', ['rainfall', 'temperature'])

        # Train classifier for water scarcity
        clf = RandomForestClassifier(random_state=42, n_estimators=100)
        clf.fit(data[['y', 'temperature', 'humidity']], data['water_scarcity'])

        # Create future DataFrame
        future = rainfall_model.make_future_dataframe(periods=months, freq='ME')
        
        # Use the last known values as starting point instead of averages
        future['temperature'] = data['temperature'].iloc[-1]
        future['humidity'] = data['humidity'].iloc[-1]

        # Make predictions
        rainfall_forecast = rainfall_model.predict(future)
        rainfall_forecast['yhat'] = rainfall_forecast['yhat'].clip(lower=0)
        rainfall_forecast['yhat_lower'] = rainfall_forecast['yhat_lower'].clip(lower=0)
        rainfall_forecast['yhat_upper'] = rainfall_forecast['yhat_upper'].clip(lower=0)
        
        # Use forecasted rainfall for temperature and humidity predictions
        temp_future = future.copy()
        temp_future['rainfall'] = rainfall_forecast['yhat']
        temp_future['humidity'] = data['humidity'].iloc[-1]
        temp_forecast = temp_model.predict(temp_future)
        
        hum_future = future.copy()
        hum_future['rainfall'] = rainfall_forecast['yhat']
        hum_future['temperature'] = data['temperature'].iloc[-1]
        hum_forecast = hum_model.predict(hum_future)
        hum_forecast['yhat'] = hum_forecast['yhat'].clip(lower=30, upper=95)

        # Predict water scarcity - only for future period
        future_data = pd.DataFrame({
            'y': rainfall_forecast['yhat'][-months:],
            'temperature': temp_forecast['yhat'][-months:],
            'humidity': hum_forecast['yhat'][-months:]
        })
        scarcity_pred = clf.predict(future_data)

        # Create detailed predictions DataFrame - Only include forecast period
        future_period = rainfall_forecast['ds'][-months:]
        predictions = pd.DataFrame({
            'Date': future_period,
            'Rainfall (mm)': rainfall_forecast['yhat'][-months:].round(1),
            'Rainfall Lower': rainfall_forecast['yhat_lower'][-months:].round(1),
            'Rainfall Upper': rainfall_forecast['yhat_upper'][-months:].round(1),
            'Temperature (°C)': temp_forecast['yhat'][-months:].round(1),
            'Humidity (%)': hum_forecast['yhat'][-months:].round(1),
            'Water Scarcity': scarcity_pred
        })

        # Detect growing seasons - only use forecast period data
        growing_seasons = detect_growing_seasons(
            predictions['Temperature (°C)'].values,
            predictions['Rainfall (mm)'].values,
            months
        )
        
        # Add season information to predictions - FIXED: No extra rows
        predictions['Growing_Season'] = 'No'
        for season in growing_seasons:
            for month_idx in season:
                if month_idx < len(predictions):
                    predictions.loc[month_idx, 'Growing_Season'] = 'Yes'

        # Calculate statistics
        overall_stats = {
            'avg_rainfall': predictions['Rainfall (mm)'].mean(),
            'total_rainfall': predictions['Rainfall (mm)'].sum(),
            'min_temp': predictions['Temperature (°C)'].min(),
            'max_temp': predictions['Temperature (°C)'].max(),
            'avg_temp': predictions['Temperature (°C)'].mean(),
            'avg_humidity': predictions['Humidity (%)'].mean(),
            'dominant_water_scarcity': predictions['Water Scarcity'].mode()[0] if len(predictions['Water Scarcity'].mode()) > 0 else 'Medium',
            'growing_seasons': growing_seasons,
            'growing_months': sum(predictions['Growing_Season'] == 'Yes')
        }

        return {
            'detailed_predictions': predictions,
            'overall_statistics': overall_stats,
            'location': {'latitude': lat, 'longitude': lon},
            'forecast_period': f"{months} months",
            'climate_zone': 'Tropical' if lat < 23.5 else 'Temperate'
        }
        
    except Exception as e:
        print(f"Error in predict_weather: {e}")
        import traceback
        traceback.print_exc()
        raise
    
def recommend_planting_dates(weather_data, soil_type, crop_preferences=None):
    """
    Recommend optimal planting dates based on weather predictions and soil type
    """
    predictions = weather_data['detailed_predictions']
    stats = weather_data['overall_statistics']
    
    # Basic recommendation logic (can be enhanced with crop-specific requirements)
    suitable_months = []
    for idx, row in predictions.iterrows():
        # FIXED: Check if it's a growing season with suitable conditions
        if row['Growing_Season'] == 'Yes' and row['Water Scarcity'] in ['Low', 'Medium']:
            suitable_months.append({
                'month': row['Date'].strftime('%B %Y'),
                'rainfall': row['Rainfall (mm)'],
                'temperature': row['Temperature (°C)'],
                'suitability_score': calculate_suitability_score(row, soil_type)
            })
    
    # If no suitable months found, recommend the best available options
    if not suitable_months:
        for idx, row in predictions.iterrows():
            if row['Water Scarcity'] in ['Low', 'Medium']:
                suitable_months.append({
                    'month': row['Date'].strftime('%B %Y'),
                    'rainfall': row['Rainfall (mm)'],
                    'temperature': row['Temperature (°C)'],
                    'suitability_score': calculate_suitability_score(row, soil_type)
                })
    
    return sorted(suitable_months, key=lambda x: x['suitability_score'], reverse=True)[:3]

def calculate_suitability_score(weather_row, soil_type):
    """
    Calculate a suitability score based on weather conditions and soil type
    """
    score = 0
    
    # Rainfall scoring (max 3 points)
    if 100 <= weather_row['Rainfall (mm)'] <= 200:
        score += 3
    elif 80 <= weather_row['Rainfall (mm)'] < 100 or 200 < weather_row['Rainfall (mm)'] <= 250:
        score += 2
    elif 50 <= weather_row['Rainfall (mm)'] < 80:
        score += 1
    
    # Temperature scoring (max 3 points)
    if 22 <= weather_row['Temperature (°C)'] <= 28:
        score += 3
    elif 20 <= weather_row['Temperature (°C)'] < 22 or 28 < weather_row['Temperature (°C)'] <= 30:
        score += 2
    elif 18 <= weather_row['Temperature (°C)'] < 20 or 30 < weather_row['Temperature (°C)'] <= 32:
        score += 1
    
    # Soil type considerations (max 1 point)
    soil_scores = {
        'Black Soil': 1, 'Laterite Soil': 1, 'Yellow Soil': 1, 
        'Peat Soil': 0, 'Cinder Soil': 0
    }
    score += soil_scores.get(soil_type, 0)
    
    # Ensure score doesn't exceed maximum of 7
    return min(score, 7)

# Main execution with different timeframes
if __name__ == "__main__":
    try:
        # Check for conflicting files
        check_conflicting_files()
        
        # Example coordinates (Bhopal, India)
        lat, lon = 23.0775, 76.8513
        
        print("=== WEATHER PREDICTION SYSTEM ===\n")
        
        # Test different timeframes
        timeframes = [3, 6, 12]
        
        for months in timeframes:
            print(f"=== {months}-MONTH FORECAST ===")
            
            # Predict weather
            results = predict_weather(lat, lon, months=months)
            
            # Print detailed predictions - FIXED: Only show the actual forecast rows
            print("Detailed Weather Predictions:")
            print(results['detailed_predictions'].to_string(index=False))
            
            print("\nSummary Statistics:")
            stats = results['overall_statistics']
            for key, value in stats.items():
                if isinstance(value, float):
                    print(f"{key}: {value:.1f}")
                else:
                    print(f"{key}: {value}")
            
            # Example integration with soil data (would come from Model 1)
            soil_type = "Black Soil"  # This would come from your Model 1
            planting_recommendations = recommend_planting_dates(results, soil_type)
            
            print(f"\nTop Planting Recommendations for {soil_type}:")
            if planting_recommendations:
                for i, rec in enumerate(planting_recommendations, 1):
                    print(f"{i}. {rec['month']} - Rainfall: {rec['rainfall']}mm, Temp: {rec['temperature']}°C, Score: {rec['suitability_score']}/7")
            else:
                print("No optimal planting dates found in this period. Consider irrigation solutions.")
            
            print("\n" + "="*50 + "\n")

    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()

  from .autonotebook import tqdm as notebook_tqdm
22:29:00 - cmdstanpy - INFO - Chain [1] start processing


=== WEATHER PREDICTION SYSTEM ===

=== 3-MONTH FORECAST ===
Using enhanced synthetic data due to: ufunc 'subtract' did not contain a loop with signature matching types (dtype('<U5'), dtype('float64')) -> None


22:29:00 - cmdstanpy - INFO - Chain [1] done processing
22:29:00 - cmdstanpy - INFO - Chain [1] start processing
22:29:00 - cmdstanpy - INFO - Chain [1] done processing
22:29:00 - cmdstanpy - INFO - Chain [1] start processing
22:29:00 - cmdstanpy - INFO - Chain [1] done processing
22:29:01 - cmdstanpy - INFO - Chain [1] start processing


Detailed Weather Predictions:
      Date  Rainfall (mm)  Rainfall Lower  Rainfall Upper  Temperature (°C)  Humidity (%) Water Scarcity Growing_Season
2025-01-31          234.9           211.8           257.3              28.1          81.5            Low             No
2025-02-28          223.7           199.9           246.8              27.8          77.2            Low             No
2025-03-31          178.0           155.6           201.0              28.4          85.2            Low             No
       NaT            NaN             NaN             NaN               NaN           NaN            NaN            Yes
       NaT            NaN             NaN             NaN               NaN           NaN            NaN            Yes
       NaT            NaN             NaN             NaN               NaN           NaN            NaN            Yes

Summary Statistics:
avg_rainfall: 212.2
total_rainfall: 636.6
min_temp: 27.8
max_temp: 28.4
avg_temp: 28.1
avg_humidity: 81.3
dom

22:29:01 - cmdstanpy - INFO - Chain [1] done processing
22:29:01 - cmdstanpy - INFO - Chain [1] start processing
22:29:01 - cmdstanpy - INFO - Chain [1] done processing
22:29:01 - cmdstanpy - INFO - Chain [1] start processing
22:29:01 - cmdstanpy - INFO - Chain [1] done processing
22:29:01 - cmdstanpy - INFO - Chain [1] start processing


Detailed Weather Predictions:
      Date  Rainfall (mm)  Rainfall Lower  Rainfall Upper  Temperature (°C)  Humidity (%) Water Scarcity Growing_Season
2025-01-31          234.9           212.4           257.4              28.1          81.5            Low             No
2025-02-28          223.7           199.6           247.2              27.8          77.2            Low             No
2025-03-31          178.0           154.9           200.9              28.4          85.2            Low             No
2025-04-30          131.0           106.4           153.8              29.2          80.8         Medium             No
2025-05-31          117.3            95.1           140.1              29.8          76.4         Medium             No
2025-06-30           81.9            58.4           104.1              29.3          68.6           High             No
       NaT            NaN             NaN             NaN               NaN           NaN            NaN            Yes
       NaT

22:29:02 - cmdstanpy - INFO - Chain [1] done processing
22:29:02 - cmdstanpy - INFO - Chain [1] start processing
22:29:02 - cmdstanpy - INFO - Chain [1] done processing
22:29:02 - cmdstanpy - INFO - Chain [1] start processing
22:29:02 - cmdstanpy - INFO - Chain [1] done processing


Detailed Weather Predictions:
      Date  Rainfall (mm)  Rainfall Lower  Rainfall Upper  Temperature (°C)  Humidity (%) Water Scarcity Growing_Season
2025-01-31          234.9           211.9           258.1              28.1          81.5            Low             No
2025-02-28          223.7           200.4           245.5              27.8          77.2            Low             No
2025-03-31          178.0           155.5           200.7              28.4          85.2            Low             No
2025-04-30          131.0           110.1           155.0              29.2          80.8         Medium             No
2025-05-31          117.3            93.6           138.5              29.8          76.4         Medium             No
2025-06-30           81.9            59.8           105.7              29.3          68.6           High             No
2025-07-31           83.8            61.5           106.7              31.7          60.9           High             No
2025-08-31