In [14]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from prophet import Prophet
import matplotlib.pyplot as plt
from datetime import timedelta
import logging
import torch
import os

# Configure the root logger once: INFO threshold, with each record rendered
# as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

def preprocess_data(df):
    """Index the frame by date, forward-fill gaps and min-max scale the features.

    Side effect: ``df`` itself is modified in place -- a ``ds`` column parsed
    from ``df['date']`` is added and then installed as the index.

    Args:
        df: Frame holding a ``date`` column plus the feature columns
            ``temp``, ``oxygen``, ``NH3``, ``TP``, ``TN`` and ``algae``.

    Returns:
        A ``(df_scaled, features, scaler)`` tuple: the scaled features with an
        extra ``temp_lag`` column (rows containing NaN removed), the list of
        scaled feature names (``temp_lag`` is not among them), and the
        ``MinMaxScaler`` fitted on those features.
    """
    # These two steps deliberately act on the caller's object.
    df['ds'] = pd.to_datetime(df['date'])
    df.set_index('ds', inplace=True)

    filled = df.ffill()

    features = ['temp', 'oxygen', 'NH3', 'TP', 'TN', 'algae']
    scaler = MinMaxScaler()
    # The scaler is fitted on every row that is passed in.
    scaled_values = scaler.fit_transform(filled[features])
    df_scaled = pd.DataFrame(scaled_values, columns=features, index=filled.index)

    # Previous row's scaled temperature; the first row has no predecessor,
    # so it carries NaN and is removed by the dropna below.
    df_scaled['temp_lag'] = df_scaled['temp'].shift(1)
    df_scaled = df_scaled.dropna()

    return df_scaled, features, scaler

def prepare_prophet_data(df):
    """Convert an indexed feature frame into Prophet's column layout.

    The index is moved back into a regular column and the ``algae`` column is
    renamed to ``y``; all other columns are passed through. The input frame is
    left untouched.

    Args:
        df: Frame whose index becomes a column on reset (named ``ds`` when
            the index carries that name).

    Returns:
        A new frame with a fresh integer index.
    """
    column_map = {'ds': 'ds', 'algae': 'y'}
    flattened = df.reset_index()
    return flattened.rename(columns=column_map)

def train_prophet_model(train_df, use_gpu=True):
    """Fit a Prophet model, registering every extra column as a regressor.

    Args:
        train_df: Training frame with ``ds`` and ``y`` columns. Every other
            column is added to the model through ``add_regressor``.
        use_gpu: Kept for backward compatibility. When truthy and torch
            reports CUDA as available, the ``PROPHET_USE_GPU`` environment
            variable is set to ``'true'``; otherwise it is set to ``'false'``.
            NOTE(review): Prophet's documented API has no GPU option and the
            fit is performed by its Stan backend, so this flag is not expected
            to change how the model is trained -- confirm nothing else reads
            the variable before removing it.

    Returns:
        The fitted ``Prophet`` model (yearly, weekly and daily seasonality
        enabled).
    """
    gpu_requested = use_gpu and torch.cuda.is_available()
    # The variable is still exported exactly as before so any external
    # consumer keeps seeing it.
    os.environ['PROPHET_USE_GPU'] = 'true' if gpu_requested else 'false'
    if gpu_requested:
        # Do not claim GPU training: the optimisation itself runs in Stan.
        logging.info(
            "CUDA is available, but Prophet fits through its Stan backend on CPU; "
            "PROPHET_USE_GPU is set for external tooling only"
        )
    else:
        logging.info("Using CPU for training")

    model = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=True
    )

    # Everything except the timestamp and the target is an extra regressor.
    regressor_cols = [col for col in train_df.columns if col not in ('ds', 'y')]
    for col in regressor_cols:
        model.add_regressor(col)

    model.fit(train_df)

    return model

def forecast_algae(model, future_df):
    """Return ``model.predict(future_df)`` unchanged.

    Args:
        model: Object exposing a ``predict`` method (a fitted Prophet model
            in this script).
        future_df: Frame of rows to predict for.
    """
    return model.predict(future_df)

def inverse_transform_feature(scaler, data, feature_index):
    """Undo the scaling of a single feature column.

    The scaler's ``inverse_transform`` is called on a full-width matrix, so
    ``data`` is placed into column ``feature_index`` of a zero-filled array
    whose width equals ``len(scaler.scale_)``; only that column of the result
    is returned.

    Args:
        scaler: Fitted scaler exposing ``scale_`` and ``inverse_transform``.
        data: One-dimensional sequence of scaled values.
        feature_index: Column position of the feature inside the scaler.

    Returns:
        One-dimensional array of values in the original units.
    """
    n_rows = len(data)
    n_features = len(scaler.scale_)

    padded = np.zeros((n_rows, n_features))
    padded[:, feature_index] = data

    restored = scaler.inverse_transform(padded)
    return restored[:, feature_index]

def plot_forecast(original_data, forecast, feature_name):
    """Draw observed values against the forecast and save the figure to disk.

    The image is written to ``'<feature_name>_forecast.png'`` in the current
    working directory and the figure is closed afterwards.

    Args:
        original_data: Series of observed values; its index supplies the
            x-axis.
        forecast: Frame with ``ds``, ``yhat``, ``yhat_lower`` and
            ``yhat_upper`` columns.
        feature_name: Used for the title, the y-axis label and the file name.
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    ax.plot(original_data.index, original_data, label='Observed')
    ax.plot(forecast['ds'], forecast['yhat'], label='Forecast', color='red')
    # Shaded band between the lower and upper forecast bounds.
    ax.fill_between(
        forecast['ds'],
        forecast['yhat_lower'],
        forecast['yhat_upper'],
        color='red',
        alpha=0.2,
    )

    ax.set_title(f'{feature_name} Forecast')
    ax.set_xlabel('Date')
    ax.set_ylabel(feature_name)
    ax.legend()
    ax.grid(True)

    fig.tight_layout()
    fig.savefig(f'{feature_name}_forecast.png')
    plt.close(fig)

def main():
    """Run the whole pipeline: load, preprocess, fit, forecast, report, plot.

    Steps:
        1. Read the CSV and preprocess it into scaled Prophet-style columns.
        2. Fit Prophet on the first 80% of rows.
        3. Build a frame of the training dates plus 30 further periods and
           attach the regressor values to it *by date*. Dates for which no
           regressor values are known are dropped with a warning, because a
           prediction cannot be made for them.
        4. Convert the predicted columns back to original units, log the
           tail of the forecast and save the plot.

    Any exception is logged together with its traceback and is not re-raised.
    """
    forecast_horizon = 30
    plot_name = 'Algae Bloom'

    try:
        df = pd.read_csv('/root/Download/AlgaeBloomForecast/wuguishan.csv', encoding='utf-8')
        logging.info("Successfully read the file")

        df_scaled, features, scaler = preprocess_data(df)
        prophet_df = prepare_prophet_data(df_scaled)

        # Fit on the first 80% of rows; the remaining rows are the only
        # source of regressor values for the forecast window below.
        train_size = int(len(prophet_df) * 0.8)
        train_df = prophet_df[:train_size]

        # Train the model
        model = train_prophet_model(train_df)

        # Forecast
        future_df = model.make_future_dataframe(periods=forecast_horizon)
        regressor_cols = [col for col in prophet_df.columns if col not in ('ds', 'y')]

        # Join on the timestamp rather than on row position, so that gaps or
        # irregular spacing in the data cannot pair a date with another
        # date's regressor values.
        known_regressors = prophet_df[['ds'] + regressor_cols].drop_duplicates(subset='ds')
        future_df = future_df.merge(known_regressors, on='ds', how='left')

        # Rows without regressor values cannot be predicted; skip them
        # instead of letting the whole run fail.
        missing = future_df[regressor_cols].isna().any(axis=1)
        if missing.any():
            logging.warning(
                "Dropping %d forecast dates that have no regressor values",
                int(missing.sum()),
            )
            future_df = future_df.loc[~missing].reset_index(drop=True)

        forecast = forecast_algae(model, future_df)

        # Inverse transform the forecast
        algae_index = features.index('algae')
        for col in ('yhat', 'yhat_lower', 'yhat_upper'):
            forecast[col] = inverse_transform_feature(scaler, forecast[col].values, algae_index)

        logging.info("\nAlgae bloom forecast for the next %d days:", forecast_horizon)
        logging.info(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(forecast_horizon))

        # Plot the forecast. The observed series is indexed by the parsed
        # dates explicitly instead of relying on whatever index ``df`` has
        # at this point.
        observed_dates = pd.to_datetime(df['date'])
        original_algae = pd.Series(df['algae'].to_numpy(), index=observed_dates.to_numpy())
        plot_forecast(original_algae, forecast, plot_name)
        # Report the file name that plot_forecast actually writes.
        logging.info("Forecast plot saved as '%s_forecast.png'", plot_name)

    except Exception as e:
        # Top-level boundary: record the message and the traceback.
        logging.exception("An error occurred in main: %s", e)

# Run the pipeline only when this module is executed directly, not when it
# is imported.
if __name__ == "__main__":
    main()

2024-08-15 09:15:49,126 - INFO - Successfully read the file
2024-08-15 09:15:49,140 - INFO - Using GPU for training


2024-08-15 09:15:49,237 - DEBUG - input tempfile: /tmp/tmp6228jyn8/b6akb765.json
2024-08-15 09:15:49,356 - DEBUG - input tempfile: /tmp/tmp6228jyn8/p1vl05po.json
2024-08-15 09:15:49,367 - DEBUG - idx 0
2024-08-15 09:15:49,368 - DEBUG - running CmdStan, num_threads: None
2024-08-15 09:15:49,368 - DEBUG - CmdStan args: ['/root/.conda/envs/hydro/envs/algae/lib/python3.12/site-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=87073', 'data', 'file=/tmp/tmp6228jyn8/b6akb765.json', 'init=/tmp/tmp6228jyn8/p1vl05po.json', 'output', 'file=/tmp/tmp6228jyn8/prophet_modelyzw5jfr1/prophet_model-20240815091549.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
09:15:49 - cmdstanpy - INFO - Chain [1] start processing
2024-08-15 09:15:49,369 - INFO - Chain [1] start processing
09:15:49 - cmdstanpy - INFO - Chain [1] done processing
2024-08-15 09:15:49,658 - INFO - Chain [1] done processing
2024-08-15 09:15:50,072 - INFO - 
Algae bloom forecast for the next 30 days:
2024-08-15 09:1

2024-08-15 09:12:39,582 - INFO - Successfully read the file
2024-08-15 09:12:39,592 - INFO - Using GPU for training
2024-08-15 09:12:39,619 - DEBUG - input tempfile: /tmp/tmp6228jyn8/zf6dieab.json
2024-08-15 09:12:39,726 - DEBUG - input tempfile: /tmp/tmp6228jyn8/5n3b75jt.json
2024-08-15 09:12:39,734 - DEBUG - idx 0
2024-08-15 09:12:39,735 - DEBUG - running CmdStan, num_threads: None
2024-08-15 09:12:39,735 - DEBUG - CmdStan args: ['/root/.conda/envs/hydro/envs/algae/lib/python3.12/site-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=32995', 'data', 'file=/tmp/tmp6228jyn8/zf6dieab.json', 'init=/tmp/tmp6228jyn8/5n3b75jt.json', 'output', 'file=/tmp/tmp6228jyn8/prophet_model7he1lhgf/prophet_model-20240815091239.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
09:12:39 - cmdstanpy - INFO - Chain [1] start processing
2024-08-15 09:12:39,736 - INFO - Chain [1] start processing
09:12:39 - cmdstanpy - INFO - Chain [1] done processing
2024-08-15 09:12:39,973 - INFO - Ch