In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from statsmodels.tsa.stattools import adfuller
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from math import sqrt
import logging
import warnings

# Suppress every warning of category UserWarning for the whole process.
warnings.filterwarnings("ignore", category=UserWarning)
# Configure the root logger: INFO and above, prefixed with timestamp and level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Module-level torch device: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logging.info(f"Using device: {device}")

class ARIMA(nn.Module):
    """Linear ARIMA-style one-step predictor built from two ``nn.Linear`` layers.

    The output is the sum of an "AR" layer applied to the most recent ``p``
    input values and an "MA" layer applied to the most recent ``q`` input
    values. Each layer always has at least one input, so ``p == 0`` or
    ``q == 0`` still contributes a one-lag linear term.

    Note: the "MA" layer is fed raw lagged values rather than past forecast
    errors, and ``d`` is only stored -- no differencing is done in this module.

    Args:
        p: Number of lags fed to the AR layer.
        d: Differencing order; kept as an attribute, not used by ``forward``.
        q: Number of lags fed to the MA layer.
    """

    def __init__(self, p, d, q):
        super().__init__()
        self.p = p
        self.d = d
        self.q = q
        # max(..., 1) keeps each layer valid when its order is zero.
        self.ar = nn.Linear(max(p, 1), 1)
        self.ma = nn.Linear(max(q, 1), 1)

    def forward(self, x):
        """Predict the next value from a window of lagged values.

        Args:
            x: 2-D tensor ``(batch, n_lags)`` ordered oldest to newest along
                dim 1. Windows shorter than ``max(p, q, 1)`` are zero-padded.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        window = max(self.p, self.q, 1)
        missing = window - x.size(1)
        if missing > 0:
            # Pad on the left so the real observations stay in the
            # most-recent positions that both layers read from.
            x = torch.nn.functional.pad(x, (missing, 0))
        # Both terms use the latest lags; slicing from the front would hand
        # the AR layer the oldest values whenever q > p.
        ar_part = self.ar(x[:, -max(self.p, 1):])
        ma_part = self.ma(x[:, -max(self.q, 1):])
        return ar_part + ma_part

def preprocess_data(df):
    """Index the frame by date, forward-fill gaps and min-max scale the columns.

    The frame is modified in place and the same object is returned.

    Args:
        df: DataFrame with a ``date`` column plus the columns ``temp``,
            ``oxygen``, ``NH3``, ``TP``, ``TN`` and ``algae``.

    Returns:
        Tuple ``(df, features, target)``: the processed frame, the list of
        feature column names and the target column name.
    """
    df['date'] = pd.to_datetime(df['date'])
    df.set_index('date', inplace=True)
    # ffill() replaces the deprecated fillna(method='ffill'). Values missing
    # before the first valid observation of a column are left as NaN.
    df.ffill(inplace=True)

    features = ['temp', 'oxygen', 'NH3', 'TP', 'TN']
    target = 'algae'
    columns = features + [target]
    # A single scaler is fit over all six columns; MinMaxScaler rescales each
    # column independently.
    df[columns] = MinMaxScaler().fit_transform(df[columns])

    return df, features, target

def check_stationarity(series):
    """Run an ADF test on ``series`` and difference it once if needed.

    The ADF statistic and p-value are logged. When the p-value exceeds 0.05
    the first difference (with the leading NaN dropped) is returned;
    otherwise the series is returned unchanged.
    """
    adf_stat, p_value = adfuller(series.values)[:2]
    logging.info(f'ADF Statistic for {series.name}: {adf_stat}')
    logging.info(f'p-value: {p_value}')
    if p_value > 0.05:
        return series.diff().dropna()
    return series

def evaluate_arima_model(data, order):
    """Walk-forward one-step-ahead RMSE of an ``ARIMA`` model for one order.

    The series is differenced ``d`` times, then the last 20% of the original
    observations are predicted one at a time from the preceding
    ``max(p, q, 1)`` values. The model starts from its random initial weights
    and takes one Adam step after each prediction; the leading 80% of the
    series only supplies lag history. Results therefore vary between calls
    unless the torch RNG is seeded by the caller.

    Args:
        data: 1-D sequence of floats (pandas Series, NumPy array or list).
        order: ``(p, d, q)`` tuple.

    Returns:
        RMSE of the one-step-ahead predictions as a float.

    Raises:
        ValueError: If the series is too short to leave at least one history
            value and one test value after differencing.
    """
    p, d, q = order
    values = np.asarray(data, dtype=np.float64)
    train_size = int(len(values) * 0.8)

    # Apply the requested differencing. A one-step-ahead error on the
    # differenced series equals the error on the original scale, because the
    # terms needed to undo the differencing are already-observed values, so
    # RMSEs stay comparable across different d.
    if d > 0:
        values = np.diff(values, n=d)
    # Each differencing pass drops one leading value; shifting the split keeps
    # the evaluated observations the same for every d.
    split = train_size - d
    if split < 1 or split >= len(values):
        raise ValueError(
            f"Series of length {len(data)} is too short to evaluate ARIMA{tuple(order)}"
        )

    model = ARIMA(p, d, q).to(device)
    optimizer = torch.optim.Adam(model.parameters())
    criterion = nn.MSELoss()

    window = max(p, q, 1)
    actuals = values[split:]
    predictions = []

    for offset, actual in enumerate(actuals):
        end = split + offset
        lags = values[max(0, end - window):end]
        x = torch.tensor(lags, dtype=torch.float32, device=device).view(1, -1)
        # Shape (1, 1) to match the model output and avoid silent broadcasting
        # inside MSELoss.
        y_true = torch.tensor([[actual]], dtype=torch.float32, device=device)

        # Predict first, then learn from the revealed value.
        y_pred = model(x)
        loss = criterion(y_pred, y_true)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        predictions.append(y_pred.item())

    return sqrt(mean_squared_error(actuals, predictions))

def find_best_arima_order(data, max_p=3, max_d=2, max_q=3):
    """Grid-search ``(p, d, q)`` and return the order with the lowest RMSE.

    Every combination with ``0 <= p <= max_p``, ``0 <= d <= max_d`` and
    ``0 <= q <= max_q`` is scored with ``evaluate_arima_model``. A candidate
    that raises is logged as a warning and skipped. Returns ``None`` when no
    candidate could be evaluated.
    """
    candidates = (
        (p, d, q)
        for p in range(max_p + 1)
        for d in range(max_d + 1)
        for q in range(max_q + 1)
    )

    best_order = None
    best_score = float("inf")
    for candidate in candidates:
        p, d, q = candidate
        try:
            rmse = evaluate_arima_model(data, candidate)
            if rmse < best_score:
                best_score = rmse
                best_order = candidate
        except Exception as e:
            logging.warning(f"Error with ARIMA({p},{d},{q}): {str(e)}")

    logging.info(f'Best ARIMA{best_order} RMSE={best_score}')
    return best_order

def process_all_features(df, features, target):
    """Find the best ARIMA order for every feature column and the target.

    Each column is passed through ``check_stationarity`` and the result is
    searched with ``find_best_arima_order``.

    Returns:
        Dict mapping column name to the selected ``(p, d, q)`` order.
    """
    best_by_column = {}
    for name in features + [target]:
        logging.info(f"Processing {name}")
        stationary = check_stationarity(df[name])
        best_by_column[name] = find_best_arima_order(stationary)
    return best_by_column

def main(csv_path='/root/Download/AlgaeBloomForecast/wuguishan.csv'):
    """Load the CSV, preprocess it and log the best ARIMA order per column.

    Args:
        csv_path: Path of the UTF-8 CSV file to analyse. Defaults to the
            location originally hard-coded in this script.

    Any exception is logged with its traceback and swallowed, so the
    function always returns ``None``.
    """
    try:
        df = pd.read_csv(csv_path, encoding='utf-8')
        logging.info("Successfully read the file")

        df, features, target = preprocess_data(df)
        best_orders = process_all_features(df, features, target)

        logging.info("\nBest ARIMA orders for each feature and target:")
        for column, order in best_orders.items():
            logging.info(f"{column}: ARIMA{order}")

    except Exception as e:
        # Top-level boundary: keep the script from crashing, but record the
        # traceback so the failing step can be identified.
        logging.exception(f"An error occurred in main: {str(e)}")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

这段输出日志记录了一个数据分析任务的执行过程，主要包括以下几个步骤：

1. **设备使用**：
   - 使用了CUDA设备进行计算。

2. **文件读取**：
   - 成功读取了数据文件 `/root/Download/AlgaeBloomForecast/wuguishan.csv`（日志中出现的 `/tmp/ipykernel_23079/580323579.py` 应为触发警告的 Notebook 单元格临时脚本路径，而非被读取的数据文件）。

3. **数据处理**：
   - 处理了多个特征（如 `temp`, `oxygen`, `NH3`, `TP`, `TN`, `algae`）。
   - 对每个特征进行了ADF检验，并输出了ADF统计量和p值。

4. **模型拟合**：
   - 使用ARIMA模型对每个特征进行拟合。
   - 找到了每个特征的最佳ARIMA模型参数组合，并输出了相应的RMSE值。

5. **最佳ARIMA模型**：
   - `temp`: ARIMA(1, 1, 3)
   - `oxygen`: ARIMA(1, 2, 1)
   - `NH3`: ARIMA(0, 0, 0)
   - `TP`: ARIMA(1, 1, 2)
   - `TN`: ARIMA(2, 2, 0)
   - `algae`: ARIMA(1, 2, 2)

6. **警告信息**：
   - 提示 `DataFrame.fillna` 的 `method` 参数已被弃用，并将在未来版本中移除，建议改用 `obj.ffill()` 或 `obj.bfill()`。

总体来说，这段日志记录了一个数据分析任务的详细过程，包括数据处理、模型拟合和结果输出，同时也指出了在数据处理过程中需要注意的一些事项。