导入所需库

In [1]:
import pandas as pd
import numpy as np
import torch
import lightning.pytorch as pl

from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import QuantileLoss
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import LearningRateMonitor
from torch.utils.data import DataLoader

from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
import pickle

读取数据及数据预处理

In [2]:
# read data
data_path = '../data/'
weather_data = pd.read_csv(data_path + 'neimeng_weather.csv', sep=',')
solar_data = pd.read_csv(data_path + 'neimeng_solar_output.csv', sep=',')
wind_data = pd.read_csv(data_path + 'neimeng_wind_output.csv', sep=',')

weather_data.drop(columns=['id', 'lng', 'lat', 'region_name'], inplace=True)
solar_data.drop(columns=['type'], inplace=True)
wind_data.drop(columns=['type'], inplace=True)

city_code_map = {
    '乌兰察布':	1509,
    '锡林郭勒':	1525,
    '包头':	1502,
    '巴彦淖尔':	1508,
    '阿拉善盟':	1529,
    '呼和浩特':	1501,
    '鄂尔多斯':	1506,
    '乌海':	1503,
    '鄂尔多斯+薛家湾': 1506
}

solar_data['city_name'] = solar_data['city_name'].map(city_code_map)
wind_data['city_name'] = wind_data['city_name'].map(city_code_map)
weather_data.rename(columns={'region_code': 'city_code', 'ts': 'datetime'}, inplace=True)
solar_data.rename(columns={'city_name': 'city_code', 'date_time': 'datetime', 'value': 'solar_output'}, inplace=True)
wind_data.rename(columns={'city_name': 'city_code', 'date_time': 'datetime', 'value': 'wind_output'}, inplace=True)

weather_data['datetime'] = pd.to_datetime(weather_data['datetime'], dayfirst=True)
weather_data.set_index(weather_data['datetime'], inplace=True)
weather_data.drop(columns=['datetime'], inplace=True)

solar_data['datetime'] = pd.to_datetime(solar_data['datetime'], dayfirst=True)
solar_data.set_index(solar_data['datetime'], inplace=True)
solar_data.drop(columns=['datetime'], inplace=True)

wind_data['datetime'] = pd.to_datetime(wind_data['datetime'], dayfirst=True)
wind_data.set_index(wind_data['datetime'], inplace=True)
wind_data.drop(columns=['datetime'], inplace=True)

target_city_code = 1508
mask = weather_data['city_code'] == target_city_code
weather_data = weather_data[mask]
mask = solar_data['city_code'] == target_city_code
solar_data = solar_data[mask]
mask = wind_data['city_code'] == target_city_code
wind_data = wind_data[mask]

solar_data = solar_data.resample('h', closed='right', label='right').mean()
wind_data = wind_data.resample('h', closed='right', label='right').mean()

merged_df = pd.concat([weather_data, solar_data, wind_data], axis=1)
merged_df.dropna(inplace=True)
merged_df.drop(columns=['city_code'], inplace=True)

def deal_missing_data(df_data):
    complete_time_range = pd.date_range(
        start=df_data.index.min(),
        end=df_data.index.max(),
        freq='h'
    )
    initial_missing = complete_time_range.difference(df_data.index)
    print(f"Initial missing timestamps: {len(initial_missing)}")
    if len(initial_missing) == 0:
        print("No need to deal missing data.")
        return df_data

    print("Dealing with missing data.")
    print("Initial missing dates:", pd.Series(initial_missing.date).value_counts())

    df_reindexed = df_data.reindex(complete_time_range)
    df_reindexed['date'] = df_reindexed.index.date
    missing_flag = df_reindexed.drop(columns='date').isna().any(axis=1)
    missing_count_per_day = missing_flag.groupby(df_reindexed['date']).sum()
    print("Missing timestamps per day:")
    print(missing_count_per_day)

    threshold = 8
    days_to_drop = missing_count_per_day[missing_count_per_day > threshold].index
    print(f"\nDays to drop: {len(days_to_drop)}")

    df_cleaned = df_reindexed[~df_reindexed['date'].isin(days_to_drop)].copy()
    df_cleaned.drop(columns='date', inplace=True)
    
    df_filled = df_cleaned.interpolate(method='time')

    expected_range = pd.date_range(
        start=df_filled.index.min(),
        end=df_filled.index.max(),
        freq='h'
    )
    missing_timestamps = expected_range.difference(df_filled.index)
    print(f"Total missing timestamps: {len(missing_timestamps)}")
    print("\nMissing timestamps by date:")
    missing_dates = pd.Series(missing_timestamps.date).value_counts().sort_index()
    print(missing_dates)
    
    return df_filled

df_cleaned = deal_missing_data(merged_df)

Initial missing timestamps: 48
Dealing with missing data.
Initial missing dates: 2025-02-13    24
2025-02-12    23
2025-02-14     1
Name: count, dtype: int64
Missing timestamps per day:
date
2024-01-01     0
2024-01-02     0
2024-01-03     0
2024-01-04     0
2024-01-05     0
              ..
2025-02-11     0
2025-02-12    23
2025-02-13    24
2025-02-14     1
2025-02-15     0
Length: 412, dtype: int64

Days to drop: 2
Total missing timestamps: 48

Missing timestamps by date:
2025-02-12    24
2025-02-13    24
Name: count, dtype: int64


In [3]:
print("Before normalized:")
print(df_cleaned.iloc[10:15])

scalers = {}

columns_to_scale = [col for col in df_cleaned.columns]
normalized_df = df_cleaned.copy()

for col in columns_to_scale:
    if 'output' in col:
        scaler = MinMaxScaler()
    else:
        scaler = StandardScaler()

    normalized_df[col] = scaler.fit_transform(normalized_df[[col]])
    scalers[col] = scaler

with open('../model/scalers.pkl', 'wb') as f:
    pickle.dump(scalers, f)

normalized_df['datetime'] = normalized_df.index

print("\nAfter normalized:")
print(normalized_df.iloc[10:15])

normalized_df.to_csv("../output/normalized_df.csv", index=True, encoding="utf-8-sig")

Before normalized:
                        t2m   ws10m  ws100m      ssrd   tp      rh  \
2024-01-01 11:00:00 -5.3400  5.4026  7.4081  286.9511  0.0  0.5627   
2024-01-01 12:00:00 -4.8384  5.8687  7.7351  388.4978  0.0  0.5411   
2024-01-01 13:00:00 -0.0111  6.0763  7.9036  439.6089  0.0  0.4029   
2024-01-01 14:00:00  0.2727  6.1327  7.9847  432.9600  0.0  0.3896   
2024-01-01 15:00:00  0.5780  5.9767  7.8884  367.9467  0.0  0.3798   

                     solar_output  wind_output  
2024-01-01 11:00:00       946.277     2549.727  
2024-01-01 12:00:00      1294.318     2402.862  
2024-01-01 13:00:00      1230.975     2602.524  
2024-01-01 14:00:00      1227.397     2706.338  
2024-01-01 15:00:00      1165.689     2813.120  

After normalized:
                          t2m     ws10m    ws100m      ssrd        tp  \
2024-01-01 11:00:00 -0.953372  1.382216  0.904447  0.385923 -0.106259   
2024-01-01 12:00:00 -0.917707  1.656826  1.023772  0.775450 -0.106259   
2024-01-01 13:00:00 -0.57447

In [4]:
normalized_df.columns

# 数据验证函数
def validate_dataset(df, name="数据集"):
    """验证数据集的完整性"""
    print(f"\n{name}验证:")
    print(f"总行数: {len(df)}")
    print(f"缺失值统计:")
    print(df.isnull().sum())
    print(f"\n数值范围:")
    print(df.describe())
    return not df.isnull().any().any()

# 在创建数据集之前进行验证
validate_dataset(normalized_df, "规范化数据")


规范化数据验证:
总行数: 9816
缺失值统计:
t2m             0
ws10m           0
ws100m          0
ssrd            0
tp              0
rh              0
solar_output    0
wind_output     0
datetime        0
dtype: int64

数值范围:
               t2m         ws10m        ws100m          ssrd           tp  \
count  9816.000000  9.816000e+03  9.816000e+03  9.816000e+03  9816.000000   
mean      0.000000 -3.691695e-16 -4.082581e-16  1.737268e-16     0.000000   
min      -2.391871 -1.788015e+00 -1.780387e+00 -7.148030e-01    -0.106259   
25%      -0.901206 -7.064284e-01 -8.008784e-01 -7.148030e-01    -0.106259   
50%       0.056376 -2.293824e-01 -1.159584e-01 -6.960155e-01    -0.106259   
75%       0.868079  5.554870e-01  6.987222e-01  6.488299e-01    -0.106259   
max       2.109066  5.078630e+00  4.381639e+00  3.033913e+00    49.624781   
std       1.000051  1.000051e+00  1.000051e+00  1.000051e+00     1.000051   

                 rh  solar_output  wind_output                       datetime  
count  9.816000e+

True

In [5]:
# 数据预处理验证
if normalized_df.isnull().any().any():
    print("警告：数据中存在缺失值")
    normalized_df = normalized_df.fillna(method='ffill').fillna(method='bfill')

# 创建时间索引
normalized_df['time_idx'] = np.arange(1, len(normalized_df) + 1)
normalized_df['group_id'] = '0'
normalized_df['month'] = normalized_df['datetime'].dt.month.astype(str)

# 数据重构为长格式
df_long = normalized_df.melt(
    id_vars=['datetime', 'time_idx', 't2m', 'ws10m', 'ws100m', 'ssrd', 'tp', 'rh'],
    value_vars=['solar_output', 'wind_output'],
    var_name='output_type',
    value_name='target'
)

# 数据验证函数
def validate_dataset(df, name="数据集"):
    """验证数据集的完整性"""
    print(f"\n{name}验证:")
    print(f"总行数: {len(df)}")
    print(f"缺失值统计:")
    print(df.isnull().sum())
    print(f"\n数值范围:")
    print(df.describe())
    return not df.isnull().any().any()

# 在创建数据集之前进行验证
validate_dataset(normalized_df, "规范化数据")
validate_dataset(df_long, "长格式数据")

# 模型参数设置
max_encoder_length = 24   # 历史输入长度（例如过去24个时间步）
max_prediction_length = 6 # 预测未来6个时间步

# 为了保证每个时间序列足够长，需要保证：min_encoder_length + min_prediction_length <= 序列总长度
min_encoder_length = max_encoder_length  # 此处固定为 max_encoder_length
min_prediction_length = max_prediction_length

training_cutoff = df_long["time_idx"].max() - max_prediction_length

# 数据集划分
training_data = df_long[df_long["time_idx"] <= training_cutoff].copy()

# 创建训练数据集
training_dataset = TimeSeriesDataSet(
    normalized_df[normalized_df['time_idx'] <= training_cutoff],
    time_idx="time_idx",
    target='solar_output',
    group_ids=["group_id"],
    min_encoder_length=min_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=min_prediction_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["group_id"],
    time_varying_known_categoricals=["month"],
    time_varying_known_reals=["time_idx", "t2m", "ws10m", "ws100m", "ssrd", "tp", "rh"],
    time_varying_unknown_reals=["solar_output"],
    target_normalizer=GroupNormalizer(
        groups=["group_id"],
        transformation="softplus"
    ),
    add_relative_time_idx=True,
    add_encoder_length=True,
    add_target_scales=True,      # 添加目标尺度信息（用于逆归一化）
    allow_missing_timesteps=True  # 允许缺失时间步
)

# 创建验证数据集
validation_dataset = TimeSeriesDataSet.from_dataset(
    training_dataset, 
    normalized_df, 
    predict=True, 
    stop_randomization=True
)

# 创建数据加载器
batch_size = 64
train_dataloader = training_dataset.to_dataloader(
    train=True, 
    batch_size=batch_size, 
    num_workers=7,
    pin_memory=True
)
val_dataloader = validation_dataset.to_dataloader(
    train=False, 
    batch_size=batch_size, 
    num_workers=7,
    pin_memory=True,
    persistent_workers=True
)

# 添加数据验证步骤
print("\n数据集信息:")
print(f"训练集大小: {len(training_dataset)}")
print(f"验证集大小: {len(validation_dataset)}")

# 创建TFT模型
tft = TemporalFusionTransformer.from_dataset(
    training_dataset,
    learning_rate=0.01,          
    hidden_size=32,              
    attention_head_size=2,       
    dropout=0.2,                 
    hidden_continuous_size=16,   
    output_size=7,
    loss=QuantileLoss(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

# 配置训练器
trainer = pl.Trainer(
    max_epochs=30,
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    devices=1,
    callbacks=[
        EarlyStopping(
            monitor="val_loss",
            min_delta=1e-4,
            patience=10,
            verbose=True,
            mode="min"
        ),
        LearningRateMonitor()
    ],
    logger=True,
    enable_progress_bar=True
)

# 训练模型
try:
    trainer.fit(
        model=tft,
        train_dataloaders=train_dataloader,
        val_dataloaders=val_dataloader
    )
    print("\n模型训练完成")
except Exception as e:
    print(f"训练过程中发生错误:\n{str(e)}")
    print(f"PyTorch Lightning 版本: {pl.__version__}")
    print(f"PyTorch 版本: {torch.__version__}")
    raise

# 预测
if trainer.is_global_zero:
    try:
        raw_predictions, x = tft.predict(
            val_dataloader,
            mode="raw",
            return_x=True,
            trainer_kwargs={"accelerator": "gpu" if torch.cuda.is_available() else "cpu"}
        )
        
        # 输出预测结果
        print("\n预测完成")
        print(f"预测结果 shape: {raw_predictions['prediction'].shape}")
        print("预测时间范围:",
              normalized_df[normalized_df['time_idx'] > training_cutoff]['datetime'].min(),
              "至",
              normalized_df[normalized_df['time_idx'] > training_cutoff]['datetime'].max())
    except Exception as e:
        print(f"预测过程中发生错误: {str(e)}")
        raise

/opt/anaconda3/envs/TFT/lib/python3.9/site-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/opt/anaconda3/envs/TFT/lib/python3.9/site-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



规范化数据验证:
总行数: 9816
缺失值统计:
t2m             0
ws10m           0
ws100m          0
ssrd            0
tp              0
rh              0
solar_output    0
wind_output     0
datetime        0
time_idx        0
group_id        0
month           0
dtype: int64

数值范围:
               t2m         ws10m        ws100m          ssrd           tp  \
count  9816.000000  9.816000e+03  9.816000e+03  9.816000e+03  9816.000000   
mean      0.000000 -3.691695e-16 -4.082581e-16  1.737268e-16     0.000000   
min      -2.391871 -1.788015e+00 -1.780387e+00 -7.148030e-01    -0.106259   
25%      -0.901206 -7.064284e-01 -8.008784e-01 -7.148030e-01    -0.106259   
50%       0.056376 -2.293824e-01 -1.159584e-01 -6.960155e-01    -0.106259   
75%       0.868079  5.554870e-01  6.987222e-01  6.488299e-01    -0.106259   
max       2.109066  5.078630e+00  4.381639e+00  3.033913e+00    49.624781   
std       1.000051  1.000051e+00  1.000051e+00  1.000051e+00     1.000051   

                 rh  solar_output  wind_out

/opt/anaconda3/envs/TFT/lib/python3.9/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/opt/anaconda3/envs/TFT/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | loggi

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/opt/anaconda3/envs/TFT/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:420: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.004


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.0001. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.000 >= min_delta = 0.0001. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.000 >= min_delta = 0.0001. New best score: 0.002


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.0001. New best score: 0.001


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.000 >= min_delta = 0.0001. New best score: 0.001


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 10 records. Best score: 0.001. Signaling Trainer to stop.
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



模型训练完成
预测过程中发生错误: too many values to unpack (expected 2)


ValueError: too many values to unpack (expected 2)

训练模型

In [None]:
prediction_len = 4
encoder_len = 30
training_cutoff = normalized_df['datetime'].max() - prediction_len

training = TimeSeriesDataSet(
    normalized_df[lambda x: x['datetime'] <= training_cutoff],
    time_idx='datetime',
    target=['solar_output', 'wind_output'],
    min_encoder_length=encoder_len,
    max_encoder_length=encoder_len,
    min_prediction_length=prediction_len,
    max_prediction_length=prediction_len,
    # 已知的时间变量：时间索引及天气数据（这些数据未来是可知的，如气象预报）
    time_varying_known_reals=['datetime', 't2m', 'ws10m', 'ws100m', 'ssrd', 'tp', 'rh'],
    # 预测目标是未知变量
    time_varying_unknown_reals=['solar_output', 'wind_output'],
    target_normalizer=GroupNormalizer(
        groups=[],  # 不使用分组
        transformation="softplus"
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True
)

validation = TimeSeriesDataSet.from_dataset(
    training, normalized_df, predict=True, stop_randomization=True
)

batch_size = 64
train_dataloader = DataLoader(training, batch_size=batch_size, shuffle=True, num_workers=0)
val_dataloader = DataLoader(validation, batch_size=batch_size, shuffle=False, num_workers=0)

tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,           # 隐藏层维度，可根据数据复杂性调整
    attention_head_size=1,    # 注意力头数
    dropout=0.1,              # Dropout 率，用于防止过拟合
    hidden_continuous_size=8, # 连续变量的隐藏层维度
    output_size=7,            # 输出层维度，对应QuantileLoss中设定的分位数数量
    loss=QuantileLoss(),      # 损失函数
    log_interval=10,
    reduce_on_plateau_patience=4,
)

early_stop_callback = EarlyStopping(
    monitor="val_loss", min_delta=1e-4, patience=5, verbose=False, mode="min"
)
lr_logger = LearningRateMonitor()

trainer = pl.Trainer(
    max_epochs=20,
    gpus=1 if torch.cuda.is_available() else 0,
    gradient_clip_val=0.1,
    callbacks=[early_stop_callback, lr_logger],
)

trainer.fit(
    tft,
    train_dataloader=train_dataloader,
    val_dataloaders=val_dataloader,
)

raw_predictions, x = tft.predict(val_dataloader, mode="raw", return_x=True)

In [None]:
import matplotlib.pyplot as plt

def plot_predictions(raw_predictions, x, idx=0):
    """
    绘制预测结果
    Args:
        raw_predictions: 模型预测的原始结果
        x: 输入数据
        idx: 要显示的样本索引
    """
    plt.figure(figsize=(15, 10))
    
    # 设置中文字体
    plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
    plt.rcParams['axes.unicode_minus'] = False
    
    # 太阳能出力预测
    plt.subplot(2, 1, 1)
    plt.plot(x["encoder_target"][idx, :, 0], label="历史太阳能出力")
    plt.plot(range(encoder_len, encoder_len + prediction_len),
             raw_predictions["prediction"][idx, :, 0, 3],  # 中位数预测
             label="预测太阳能出力")
    plt.title("太阳能出力预测")
    plt.legend()
    
    # 风电出力预测
    plt.subplot(2, 1, 2)
    plt.plot(x["encoder_target"][idx, :, 1], label="历史风电出力")
    plt.plot(range(encoder_len, encoder_len + prediction_len),
             raw_predictions["prediction"][idx, :, 1, 3],  # 中位数预测
             label="预测风电出力")
    plt.title("风电出力预测")
    plt.legend()
    
    plt.tight_layout()
    plt.show()

# 绘制预测结果
plot_predictions(raw_predictions, x)

# 计算预测误差
def calculate_metrics(predictions, actuals):
    """计算预测指标"""
    mse = np.mean((predictions - actuals) ** 2)
    mae = np.mean(np.abs(predictions - actuals))
    return mse, mae

# 分别计算太阳能和风电的预测误差
solar_predictions = raw_predictions["prediction"][:, :, 0, 3]  # 太阳能预测（中位数）
wind_predictions = raw_predictions["prediction"][:, :, 1, 3]   # 风电预测（中位数）
solar_actuals = x["decoder_target"][:, :, 0]
wind_actuals = x["decoder_target"][:, :, 1]

solar_mse, solar_mae = calculate_metrics(solar_predictions, solar_actuals)
wind_mse, wind_mae = calculate_metrics(wind_predictions, wind_actuals)

print("\n预测效果评估:")
print(f"太阳能出力 - MSE: {solar_mse:.4f}, MAE: {solar_mae:.4f}")
print(f"风电出力 - MSE: {wind_mse:.4f}, MAE: {wind_mae:.4f}")