In [None]:
from google.colab import files
# Colab-only step: prompt for a file upload and keep the returned mapping in `uploaded`.
uploaded = files.upload()

Saving PdM_forecasting.xlsx to PdM_forecasting.xlsx


**PdM datasets**

In [None]:
import glob
import os
import re

import numpy as np
import pandas as pd
from openai import OpenAI
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

# DeepSeek exposes an OpenAI-compatible endpoint, so the OpenAI client is pointed at it.
# The API key is read from the DEEPSEEK_API_KEY environment variable so that the secret
# is never stored in the notebook; a KeyError here means the variable is not set.
client = OpenAI(api_key=os.environ["DEEPSEEK_API_KEY"], base_url="https://api.deepseek.com")

def get_prediction_length(sheet_name):
    """Choose the forecast horizon from the number that ends a sheet name.

    Args:
        sheet_name: Name ending in ``_<digits>`` (e.g. ``"volt_200"``).

    Returns:
        24 when the number is below 180, 36 below 300, 48 below 1000,
        otherwise 100.

    Raises:
        ValueError: If the name does not end in ``_<digits>``.
    """
    suffix = re.search(r'_(\d+)$', sheet_name)
    if suffix is None:
        raise ValueError(f"Invalid sheet name: {sheet_name}")

    n_points = int(suffix.group(1))
    # (exclusive upper bound, horizon) pairs, checked in ascending order.
    for upper_bound, horizon in ((180, 24), (300, 36), (1000, 48)):
        if n_points < upper_bound:
            return horizon
    return 100

def extract_float_list(text):
    """Parse the first bracketed, comma-separated number list found in ``text``.

    Args:
        text: Raw model response.

    Returns:
        The list entries converted to ``float``; empty entries (such as the
        one left by a trailing comma) are skipped.

    Raises:
        ValueError: If no ``[...]`` group made only of number characters,
            commas and whitespace is present, or if an entry is not a
            valid float.
    """
    found = re.search(r'\[([0-9eE+.,\s-]+)\]', text)
    if found is None:
        raise ValueError("No valid list found in model response.")

    numbers = []
    for piece in found.group(1).strip().split(","):
        if not piece.strip():
            continue  # empty slot, e.g. after a trailing comma
        numbers.append(float(piece))
    return numbers

def summarize_metrics(df):
    """Return descriptive statistics for the MAPE and MAE columns of ``df``.

    The input frame is not modified: absolute values are taken on a copy.
    A frame that lacks one or both columns (for example an empty frame built
    from zero successful forecasts) is summarised as columns of missing
    values instead of raising ``KeyError``.

    Args:
        df: DataFrame of per-series results, normally with "MAPE" and "MAE"
            columns.

    Returns:
        DataFrame indexed by metric name ("MAPE", "MAE") whose columns are the
        ``describe()`` statistics (count, mean, std, min, quartiles, max).
    """
    # reindex() selects the two metric columns into a new frame and creates any
    # missing one as NaN, so the caller's frame is never touched.
    metrics = df.reindex(columns=["MAPE", "MAE"]).astype(float).abs()
    return metrics.describe().T

# Evaluate DeepSeek on every sheet of the PdM workbook: hold out the tail of each
# series, ask the model to forecast it, and score the forecast with MAE / MAPE.
excel_file = "/content/PdM_forecasting.xlsx"
sheet_names = pd.ExcelFile(excel_file).sheet_names

# One {"Sheet", "MAE", "MAPE"} record per sheet that was forecast and parsed successfully.
results = []

for sheet_name in sheet_names:
    try:
        df = pd.read_excel(excel_file, sheet_name=sheet_name)
        # The first numeric column of the sheet is used as the series; NaNs are dropped.
        col = df.select_dtypes(include=['number']).columns[0]
        series = df[col].dropna().tolist()

        pred_len = get_prediction_length(sheet_name)
        # Require at least 5 input points in addition to the held-out horizon.
        if len(series) < pred_len + 5:
            print(f"❌ Skipping {sheet_name}: insufficient data.")
            continue

        # The last `pred_len` values are the ground truth; everything before is the model input.
        input_series = series[:-pred_len]
        target_series = series[-pred_len:]

        prompt = (
            f"I have a time series with {len(input_series)} values.\n"
            f"Series: {input_series}\n"
            f"Please forecast the next {pred_len} values in the same format "
            f"(as a Python list, just the numbers)."
        )

        response = client.chat.completions.create(
            model="deepseek-reasoner",
            messages=[
                {"role": "system", "content": "You are a forecasting expert."},
                {"role": "user", "content": prompt},
            ]
        )

        predicted_str = response.choices[0].message.content.strip()

        # In this first pass the forecast must contain exactly `pred_len` values;
        # anything else is reported and the sheet is skipped.
        try:
            predicted = extract_float_list(predicted_str)
            if len(predicted) != pred_len:
                raise ValueError("Prediction length mismatch.")
        except Exception as e:
            print(f"❌ Prediction parse failed for {sheet_name}: {e}")
            continue

        mae = mean_absolute_error(target_series, predicted)
        # scikit-learn reports MAPE as a fraction, not a percentage.
        mape = mean_absolute_percentage_error(target_series, predicted)

        results.append({
            "Sheet": sheet_name,
            "MAE": mae,
            "MAPE": mape,
        })

        print(f"{sheet_name} → MAE: {mae:.4f}, MAPE: {mape:.4f}")

    # Any other failure (file read, API call, ...) is reported and the loop moves on.
    except Exception as e:
        print(f"❌ Error in {sheet_name}: {e}")

# Persist the per-sheet scores, then print summary statistics over all scored sheets.
results_df = pd.DataFrame(results)
results_df.to_csv("/content/deepseek_forecasting_results.csv", index=False)

print("\n📊 Summary of Forecasting Performance:")
print(summarize_metrics(results_df))

volt_200 → MAE: 16.2673, MAPE: 0.0958
volt_500 → MAE: 12.8748, MAPE: 0.0819
❌ Prediction parse failed for volt_2000: No valid list found in model response.
volt_5000 → MAE: 161.2092, MAPE: 0.9503
❌ Prediction parse failed for volt_8761: Prediction length mismatch.
rotate_200 → MAE: 32.6420, MAPE: 0.0729
rotate_500 → MAE: 51.6937, MAPE: 0.1142
❌ Prediction parse failed for rotate_2000: Prediction length mismatch.
❌ Prediction parse failed for rotate_5000: Prediction length mismatch.
rotate_8761 → MAE: 49.4138, MAPE: 0.1079
pressure_200 → MAE: 7.9624, MAPE: 0.0791
❌ Prediction parse failed for pressure_500: Prediction length mismatch.
❌ Prediction parse failed for pressure_2000: Prediction length mismatch.
❌ Prediction parse failed for pressure_5000: Prediction length mismatch.
❌ Prediction parse failed for pressure_8761: Prediction length mismatch.
vibration_200 → MAE: 5.5090, MAPE: 0.1479
❌ Prediction parse failed for vibration_500: Prediction length mismatch.
vibration_2000 → MAE: 5.3

In [None]:
# Second pass over a hand-maintained list of sheets to re-run. Unlike the first
# pass, a forecast that is too long is truncated to `pred_len` instead of being
# rejected; a forecast that is too short is still rejected.
failed_sheets = [
    "volt_2000", "volt_8761",
    "rotate_2000", "rotate_5000",
    "pressure_500", "pressure_2000", "pressure_5000", "pressure_8761",
    "vibration_500", "vibration_5000", "vibration_8761"
]

# One {"Sheet", "MAE", "MAPE"} record per sheet scored in this pass.
retry_results = []

for sheet_name in failed_sheets:
    try:
        df = pd.read_excel(excel_file, sheet_name=sheet_name)
        # The first numeric column of the sheet is used as the series; NaNs are dropped.
        col = df.select_dtypes(include=['number']).columns[0]
        series = df[col].dropna().tolist()

        pred_len = get_prediction_length(sheet_name)
        if len(series) < pred_len + 5:
            print(f"❌ Skipping {sheet_name}: insufficient data.")
            continue

        # The last `pred_len` values are the ground truth; everything before is the model input.
        input_series = series[:-pred_len]
        target_series = series[-pred_len:]

        prompt = (
            f"I have a time series with {len(input_series)} values.\n"
            f"Series: {input_series}\n"
            f"Please forecast the next {pred_len} values in the same format "
            f"(as a Python list, just the numbers)."
        )

        response = client.chat.completions.create(
            model="deepseek-reasoner",
            messages=[
                {"role": "system", "content": "You are a forecasting expert."},
                {"role": "user", "content": prompt},
            ]
        )

        predicted_str = response.choices[0].message.content.strip()

        try:
            predicted = extract_float_list(predicted_str)

            if len(predicted) < pred_len:
                raise ValueError(f"Too short prediction: expected {pred_len}, got {len(predicted)}")
            elif len(predicted) > pred_len:
                # Keep only the first `pred_len` forecast values so lengths match the target.
                print(f"⚠️ Truncating prediction for {sheet_name}: expected {pred_len}, got {len(predicted)}")
                predicted = predicted[:pred_len]

        except Exception as e:
            print(f"❌ Retry parse failed for {sheet_name}: {e}")
            continue

        mae = mean_absolute_error(target_series, predicted)
        # scikit-learn reports MAPE as a fraction, not a percentage.
        mape = mean_absolute_percentage_error(target_series, predicted)

        retry_results.append({
            "Sheet": sheet_name,
            "MAE": mae,
            "MAPE": mape,
        })

        print(f"[RETRY] {sheet_name} → MAE: {mae:.4f}, MAPE: {mape:.4f}")

    # Any other failure (file read, API call, ...) is reported and the loop moves on.
    except Exception as e:
        print(f"❌ Retry error in {sheet_name}: {e}")

retry_df = pd.DataFrame(retry_results)

⚠️ Truncating prediction for volt_2000: expected 100, got 114
[RETRY] volt_2000 → MAE: 59.6492, MAPE: 0.3580
[RETRY] volt_8761 → MAE: 12.5964, MAPE: 0.0778
[RETRY] rotate_2000 → MAE: 44.2246, MAPE: 0.0982
[RETRY] rotate_5000 → MAE: 149.3804, MAPE: 0.3319
[RETRY] pressure_500 → MAE: 8.6206, MAPE: 0.0917
⚠️ Truncating prediction for pressure_2000: expected 100, got 128
[RETRY] pressure_2000 → MAE: 61.4354, MAPE: 0.6045
[RETRY] pressure_5000 → MAE: 7.7923, MAPE: 0.0784
❌ Retry parse failed for pressure_8761: Too short prediction: expected 100, got 90
[RETRY] vibration_500 → MAE: 5.5560, MAPE: 0.1529
❌ Retry parse failed for vibration_5000: Too short prediction: expected 100, got 98
[RETRY] vibration_8761 → MAE: 5.0189, MAPE: 0.1368


In [None]:
# Third pass over the sheets still to be scored. Length handling is relaxed
# further: a forecast with at least 90% of the requested values is accepted and the
# target is shortened to match; longer forecasts are truncated.
last_failed = ["pressure_8761", "vibration_5000"]

# One {"Sheet", "MAE", "MAPE"} record per sheet scored in this pass.
retry2_results = []

for sheet_name in last_failed:
    try:
        df = pd.read_excel(excel_file, sheet_name=sheet_name)
        # The first numeric column of the sheet is used as the series; NaNs are dropped.
        col = df.select_dtypes(include=['number']).columns[0]
        series = df[col].dropna().tolist()

        pred_len = get_prediction_length(sheet_name)
        # Note: in this pass a short series only triggers a warning; it is not skipped.
        if len(series) < pred_len + 5:
            print(f"⚠️ {sheet_name} has very limited data: only {len(series)} total points")

        # The last `pred_len` values are the ground truth; everything before is the model input.
        input_series = series[:-pred_len]
        target_series = series[-pred_len:]

        prompt = (
            f"I have a time series with {len(input_series)} values.\n"
            f"Series: {input_series}\n"
            f"Please forecast the next {pred_len} values in the same format "
            f"(as a Python list, just the numbers)."
        )

        response = client.chat.completions.create(
            model="deepseek-reasoner",
            messages=[
                {"role": "system", "content": "You are a forecasting expert."},
                {"role": "user", "content": prompt},
            ]
        )

        predicted_str = response.choices[0].message.content.strip()

        try:
            predicted = extract_float_list(predicted_str)

            if len(predicted) < pred_len * 0.9:
                raise ValueError(f"Too short prediction: expected {pred_len}, got {len(predicted)}")
            elif len(predicted) < pred_len:
                # Score only the leading part of the target that the forecast covers.
                print(f"⚠️ Accepting shorter prediction for {sheet_name}: expected {pred_len}, got {len(predicted)}")
                target_series = target_series[:len(predicted)]
            elif len(predicted) > pred_len:
                # Keep only the first `pred_len` forecast values so lengths match the target.
                print(f"⚠️ Truncating prediction for {sheet_name}: expected {pred_len}, got {len(predicted)}")
                predicted = predicted[:pred_len]

        except Exception as e:
            print(f"❌ Final parse failed for {sheet_name}: {e}")
            continue

        mae = mean_absolute_error(target_series, predicted)
        # scikit-learn reports MAPE as a fraction, not a percentage.
        mape = mean_absolute_percentage_error(target_series, predicted)

        retry2_results.append({
            "Sheet": sheet_name,
            "MAE": mae,
            "MAPE": mape,
        })

        print(f"[FINAL RUN] {sheet_name} → MAE: {mae:.4f}, MAPE: {mape:.4f}")

    # Any other failure (file read, API call, ...) is reported and the loop moves on.
    except Exception as e:
        print(f"❌ Final error in {sheet_name}: {e}")

⚠️ Truncating prediction for pressure_8761: expected 100, got 105
[FINAL RUN] pressure_8761 → MAE: 11.1635, MAPE: 0.1196
[FINAL RUN] vibration_5000 → MAE: 5.5541, MAPE: 0.1434


**PdM results**

In [None]:
# Merge the three passes into one table. When a sheet was scored more than once,
# the record from the latest pass wins (keep="last" on the concatenation order).
retry2_df = pd.DataFrame(retry2_results)

all_results_df = (
    pd.concat([results_df, retry_df, retry2_df], ignore_index=True)
    .drop_duplicates(subset="Sheet", keep="last")
    .reset_index(drop=True)
)

print("\n✅ Final Combined Forecasting Performance:")
print(summarize_metrics(all_results_df))


✅ Final Combined Forecasting Performance:
      count       mean        std       min       25%        50%        75%  \
MAPE   20.0   0.199342   0.219321  0.072851  0.089239   0.116861   0.149154   
MAE    20.0  35.697327  45.504712  5.018946  7.233217  12.735615  49.983788   

             max  
MAPE    0.950259  
MAE   161.209221  


In [None]:
import zipfile
import os

# Unpack the dataset archive into /content/datasets; the archive is closed on exit.
with zipfile.ZipFile("dataset.zip", mode='r') as zip_ref:
    zip_ref.extractall(path="/content/datasets")

In [None]:
# DeepSeek exposes an OpenAI-compatible endpoint, so the OpenAI client is pointed at it.
# The API key is read from the DEEPSEEK_API_KEY environment variable so that the secret
# is never stored in the notebook; a KeyError here means the variable is not set.
client = OpenAI(api_key=os.environ["DEEPSEEK_API_KEY"], base_url="https://api.deepseek.com")

# Step 1: Determine prediction length from filename
def get_prediction_length(file_name):
    """Choose the forecast horizon from the number encoded in a dataset file name.

    The number at the very end of the name (``..._<digits>``) is preferred, so an
    earlier numeric segment such as a year (``Sales_2020_DARTS_150``) is not
    mistaken for it. Names that carry extra text after the number fall back to
    the first ``_<digits>`` segment.

    Args:
        file_name: File name without extension, e.g. ``"ETTm1Dataset_DARTS_OT_1000"``.

    Returns:
        24 when the number is below 180, 36 below 300, 48 below 1000,
        otherwise 100.

    Raises:
        ValueError: If the name contains no ``_<digits>`` segment.
    """
    match = re.search(r'_(\d+)$', file_name) or re.search(r'_(\d+)', file_name)
    if match is None:
        raise ValueError(f"Invalid file name: {file_name}")

    data_length = int(match.group(1))
    if data_length < 180:
        return 24
    if data_length < 300:
        return 36
    if data_length < 1000:
        return 48
    return 100

# Step 2: Extract float list from DeepSeek response
def extract_float_list(text):
    """Return the numbers of the first bracketed list in ``text`` as floats.

    Only a ``[...]`` group consisting of digits, signs, dots, exponent
    letters, commas and whitespace is recognised. Entries are split on
    commas; blank entries are ignored.

    Raises:
        ValueError: If no such list exists or an entry cannot be converted.
    """
    hit = re.search(r'\[([0-9eE+.,\s-]+)\]', text)
    if hit is None:
        raise ValueError("No valid list found in model response.")

    entries = hit.group(1).strip().split(",")
    return [float(entry) for entry in entries if entry.strip()]

# Step 3: Summarize evaluation metrics
def summarize_metrics(df):
    """Return descriptive statistics for the MAPE and MAE columns of ``df``.

    The input frame is not modified: absolute values are taken on a copy.
    A frame that lacks one or both columns (for example an empty frame built
    from zero successful forecasts) is summarised as columns of missing
    values instead of raising ``KeyError``.

    Args:
        df: DataFrame of per-series results, normally with "MAPE" and "MAE"
            columns.

    Returns:
        DataFrame indexed by metric name ("MAPE", "MAE") whose columns are the
        ``describe()`` statistics (count, mean, std, min, quartiles, max).
    """
    # reindex() selects the two metric columns into a new frame and creates any
    # missing one as NaN, so the caller's frame is never touched.
    metrics = df.reindex(columns=["MAPE", "MAE"]).astype(float).abs()
    return metrics.describe().T

# Evaluate DeepSeek on every CSV extracted to /content/datasets: hold out the tail of
# each series, ask the model to forecast it, and score the forecast with MAE / MAPE.
#
# The file list is built here so this cell runs on a fresh kernel instead of relying
# on a `csv_files` variable left behind by another cell. The message printed below
# reads "found N CSV files".
csv_files = glob.glob("/content/datasets/*.csv")
print(f"共找到 {len(csv_files)} 个 CSV 文件")

# One {"File", "MAE", "MAPE"} record per file that was forecast and parsed successfully.
results = []

for file_path in csv_files:
    # Derived outside the try block so the error handler below can always name the file.
    file_name = os.path.basename(file_path).replace(".csv", "")
    try:
        pred_len = get_prediction_length(file_name)

        df = pd.read_csv(file_path)
        # The first numeric column of the file is used as the series; NaNs are dropped.
        col = df.select_dtypes(include=['number']).columns[0]
        series = df[col].dropna().tolist()

        # Require at least 5 input points in addition to the held-out horizon.
        if len(series) < pred_len + 5:
            print(f"❌ Skipping {file_name}: insufficient data.")
            continue

        # The last `pred_len` values are the ground truth; everything before is the model input.
        input_series = series[:-pred_len]
        target_series = series[-pred_len:]

        prompt = (
            f"I have a time series with {len(input_series)} values.\n"
            f"Series: {input_series}\n"
            f"Please forecast the next {pred_len} values in the same format "
            f"(as a Python list, just the numbers)."
        )

        response = client.chat.completions.create(
            model="deepseek-reasoner",
            messages=[
                {"role": "system", "content": "You are a forecasting expert."},
                {"role": "user", "content": prompt},
            ]
        )

        predicted_str = response.choices[0].message.content.strip()

        try:
            predicted = extract_float_list(predicted_str)

            if len(predicted) < pred_len * 0.9:
                raise ValueError(f"Too short prediction: expected {pred_len}, got {len(predicted)}")
            elif len(predicted) < pred_len:
                # Score only the leading part of the target that the forecast covers.
                print(f"⚠️ Accepting shorter prediction for {file_name}: expected {pred_len}, got {len(predicted)}")
                target_series = target_series[:len(predicted)]
            elif len(predicted) > pred_len:
                # Keep only the first `pred_len` forecast values so lengths match the target.
                print(f"⚠️ Truncating prediction for {file_name}: expected {pred_len}, got {len(predicted)}")
                predicted = predicted[:pred_len]

        except Exception as e:
            print(f"❌ Prediction parse failed for {file_name}: {e}")
            continue

        mae = mean_absolute_error(target_series, predicted)
        # scikit-learn reports MAPE as a fraction, not a percentage.
        mape = mean_absolute_percentage_error(target_series, predicted)

        results.append({
            "File": file_name,
            "MAE": mae,
            "MAPE": mape,
        })

        print(f"{file_name} → MAE: {mae:.4f}, MAPE: {mape:.4f}")

    # Any other failure (file read, API error such as an over-long prompt, ...) is
    # reported and the loop moves on to the next file.
    except Exception as e:
        print(f"❌ Error in {file_name}: {e}")

# Final results
results_df = pd.DataFrame(results)

print("\n📊 Summary of Forecasting Performance:")
print(summarize_metrics(results_df))

共找到 80 个 CSV 文件
❌ Error in ETTh2Dataset_DARTS_OT_10000: Error code: 400 - {'error': {'message': "This model's maximum context length is 65536 tokens. However, you requested 69169 tokens (69169 in the messages, 0 in the completion). Please reduce the length of the messages or completion.", 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_request_error'}}
SunspotsDataset_DARTS_2820 → MAE: 71.0920, MAPE: 2.0054
ExchangeRateDataset_DARTS_chn_1000 → MAE: 0.0000, MAPE: 0.0000
ILINetDataset_DARTS_age25to64_364 → MAE: 2391.6875, MAPE: 0.7337
TemperatureDataset_DARTS_200 → MAE: 3.4528, MAPE: 0.5798
USGasolineDataset_DARTS_1578 → MAE: 2698.6200, MAPE: 0.3424
⚠️ Truncating prediction for ETTm1Dataset_DARTS_OT_1000: expected 100, got 114
ETTm1Dataset_DARTS_OT_1000 → MAE: 3.5349, MAPE: 0.1191
❌ Error in ETTm1Dataset_DARTS_OT_10000: Error code: 400 - {'error': {'message': "This model's maximum context length is 65536 tokens. However, you requested 69135 tokens (69135 in the messages,

In [None]:
# DeepSeek exposes an OpenAI-compatible endpoint, so the OpenAI client is pointed at it.
# The API key is read from the DEEPSEEK_API_KEY environment variable so that the secret
# is never stored in the notebook; a KeyError here means the variable is not set.
client = OpenAI(api_key=os.environ["DEEPSEEK_API_KEY"], base_url="https://api.deepseek.com")

def get_prediction_length(file_name):
    """Choose the forecast horizon from the number encoded in a dataset file name.

    The number at the very end of the name (``..._<digits>``) is preferred, so an
    earlier numeric segment such as a year (``Sales_2020_DARTS_150``) is not
    mistaken for it. Names that carry extra text after the number fall back to
    the first ``_<digits>`` segment.

    Args:
        file_name: File name without extension, e.g. ``"ETTh1Dataset_DARTS_OT_17420"``.

    Returns:
        24 when the number is below 180, 36 below 300, 48 below 1000,
        otherwise 100.

    Raises:
        ValueError: If the name contains no ``_<digits>`` segment.
    """
    match = re.search(r'_(\d+)$', file_name) or re.search(r'_(\d+)', file_name)
    if match is None:
        raise ValueError(f"Invalid file name: {file_name}")

    data_length = int(match.group(1))
    if data_length < 180:
        return 24
    if data_length < 300:
        return 36
    if data_length < 1000:
        return 48
    return 100

def extract_float_list(text):
    """Convert the first bracketed number list in a model reply to floats.

    Args:
        text: Raw model response.

    Returns:
        List of floats taken from the comma-separated entries of the first
        ``[...]`` group that contains only number characters, commas and
        whitespace. Blank entries are dropped.

    Raises:
        ValueError: If the reply holds no such list, or an entry is not a
            valid float.
    """
    bracketed = re.search(r'\[([0-9eE+.,\s-]+)\]', text)
    if bracketed is None:
        raise ValueError("No valid list found in model response.")

    parsed = []
    for chunk in bracketed.group(1).strip().split(","):
        if chunk.strip() == "":
            continue  # nothing between two commas / after the last comma
        parsed.append(float(chunk))
    return parsed

# Re-run a hand-maintained list of long datasets. Only the most recent
# `max_input_len` points before the held-out horizon are sent to the model, which
# keeps the prompt much shorter than sending the full series.
retry_files = [
    "ETTh2Dataset_DARTS_OT_10000",
    "ETTm1Dataset_DARTS_OT_10000",
    "ETTm2Dataset_DARTS_OT_10000",
    "ETTh1Dataset_DARTS_OT_17420",
    "ETTm2Dataset_DARTS_OT_20000",
    "ETTm1Dataset_DARTS_OT_20000",
    "ETTh2Dataset_DARTS_OT_17420",
    "ETTh1Dataset_DARTS_OT_10000",
    "EnergyDataset_DARTS_solar_10000",
    "AirQuality_UCI_ML_Repo_O3_9357"
]

csv_files = glob.glob("/content/datasets/*.csv")

# One {"Dataset", "MAE", "MAPE"} record per dataset scored in this pass.
retry_results = []
# Maximum number of history points included in the prompt.
max_input_len = 2000

for name in retry_files:
    # Match on the exact base name (without extension). A substring test would also
    # accept e.g. "..._10000.csv" when looking for "..._1000" and pick the wrong file.
    file_path = [
        f for f in csv_files
        if os.path.splitext(os.path.basename(f))[0] == name
    ]
    if not file_path:
        print(f"❌ File not found for {name}")
        continue

    try:
        df = pd.read_csv(file_path[0])
        # The first numeric column of the file is used as the series; NaNs are dropped.
        col = df.select_dtypes(include=['number']).columns[0]
        series = df[col].dropna().tolist()

        pred_len = get_prediction_length(name)

        if len(series) < pred_len + 10:
            print(f"❌ Skipping {name}: not enough data.")
            continue

        # Input: up to `max_input_len` points immediately before the horizon.
        # Target: the last `pred_len` points.
        input_series = series[-(pred_len + max_input_len):-pred_len]
        target_series = series[-pred_len:]

        prompt = (
            f"Forecast the next {pred_len} values based on this series:\n"
            f"{input_series}\nReturn only a list."
        )

        response = client.chat.completions.create(
            model="deepseek-reasoner",
            messages=[
                {"role": "system", "content": "You are a forecasting expert."},
                {"role": "user", "content": prompt},
            ]
        )

        predicted_str = response.choices[0].message.content.strip()

        try:
            predicted = extract_float_list(predicted_str)

            if len(predicted) < pred_len * 0.9:
                raise ValueError(f"Too short: expected {pred_len}, got {len(predicted)}")
            elif len(predicted) > pred_len:
                # Keep only the first `pred_len` forecast values so lengths match the target.
                print(f"⚠️ Truncating prediction for {name}: expected {pred_len}, got {len(predicted)}")
                predicted = predicted[:pred_len]
            elif len(predicted) < pred_len:
                # Score only the leading part of the target that the forecast covers.
                print(f"⚠️ Accepting shorter prediction for {name}: expected {pred_len}, got {len(predicted)}")
                target_series = target_series[:len(predicted)]

        except Exception as e:
            print(f"❌ Prediction parse failed for {name}: {e}")
            continue

        mae = mean_absolute_error(target_series, predicted)
        # scikit-learn reports MAPE as a fraction, not a percentage.
        mape = mean_absolute_percentage_error(target_series, predicted)

        retry_results.append({
            "Dataset": name,
            "MAE": mae,
            "MAPE": mape
        })

        print(f"[RETRY] {name} → MAE: {mae:.4f}, MAPE: {mape:.4f}")

    # Any other failure (file read, API call, ...) is reported and the loop moves on.
    except Exception as e:
        print(f"❌ Retry error in {name}: {e}")

retry_df = pd.DataFrame(retry_results)

⚠️ Truncating prediction for ETTh2Dataset_DARTS_OT_10000: expected 100, got 180
[RETRY] ETTh2Dataset_DARTS_OT_10000 → MAE: 23.3843, MAPE: 0.6358
⚠️ Truncating prediction for ETTm1Dataset_DARTS_OT_10000: expected 100, got 110
[RETRY] ETTm1Dataset_DARTS_OT_10000 → MAE: 1.3815, MAPE: 0.0774
⚠️ Truncating prediction for ETTm2Dataset_DARTS_OT_10000: expected 100, got 101
[RETRY] ETTm2Dataset_DARTS_OT_10000 → MAE: 1.5431, MAPE: 0.0598
⚠️ Truncating prediction for ETTh1Dataset_DARTS_OT_17420: expected 100, got 106
[RETRY] ETTh1Dataset_DARTS_OT_17420 → MAE: 3.8570, MAPE: 0.4278
⚠️ Truncating prediction for ETTm2Dataset_DARTS_OT_20000: expected 100, got 106
[RETRY] ETTm2Dataset_DARTS_OT_20000 → MAE: 3.2072, MAPE: 0.2191
⚠️ Truncating prediction for ETTm1Dataset_DARTS_OT_20000: expected 100, got 106
[RETRY] ETTm1Dataset_DARTS_OT_20000 → MAE: 2.6885, MAPE: 0.4488
⚠️ Truncating prediction for ETTh2Dataset_DARTS_OT_17420: expected 100, got 122
[RETRY] ETTh2Dataset_DARTS_OT_17420 → MAE: 3.5970, MAPE

**Results on the 80 datasets**

In [None]:
# Merge the full run with the long-series retry run into one table keyed by "Dataset".
# The full run labels its records "File", so that column is renamed; the retry
# records are already keyed by "Dataset" and need no rename.
results_df = results_df.rename(columns={"File": "Dataset"})

# When a dataset appears in both runs, the retry record (concatenated last) wins.
combined_df = (
    pd.concat([results_df, retry_df], ignore_index=True)
    .drop_duplicates(subset="Dataset", keep="last")
    .reset_index(drop=True)
)

# NOTE(review): the mean/max MAPE can be dominated by series whose held-out values
# contain zeros (MAPE divides by the actual value) — read the quartiles as well.
summary = combined_df[["MAE", "MAPE"]].describe().T
print(summary)

      count          mean           std  min       25%        50%         75%  \
MAE    80.0  7.242256e+03  5.571456e+04  0.0  1.423198  10.100074  724.172500   
MAPE   80.0  9.538874e+10  8.531828e+11  0.0  0.059472   0.256606    0.536683   

               max  
MAE   4.987259e+05  
MAPE  7.631099e+12  
