In [11]:
import os
import json

import pandas as pd
import numpy as np
import plotly.graph_objects as go

## Строим графики полученных результатов

In [12]:
def load_json(path: str) -> pd.DataFrame:
    """Load a JSON file of trial records into a DataFrame.

    The file must contain a JSON array of objects. From each object the keys
    ``trial``, ``mse_val``, ``mse_test`` and ``hypothesis`` are read with
    ``dict.get``, so an absent key yields ``None``. The one exception is
    ``mse_test``: when that key is absent the record's ``mse_val`` is used
    instead.

    Args:
        path: Location of the JSON file.

    Returns:
        DataFrame with the columns ``trial``, ``mse_val``, ``mse_test`` and
        ``hypothesis``; one row per record, in file order.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the content is not valid JSON.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except UnicodeDecodeError:
        # The file is not valid UTF-8: retry with the platform default
        # encoding. Only decoding failures are retried; any other error
        # (missing file, malformed JSON, interrupt) propagates unchanged.
        with open(path, "r") as f:
            data = json.load(f)

    return pd.DataFrame(
        {
            "trial": [d.get("trial") for d in data],
            "mse_val": [d.get("mse_val") for d in data],
            # Fall back to the validation score when no test score was logged.
            "mse_test": [d.get("mse_test", d.get("mse_val")) for d in data],
            "hypothesis": [d.get("hypothesis") for d in data],
        }
    )

In [13]:
# Windows-style relative path. Written as a raw string because "\l" is not a
# valid escape sequence: a plain literal only works by accident and triggers a
# SyntaxWarning on recent Python versions. The resulting value is unchanged.
llm_nas_dir = r"experiments\llm_opt"

In [14]:
# Result files for ETTh1, keyed first by prediction length and then by run name.
# Raw strings are used because the Windows-style separators would otherwise form
# invalid escape sequences ("\l", "\e", "\p", "\E", "\o"), which raise a
# SyntaxWarning on recent Python versions. The path values are unchanged.
etth_1_path = {
    "24": {
        "llm_grid_12": r"experiments\llm_opt\etth1\pred_len=24;th_b=8192;hypot.json",
        "llm_grid_3": r"experiments\llm_opt\etth1\ETTh1-grid3_pred_len=24;th_b=8192;hypot.json",
        "optuna": r"experiments\optuna_results\etth1\pred_len=24;OPTUNA.json"
    },
    "48":{
        "llm_grid_12": r"experiments\llm_opt\etth1\pred_len=48;th_b=8192;hypot.json",
        "llm_grid_3": r"experiments\llm_opt\etth1\ETTh1-grid3_pred_len=48;th_b=8192;hypot.json",
        "optuna": r"experiments\optuna_results\etth1\pred_len=48;OPTUNA.json"
    },
    "168":{
        "llm_grid_12": r"experiments\llm_opt\etth1\pred_len=168;th_b=8192;hypot.json",
        "llm_grid_3": r"experiments\llm_opt\etth1\ETTh1-grid3_pred_len=168;th_b=8192;hypot.json",
        "optuna": r"experiments\optuna_results\etth1\pred_len=168;OPTUNA.json"
    },
}


# Result files for ETTh2, keyed first by prediction length and then by run name.
# Raw strings are used because the Windows-style separators would otherwise form
# invalid escape sequences ("\l", "\e", "\E", "\o"), which raise a SyntaxWarning
# on recent Python versions. The path values are unchanged.
etth_2_path = {
    "24": {
        "llm_grid_12": r"experiments\llm_opt\etth2\ETTh2-pred_len=24;th_b=8192;hypot.json",
        "llm_grid_3": r"experiments\llm_opt\etth2\ETTh2-grid3_pred_len=24;th_b=8192;hypot.json",
        "optuna": r"experiments\optuna_results\etth2\ETTh2;pred_len=24;OPTUNA.json"
    },
    "48":{
        "llm_grid_12": r"experiments\llm_opt\etth2\ETTh2-pred_len=48;th_b=8192;hypot.json",
        "llm_grid_3": r"experiments\llm_opt\etth2\ETTh2-grid3_pred_len=48;th_b=8192;hypot.json",
        "optuna": r"experiments\optuna_results\etth2\ETTh2;pred_len=48;OPTUNA.json"
    },
    "168":{
        "llm_grid_12": r"experiments\llm_opt\etth2\ETTh2-pred_len=168;th_b=8192;hypot.json",
        "llm_grid_3": r"experiments\llm_opt\etth2\ETTh2-grid3_pred_len=168;th_b=8192;hypot.json",
        "optuna": r"experiments\optuna_results\etth2\ETTh2;pred_len=168;OPTUNA.json"
    },
}

In [15]:
# Mirror the layout of etth_1_path, replacing every file path with the
# DataFrame that load_json builds from it.
data_etth_1 = {
    horizon: {
        run_name: load_json(json_path)
        for run_name, json_path in run_files.items()
    }
    for horizon, run_files in etth_1_path.items()
}

# Mirror the layout of etth_2_path, replacing every file path with the
# DataFrame that load_json builds from it.
data_etth_2 = {
    horizon: {
        run_name: load_json(json_path)
        for run_name, json_path in run_files.items()
    }
    for horizon, run_files in etth_2_path.items()
}


### График ошибки `MSE` для датасета `ETTH1`

In [None]:
def plot_graph(data: dict, title: str) -> go.Figure:
    """Plot test MSE against trial number for every run in ``data``.

    Args:
        data: Mapping of run name to a DataFrame that has ``trial`` and
            ``mse_test`` columns. Runs whose name contains ``"optuna"`` are
            drawn with their trial numbers shifted by one.
        title: Title shown above the figure.

    Returns:
        A 1000x400 px figure with one ``lines+markers`` trace per run. When at
        least one run has a non-missing minimum, a dashed green horizontal
        line marks the lowest ``mse_test`` across all runs.
    """
    fig = go.Figure()
    run_minima = []
    for run_name, frame in data.items():
        run_minima.append(frame["mse_test"].min())
        # NOTE(review): the +1 shift presumably aligns 0-based Optuna trial
        # numbers with 1-based LLM trial numbers — confirm against the loggers.
        if "optuna" in run_name:
            trials = frame["trial"] + 1
        else:
            trials = frame["trial"]
        fig.add_trace(
            go.Scatter(
                x=trials,
                y=frame["mse_test"],
                mode='lines+markers',
                name=run_name,
                line_width=3,
            )
        )

    # Built-in min() gives order-dependent results when NaN is present and
    # raises on an empty list, so drop missing minima and only draw the
    # reference line when something is left.
    valid_minima = [value for value in run_minima if pd.notna(value)]
    if valid_minima:
        best_mse = min(valid_minima)
        fig.add_hline(
            y=best_mse,
            line_dash="dash",
            line_color="green",
            line_width=3,
            annotation_text=f"Минимальное значение MSE = {best_mse:.3f}",
            annotation_position="top right"
        )

    fig.update_layout(
        title=title,
        xaxis_title='Trial',
        yaxis_title='MSE',
        yaxis=dict(
            tickmode='linear',
            dtick=0.05
        ),
        width=1000,    # figure width in pixels
        height=400     # figure height in pixels
    )
    return fig

In [31]:
# write_image writes straight to the given path, so the output directory has
# to exist before the first export.
os.makedirs("image", exist_ok=True)

for pred_len in data_etth_1.keys():
    fig = plot_graph(
        data = data_etth_1[pred_len],
        title = f"График MSE при поиске гиперпараметров для ETTH1 и pred_len: {pred_len}"
    )
    fig.show()
    # Static PNG export (a raster format) at the same size as the on-screen figure.
    fig.write_image(f"image/ETTH1_pred_len_{pred_len}.png", width = 1000, height = 400)
    

### График ошибки `MSE` для датасета `ETTH2`

In [32]:
# write_image writes straight to the given path, so the output directory has
# to exist before the first export.
os.makedirs("image", exist_ok=True)

for pred_len in data_etth_2.keys():
    fig = plot_graph(
        data = data_etth_2[pred_len],
        title = f"График MSE при поиске гиперпараметров для ETTH2 и pred_len: {pred_len}"
    )
    fig.show()
    # Static PNG export (a raster format) at the same size as the on-screen figure.
    fig.write_image(f"image/ETTH2_pred_len_{pred_len}.png", width = 1000, height = 400)


## Анализ гипотез

In [19]:
# For every non-Optuna run of both datasets, record the hypothesis of the trial
# with the lowest test MSE together with that MSE and the run's metadata.
best_records = []
for dataset_name, dataset_runs in (("ETTH_1", data_etth_1), ("ETTH_2", data_etth_2)):
    for pred_len, runs in dataset_runs.items():
        for key, frame in runs.items():
            if "optuna" in key:
                continue

            # All rows tied for the minimum are kept; the first one is reported.
            d = frame[frame["mse_test"] == frame["mse_test"].min()]
            best_records.append(
                {
                    "hypothesis": d["hypothesis"].values[0],
                    "best_metric": d["mse_test"].values[0],
                    "pred_len": pred_len,
                    "dataset": dataset_name,
                    "grid": "grid_3" if "grid_3" in key else "grid_1_2",
                }
            )


data_best_hypot = pd.DataFrame(
    best_records,
    columns=["hypothesis", "best_metric", "pred_len", "dataset", "grid"],
)
data_best_hypot


Unnamed: 0,hypothesis,best_metric,pred_len,dataset,grid
0,Эта конфигурация исследует сочетание максималь...,0.082383,24,ETTH_1,grid_1_2
1,"Мы исследуем, может ли комбинация максимальной...",0.058722,24,ETTH_1,grid_3
2,"Эта конфигурация проверяет, является ли максим...",0.175862,48,ETTH_1,grid_1_2
3,Эта конфигурация проверяет эффективность миним...,0.126148,48,ETTH_1,grid_3
4,Combining the maximum effective context (720) ...,0.144801,168,ETTH_1,grid_1_2
5,Эта конфигурация максимизирует модельную емкос...,0.157421,168,ETTH_1,grid_3
6,Продолжая эксплуатировать успешную стратегию м...,0.138811,24,ETTH_2,grid_1_2
7,This configuration tests maximum model width (...,0.157508,24,ETTH_2,grid_3
8,Данная конфигурация исследует эффективность ко...,0.229513,48,ETTH_2,grid_1_2
9,"Конфигурация использует короткое входное окно,...",0.254768,48,ETTH_2,grid_3


In [34]:
# Render the winning hypotheses as a Markdown table and print the raw text so
# it can be copied out of the notebook.
hypotheses_md = data_best_hypot["hypothesis"].to_markdown()
print(hypotheses_md)

|    | hypothesis                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
|---:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [21]:
# For every non-Optuna run of both datasets, record the hypothesis of the trial
# with the highest test MSE together with that MSE and the run's metadata.
# The metric column is still called "best_metric" here even though it holds the
# worst (maximum) value; the name is kept so the frame's layout is unchanged.
worst_records = []
for dataset_name, dataset_runs in (("ETTH_1", data_etth_1), ("ETTH_2", data_etth_2)):
    for pred_len, runs in dataset_runs.items():
        for key, frame in runs.items():
            if "optuna" in key:
                continue

            # All rows tied for the maximum are kept; the first one is reported.
            d = frame[frame["mse_test"] == frame["mse_test"].max()]
            worst_records.append(
                {
                    "hypothesis": d["hypothesis"].values[0],
                    "best_metric": d["mse_test"].values[0],
                    "pred_len": pred_len,
                    "dataset": dataset_name,
                    "grid": "grid_3" if "grid_3" in key else "grid_1_2",
                }
            )


data_worst_hypot = pd.DataFrame(
    worst_records,
    columns=["hypothesis", "best_metric", "pred_len", "dataset", "grid"],
)
data_worst_hypot


Unnamed: 0,hypothesis,best_metric,pred_len,dataset,grid
0,Данная конфигурация исследует эффект максималь...,0.468137,24,ETTH_1,grid_1_2
1,Мы тестируем максимальную емкость модели (d_mo...,0.356938,24,ETTH_1,grid_3
2,Эта конфигурация использует умеренно длинный в...,0.660882,48,ETTH_1,grid_1_2
3,Эта конфигурация использует глубокий кодировщи...,0.387822,48,ETTH_1,grid_3
4,By combining the maximum context length (720) ...,0.364472,168,ETTH_1,grid_1_2
5,Этот сетап тестирует способность модели средне...,0.435401,168,ETTH_1,grid_3
6,Используя очень длинную последовательность (72...,0.449198,24,ETTH_2,grid_1_2
7,We test if maximizing architectural depth (e_l...,0.732229,24,ETTH_2,grid_3
8,Проверяем эффективность использования максимал...,0.634785,48,ETTH_2,grid_1_2
9,"Оценка того, может ли максимальный входной кон...",0.682263,48,ETTH_2,grid_3
