In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, TensorDataset
from datetime import datetime
import csv
import matplotlib.pyplot as plt
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
from darts.models import TFTModel
from darts.metrics import mape
from darts.utils.statistics import check_seasonality, plot_acf
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.utils.likelihood_models import QuantileRegression
import warnings
warnings.filterwarnings("ignore")
import logging
logging.disable(logging.CRITICAL)

ModuleNotFoundError: No module named 'darts'

In [2]:
pip install torch

Collecting torchNote: you may need to restart the kernel to use updated packages.

  Downloading torch-2.3.1-cp312-cp312-win_amd64.whl.metadata (26 kB)
Collecting filelock (from torch)
  Downloading filelock-3.15.4-py3-none-any.whl.metadata (2.9 kB)
Collecting sympy (from torch)
  Downloading sympy-1.12.1-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch)
  Downloading networkx-3.3-py3-none-any.whl.metadata (5.1 kB)
Collecting jinja2 (from torch)
  Using cached jinja2-3.1.4-py3-none-any.whl.metadata (2.6 kB)
Collecting fsspec (from torch)
  Downloading fsspec-2024.6.1-py3-none-any.whl.metadata (11 kB)
Collecting mkl<=2021.4.0,>=2021.1.1 (from torch)
  Downloading mkl-2021.4.0-py2.py3-none-win_amd64.whl.metadata (1.4 kB)
Collecting intel-openmp==2021.* (from mkl<=2021.4.0,>=2021.1.1->torch)
  Downloading intel_openmp-2021.4.0-py2.py3-none-win_amd64.whl.metadata (1.2 kB)
Collecting tbb==2021.* (from mkl<=2021.4.0,>=2021.1.1->torch)
  Downloading tbb-2021.13.0-py3-none-win


[notice] A new release of pip is available: 24.0 -> 24.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


   ------------------------------- ------ 187.1/228.5 MB 622.9 kB/s eta 0:01:07
   ------------------------------- ------ 187.1/228.5 MB 622.9 kB/s eta 0:01:07
   ------------------------------- ------ 187.7/228.5 MB 633.1 kB/s eta 0:01:05
   ------------------------------- ------ 187.8/228.5 MB 634.4 kB/s eta 0:01:05
   ------------------------------- ------ 187.9/228.5 MB 638.8 kB/s eta 0:01:04
   ------------------------------- ------ 187.9/228.5 MB 640.0 kB/s eta 0:01:04
   ------------------------------- ------ 188.0/228.5 MB 640.0 kB/s eta 0:01:04
   ------------------------------- ------ 188.0/228.5 MB 640.6 kB/s eta 0:01:04
   ------------------------------- ------ 188.1/228.5 MB 660.0 kB/s eta 0:01:02
   ------------------------------- ------ 188.2/228.5 MB 675.6 kB/s eta 0:01:00
   ------------------------------- ------ 188.2/228.5 MB 682.0 kB/s eta 0:01:00
   ------------------------------- ------ 188.3/228.5 MB 686.3 kB/s eta 0:00:59
   ------------------------------- -----

In [58]:
# before starting, we define some constants
num_samples = 200

figsize = (9, 6)
lowest_q, low_q, high_q, highest_q = 0.01, 0.1, 0.9, 0.99
label_q_outer = f"{int(lowest_q * 100)}-{int(highest_q * 100)}th percentiles"
label_q_inner = f"{int(low_q * 100)}-{int(high_q * 100)}th percentiles"

In [3]:
file_path = r'C:\Users\user\projects\cached_data_1.csv'
df = pd.read_csv(file_path, index_col=0)

In [4]:
def read_variables(file_path):
    with open(file_path, 'r') as file:
        variables = file.read().strip().split(', ')
    return variables
variables = read_variables('variables.txt')


In [24]:
df.columns = [*df.columns[:-1], 'time']
time_col = df.columns[-1]
value_cols = df.columns[:-1]
df = df.drop_duplicates(subset=[time_col])
    # Проверка и очистка данных временного столбца
def clean_time_col(time_series):
    return pd.to_datetime(time_series, errors='coerce')

df[time_col] = clean_time_col(df[time_col])
df = df.dropna(subset=[time_col])  # Drop rows with NaT values

In [6]:
min_time = df[time_col].min()
max_time = df[time_col].max()
training_start = pd.Timestamp(min_time + pd.Timedelta(hours=22, minutes=38, seconds=15))
time_interval = pd.Timedelta(hours=2, minutes=14, seconds=36) - pd.Timedelta(hours=22, minutes=38, seconds=15)
training_cutoff = training_start + (time_interval / 2)
regular_time_index = pd.date_range(start=min_time, end=max_time, freq='S')
def interpolate_series(df, time_col, value_col, regular_time_index):
    # Создание временного ряда с пропущенными временными метками
    ts = pd.Series(df[value_col].values, index=df[time_col])
    # Ресемплирование и интерполяция данных
    ts_interpolated = ts.reindex(regular_time_index).interpolate(method='time')
    return ts_interpolated


In [7]:
interpolated_series_list = []
for col in value_cols:
    interpolated_series = interpolate_series(df, time_col, col, regular_time_index)
    interpolated_series_list.append(TimeSeries.from_series(interpolated_series))

In [8]:
series_list = []
for series in interpolated_series_list:
    series = series / TimeSeries.from_series(series.time_index.days_in_month)
    series = series.astype(np.float32)
    series_list.append(series)

In [9]:
# Функция для разделения на тренировочный и валидационный наборы
def split_series(series, cutoff):
    return series.split_after(cutoff)

In [10]:
train_val_series = [split_series(series, training_cutoff) for series in series_list]

In [11]:
# Функция для нормализации временных рядов
def normalize_series(train, val):
    transformer = Scaler()
    train_transformed = transformer.fit_transform(train)
    val_transformed = transformer.transform(val)
    return train_transformed, val_transformed, transformer

In [12]:
normalized_series = [normalize_series(train, val) for train, val in train_val_series]

In [13]:
train_transformed_list = [train for train, val, transformer in normalized_series]
val_transformed_list = [val for train, val, transformer in normalized_series]
transformers = [transformer for train, val, transformer in normalized_series]

In [14]:
covariates = datetime_attribute_timeseries(series_list[0], attribute="hour", one_hot=False)
covariates += datetime_attribute_timeseries(series_list[0], attribute="minute", one_hot=False)
covariates += datetime_attribute_timeseries(series_list[0], attribute="second", one_hot=False)
covariates = covariates.stack(datetime_attribute_timeseries(series_list[0], attribute="hour", one_hot=False))
covariates = covariates.stack(datetime_attribute_timeseries(series_list[0], attribute="minute", one_hot=False))
covariates = covariates.stack(datetime_attribute_timeseries(series_list[0], attribute="second", one_hot=False))
covariates = covariates.astype(np.float32)

In [15]:
scaler_covs = Scaler()
covariates_transformed_list = []

# Разделение ковариатов на обучающий и валидационный наборы
cov_train, cov_val = covariates.split_after(training_cutoff)
scaler_covs.fit(cov_train)


# Преобразование ковариатов в соответствии с обучающим набором
cov_train_transformed = scaler_covs.transform(cov_train)
cov_val_transformed = scaler_covs.transform(cov_val)

# Подготовка списка ковариатов для всех временных рядов
covariates_transformed_list = []
for _ in range(len(train_transformed_list)):
    covariates_transformed_list.append(cov_train_transformed)

In [16]:
print("Length of train_transformed_list (number of series):", len(train_transformed_list))
print("Length of covariates_transformed_list (number of series):", len(covariates_transformed_list))

Length of train_transformed_list (number of series): 12
Length of covariates_transformed_list (number of series): 12


In [17]:
# default quantiles for QuantileRegression
quantiles = [
    0.01,
    0.05,
    0.1,
    0.15,
    0.2,
    0.25,
    0.3,
    0.4,
    0.5,
    0.6,
    0.7,
    0.75,
    0.8,
    0.85,
    0.9,
    0.95,
    0.99,
]

In [18]:
input_chunk_length = 24
forecast_horizon = 12

In [19]:
my_model = TFTModel(
    input_chunk_length=input_chunk_length,
    output_chunk_length=forecast_horizon,
    hidden_size=64,
    lstm_layers=1,
    num_attention_heads=4,
    dropout=0.1,
    batch_size=16,
    n_epochs=10,
    add_relative_index=False,
    add_encoders=None,
    likelihood=QuantileRegression(
        quantiles=quantiles
    ),  # QuantileRegression is set per default
    # loss_fn=MSELoss(),
    random_state=42,
)

In [23]:
print("Length of train_transformed_list (number of series):", train_transformed_list)
print("Length of covariates_transformed (number of series):", covariates_transformed_list)

Length of covariates_transformed (number of series): [<TimeSeries (DataArray) (time: 44786, component: 4, sample: 1)> Size: 717kB
array([[[0.        ],
        [0.        ],
        [0.        ],
        [0.        ]],

       [[0.00775194],
        [0.        ],
        [0.        ],
        [0.01694915]],

       [[0.01550388],
        [0.        ],
        [0.        ],
        [0.03389831]],

       ...,

       [[0.4728682 ],
        [1.        ],
        [0.44067797],
        [0.3898305 ]],

       [[0.48062015],
        [1.        ],
        [0.44067797],
        [0.40677965]],

       [[0.4883721 ],
        [1.        ],
        [0.44067797],
        [0.42372882]]], dtype=float32)
Coordinates:
  * time       (time) datetime64[ns] 358kB 2024-07-07 ... 2024-07-07T12:26:25
  * component  (component) object 32B 'hour' 'hour_1' 'minute' 'second'
Dimensions without coordinates: sample
Attributes:
    static_covariates:  None
    hierarchy:          None, <TimeSeries (DataArray) (time

In [26]:
my_model.fit(train_transformed_list, future_covariates=covariates_transformed_list, verbose=True)


Training: |          | 0/? [00:00<?, ?it/s]

In [25]:
forecasts = []
for i, series in enumerate(train_transformed_list):
    forecast = my_model.predict(n=forecast_horizon, series=series, future_covariates=cov_train_transformed)
    forecasts.append(forecast)
    plt.figure(figsize=(10, 6))
    series.plot(label="actual")
    forecast.plot(
        low_quantile=0.1, high_quantile=0.9, label="10-90th percentiles", alpha=0.2, color="m"
    )
    forecast.plot(
        low_quantile=0.01, high_quantile=0.99, label="1-99th percentiles", alpha=0.2, color="b"
    )
    plt.title(f"Прогноз для параметра {value_cols[i]} - MAPE: {mape(series, forecast):.2f}%")
    plt.legend()
    plt.show()





ValueError: The model must be fit before calling predict(). For global models, if predict() is called without specifying a series, the model must have been fit on a single training series.