## get data

In [None]:
# Import pandas and read the dataset from CSV into `df`.
import pandas as pd

csv_path = "../data/normalized_without_outliers_25_55000.csv"
df = pd.read_csv(csv_path)
# Bare expression: evaluates to the (rows, columns) tuple of the loaded frame.
df.shape

In [None]:
import sys
# Append the parent directory to the module search path before the imports below run.
sys.path.append("../")

import numpy as np
import matplotlib.pyplot as plt
from typing import Tuple


# NOTE(review): sim_randomwalk is not referenced in the visible cells.
from tsmoothie.utils_func import sim_randomwalk
# Wildcard import; presumably the source of the *Smoother classes used by the
# apply_*_smoother helpers below, since nothing else visible imports them.
from tsmoothie.smoother import *

In [None]:
# Build X from the price columns of `df`.
import numpy as np


# Label-based column slice from "norm_price_1" through "norm_price_25"
# (the usual version, without norm_floor_price).
price_columns = df.loc[:, "norm_price_1":"norm_price_25"]
X = price_columns.values
# Bare expression: evaluates to the shape of the extracted array.
X.shape

## test different models

## smooth functions

In [None]:
def apply_exponential_smoother(prices: list[float], plt: plt, should_log_data: bool) -> tuple[plt, list[float]]:
    """Smooth ``prices`` with an ``ExponentialSmoother`` and draw the result on ``plt``.

    Args:
        prices: Series to smooth; passed unchanged to ``smoother.smooth``.
        plt: Plotting object whose ``plot`` method receives the smoothed series
            (presumably the ``matplotlib.pyplot`` module, as the annotation
            names it; confirm against the caller).
        should_log_data: When true, print the smoothed series.

    Returns:
        A ``(plt, smoothed)`` tuple: the ``plt`` object that was passed in and
        ``smoother.smooth_data[0]``, the first smoothed series.
    """
    smoother = ExponentialSmoother(window_len=2, alpha=0.1)
    smoother.smooth(prices)

    # Only the smoothed curve is plotted; interval bounds are not needed here,
    # so they are not computed.
    smoothed = smoother.smooth_data[0]
    plt.plot(smoothed, linewidth=3, label="ExponentialSmoother", alpha=0.3)

    if should_log_data:
        print(f"smoth data for ExponentialSmoother is {smoothed}")

    return plt, smoothed



In [None]:
def apply_convolution_smoother(prices: list[float], plt: plt, should_log_data: bool) -> tuple[plt, list[float]]:
    """Smooth ``prices`` with a ``ConvolutionSmoother`` and draw the result on ``plt``.

    Args:
        prices: Series to smooth; passed unchanged to ``smoother.smooth``.
        plt: Plotting object whose ``plot`` method receives the smoothed series
            (presumably the ``matplotlib.pyplot`` module, as the annotation
            names it; confirm against the caller).
        should_log_data: When true, print the smoothed series.

    Returns:
        A ``(plt, smoothed)`` tuple: the ``plt`` object that was passed in and
        ``smoother.smooth_data[0]``, the first smoothed series.
    """
    smoother = ConvolutionSmoother(window_len=4, window_type='ones')
    smoother.smooth(prices)

    # Only the smoothed curve is plotted; interval bounds are not needed here,
    # so they are not computed.
    smoothed = smoother.smooth_data[0]
    plt.plot(smoothed, linewidth=3, label="ConvolutionSmoother", alpha=0.3)

    if should_log_data:
        print(f"smoth data for ConvolutionSmoother is {smoothed}")

    return plt, smoothed


In [None]:
def apply_spectral_smoother(prices: list[float], plt: plt, should_log_data: bool) -> tuple[plt, list[float]]:
    """Smooth ``prices`` with a ``SpectralSmoother`` and draw the result on ``plt``.

    Args:
        prices: Series to smooth; passed unchanged to ``smoother.smooth``.
        plt: Plotting object whose ``plot`` method receives the smoothed series
            (presumably the ``matplotlib.pyplot`` module, as the annotation
            names it; confirm against the caller).
        should_log_data: When true, print the smoothed series.

    Returns:
        A ``(plt, smoothed)`` tuple: the ``plt`` object that was passed in and
        ``smoother.smooth_data[0]``, the first smoothed series.
    """
    smoother = SpectralSmoother(smooth_fraction=0.3, pad_len=20)
    smoother.smooth(prices)

    # Only the smoothed curve is plotted; interval bounds are not needed here,
    # so they are not computed.
    smoothed = smoother.smooth_data[0]
    plt.plot(smoothed, linewidth=3, label="SpectralSmoother", alpha=0.3)

    if should_log_data:
        print(f"smoth data for SpectralSmoother is {smoothed}")

    return plt, smoothed

In [None]:
def apply_polynomial_smoother(prices: list[float], plt: plt, should_log_data: bool) -> tuple[plt, list[float]]:
    """Smooth ``prices`` with a ``PolynomialSmoother`` and draw the result on ``plt``.

    Args:
        prices: Series to smooth; passed unchanged to ``smoother.smooth``.
        plt: Plotting object whose ``plot`` method receives the smoothed series
            (presumably the ``matplotlib.pyplot`` module, as the annotation
            names it; confirm against the caller).
        should_log_data: When true, print the smoothed series.

    Returns:
        A ``(plt, smoothed)`` tuple: the ``plt`` object that was passed in and
        ``smoother.smooth_data[0]``, the first smoothed series.
    """
    smoother = PolynomialSmoother(degree=6)
    smoother.smooth(prices)

    # Only the smoothed curve is plotted; interval bounds are not needed here,
    # so they are not computed.
    smoothed = smoother.smooth_data[0]
    plt.plot(smoothed, linewidth=3, label="PolynomialSmoother", alpha=0.3)

    if should_log_data:
        print(f"smoth data for PolynomialSmoother is {smoothed}")

    return plt, smoothed

In [None]:
def apply_spline_smoother(prices: list[float], plt: plt, should_log_data: bool) -> tuple[plt, list[float]]:
    """Smooth ``prices`` with a ``SplineSmoother`` and draw the result on ``plt``.

    Args:
        prices: Series to smooth; passed unchanged to ``smoother.smooth``.
        plt: Plotting object whose ``plot`` method receives the smoothed series
            (presumably the ``matplotlib.pyplot`` module, as the annotation
            names it; confirm against the caller).
        should_log_data: When true, print the smoothed series.

    Returns:
        A ``(plt, smoothed)`` tuple: the ``plt`` object that was passed in and
        ``smoother.smooth_data[0]``, the first smoothed series.
    """
    smoother = SplineSmoother(n_knots=6, spline_type='natural_cubic_spline')
    smoother.smooth(prices)

    # Only the smoothed curve is plotted; interval bounds are not needed here,
    # so they are not computed.
    smoothed = smoother.smooth_data[0]
    plt.plot(smoothed, linewidth=3, label="SplineSmoother", alpha=0.3)

    if should_log_data:
        print(f"smoth data for SplineSmoother is {smoothed}")

    return plt, smoothed

In [None]:
def apply_gaussian_smoother(prices: list[float], plt: plt, should_log_data: bool) -> tuple[plt, list[float]]:
    """Smooth ``prices`` with a ``GaussianSmoother`` and draw the result on ``plt``.

    Args:
        prices: Series to smooth; passed unchanged to ``smoother.smooth``.
        plt: Plotting object whose ``plot`` method receives the smoothed series
            (presumably the ``matplotlib.pyplot`` module, as the annotation
            names it; confirm against the caller).
        should_log_data: When true, print the smoothed series.

    Returns:
        A ``(plt, smoothed)`` tuple: the ``plt`` object that was passed in and
        ``smoother.smooth_data[0]``, the first smoothed series.
    """
    smoother = GaussianSmoother(n_knots=5, sigma=0.7)
    smoother.smooth(prices)

    # Only the smoothed curve is plotted; interval bounds are not needed here,
    # so they are not computed.
    smoothed = smoother.smooth_data[0]
    plt.plot(smoothed, linewidth=3, label="GaussianSmoother", alpha=0.3)

    if should_log_data:
        print(f"smoth data for GaussianSmoother is {smoothed}")

    return plt, smoothed

In [None]:
def apply_binner_smoother(prices: list[float], plt: plt, should_log_data: bool) -> tuple[plt, list[float]]:
    """Smooth ``prices`` with a ``BinnerSmoother`` and draw the result on ``plt``.

    Args:
        prices: Series to smooth; passed unchanged to ``smoother.smooth``.
        plt: Plotting object whose ``plot`` method receives the smoothed series
            (presumably the ``matplotlib.pyplot`` module, as the annotation
            names it; confirm against the caller).
        should_log_data: When true, print the smoothed series.

    Returns:
        A ``(plt, smoothed)`` tuple: the ``plt`` object that was passed in and
        ``smoother.smooth_data[0]``, the first smoothed series.
    """
    smoother = BinnerSmoother(n_knots=6)
    smoother.smooth(prices)

    # Only the smoothed curve is plotted; interval bounds are not needed here,
    # so they are not computed.
    smoothed = smoother.smooth_data[0]
    plt.plot(smoothed, linewidth=3, label="BinnerSmoother", alpha=0.3)

    if should_log_data:
        print(f"smoth data for BinnerSmoother is {smoothed}")

    return plt, smoothed

In [None]:
def apply_lowess_smoother(prices: list[float], plt: plt, should_log_data: bool) -> tuple[plt, list[float]]:
    """Smooth ``prices`` with a ``LowessSmoother`` and draw the result on ``plt``.

    Args:
        prices: Series to smooth; passed unchanged to ``smoother.smooth``.
        plt: Plotting object whose ``plot`` method receives the smoothed series
            (presumably the ``matplotlib.pyplot`` module, as the annotation
            names it; confirm against the caller).
        should_log_data: When true, print the smoothed series.

    Returns:
        A ``(plt, smoothed)`` tuple: the ``plt`` object that was passed in and
        ``smoother.smooth_data[0]``, the first smoothed series.
    """
    smoother = LowessSmoother(smooth_fraction=0.2, iterations=1)
    smoother.smooth(prices)

    # Only the smoothed curve is plotted; interval bounds are not needed here,
    # so they are not computed.
    smoothed = smoother.smooth_data[0]
    plt.plot(smoothed, linewidth=3, label="LowessSmoother", alpha=0.3)

    if should_log_data:
        print(f"smoth data for LowessSmoother is {smoothed}")

    return plt, smoothed

In [None]:
def apply_kalman_smoother(prices: list[float], plt: plt, should_log_data: bool) -> tuple[plt, list[float]]:
    """Smooth ``prices`` with a ``KalmanSmoother`` and draw the result on ``plt``.

    Args:
        prices: Series to smooth; passed unchanged to ``smoother.smooth``.
        plt: Plotting object whose ``plot`` method receives the smoothed series
            (presumably the ``matplotlib.pyplot`` module, as the annotation
            names it; confirm against the caller).
        should_log_data: When true, print the smoothed series.

    Returns:
        A ``(plt, smoothed)`` tuple: the ``plt`` object that was passed in and
        ``smoother.smooth_data[0]``, the first smoothed series.
    """
    smoother = KalmanSmoother(
        component='level_trend',
        component_noise={'level': 0.1, 'trend': 0.1},
    )
    smoother.smooth(prices)

    # Only the smoothed curve is plotted; interval bounds are not needed here,
    # so they are not computed.
    smoothed = smoother.smooth_data[0]
    plt.plot(smoothed, linewidth=3, label="KalmanSmoother", alpha=0.3)

    if should_log_data:
        print(f"smoth data for KalmanSmoother is {smoothed}")

    return plt, smoothed

## test

In [None]:
# Compare every smoother on the first five rows of X: plot each smoothed curve,
# combine the curves into a trimmed mean, then label each price against that mean.

# Smoothing helpers applied to every series, in plotting order.
smoother_functions = (
    apply_exponential_smoother,
    apply_convolution_smoother,
    apply_spectral_smoother,
    apply_polynomial_smoother,
    apply_spline_smoother,
    apply_gaussian_smoother,
    apply_binner_smoother,
    apply_lowess_smoother,
    apply_kalman_smoother,
)

# How many of the lowest and of the highest predictions are dropped at each
# position before averaging.
n_trimmed = 2

for i in range(5):
    prices = X[i]

    fig = plt.figure(figsize=(15, 7))
    plt.plot(prices)

    # Single switch for verbose output; it is forwarded to every smoother below.
    should_log_data = False

    # One smoothed series per smoother is collected here.
    smoothed_predicts_list = []
    for apply_smoother in smoother_functions:
        # Each helper returns (plt, series) with the plt it was given, so only
        # the series is kept.
        pred = apply_smoother(prices, plt, should_log_data=should_log_data)[1]
        smoothed_predicts_list.append(pred)

    # Not every smoother returns the full number of values (some use window
    # functions), so cut all series to the shortest one to get a rectangular array.
    # NOTE(review): this keeps the leading values of each series; confirm that a
    # shortened output is aligned with the raw prices at index 0.
    common_len = min(len(module_predict) for module_predict in smoothed_predicts_list)
    smoothed_predicts_list = [module_predict[:common_len] for module_predict in smoothed_predicts_list]
    # Convert the list to a numpy array for easier manipulation
    smoothed_data_array = np.array(smoothed_predicts_list)

    # Trimmed mean across smoothers: at each position the n_trimmed smallest and
    # n_trimmed largest predictions are excluded before averaging.
    mean_of_predicts_without_extremes = np.mean(
        np.partition(smoothed_data_array, (n_trimmed, -n_trimmed), axis=0)[n_trimmed:-n_trimmed],
        axis=0,
    )

    print(mean_of_predicts_without_extremes)

    plt.scatter(range(len(mean_of_predicts_without_extremes)), mean_of_predicts_without_extremes, color="orange")

    plt.legend()
    plt.show()

    # Draw the labels: 0 where the price is above the trimmed mean, 1 otherwise.
    fig = plt.figure(figsize=(15, 7))

    labels = np.where(prices[:common_len] > mean_of_predicts_without_extremes, 0, 1)
    plt.scatter(range(len(labels)), prices[:common_len], c=labels, cmap='coolwarm', label='Price vs. Mean Predict')
    plt.plot(prices)
    plt.plot(range(len(mean_of_predicts_without_extremes)), mean_of_predicts_without_extremes, color="orange")
    plt.show()

