-------

## Setup

In [1]:
from Regressionizer import *
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

-------

## Generate input data

In [2]:
# Seed NumPy's legacy global RNG so the noise drawn below is reproducible.
np.random.seed(0)

# 240 evenly spaced points on [0, 2]: two full periods of sin(2*pi*x).
x = np.linspace(0, 2, num=240)

# Sine signal plus zero-mean Gaussian noise with standard deviation 0.4.
y = np.sin(2 * np.pi * x) + np.random.normal(loc=0, scale=0.4, size=x.shape)

# Two-column array: abscissae in column 0, noisy values in column 1.
data = np.column_stack((x, y))

Plot the generated data:

In [3]:
# Scatter plot of the generated sample: column 0 on the x-axis, column 1 on the y-axis.
fig = px.scatter(
    x=data[:, 0],
    y=data[:, 1],
    labels=dict(x='X-axis', y='Y-axis'),
    template='plotly_dark',
)
fig.show()

------

## Fit given functions

Define a list of functions:

In [4]:
# Hand-picked basis: a constant, a linear term, and cosines with frequencies 1, 3 and 6.
funcs = [
    lambda x: 1,
    lambda x: x,
    lambda x: np.cos(x),
    lambda x: np.cos(3 * x),
    lambda x: np.cos(6 * x),
]

In [5]:
def chebyshev_t_polynomials(n):
    """Return a callable that evaluates the Chebyshev polynomial of the first kind, T_n.

    The polynomial is evaluated with the three-term recurrence
    ``T_k(x) = 2 * x * T_{k-1}(x) - T_{k-2}(x)``, starting from ``T_0(x) = 1``
    and ``T_1(x) = x``. The recurrence is run iteratively, so one evaluation
    costs ``n - 1`` steps. (Chaining nested lambdas instead makes every call
    re-evaluate both lower-degree polynomials, which grows exponentially
    with ``n``.)

    Parameters
    ----------
    n : int
        Degree of the polynomial; must be non-negative.

    Returns
    -------
    callable
        A one-argument function ``x -> T_n(x)``. ``x`` may be anything that
        supports ``*`` and ``-`` with numbers (a scalar, a NumPy array, ...).
        For ``n == 0`` the function returns the plain integer ``1`` whatever
        the shape of ``x``.

    Raises
    ------
    ValueError
        If ``n`` is negative.
    TypeError
        If ``n`` is greater than 1 and is not an integer (raised by ``range``).
    """
    if n < 0:
        raise ValueError(f"polynomial degree must be non-negative, got {n}")
    if n == 0:
        return lambda x: 1
    if n == 1:
        return lambda x: x

    # Building the range here rejects non-integer degrees when the polynomial
    # is created, not later when it is first evaluated.
    steps = range(n - 1)

    def Tn(x):
        # prev and curr hold T_{k-1}(x) and T_k(x); they start at T_0 and T_1.
        prev, curr = 1, x
        for _ in steps:
            prev, curr = curr, 2 * x * curr - prev
        return curr

    return Tn

# Basis of the ten Chebyshev polynomials of degree 0 through 9.
chebyshev_polynomials = list(map(chebyshev_t_polynomials, range(10)))

Define ***regression quantile*** probabilities:

In [6]:
# Probabilities of the regression quantiles to fit: 0.1, the median (0.5), and 0.9.
probs = [0.1, 0.5, 0.9]

Perform quantile regression:

In [7]:
# Fit one regression quantile per probability in `probs`, using the Chebyshev
# polynomials as basis functions, then build the plot.
# NOTE(review): .plot() presumably stores the figure as the pipeline value
# instead of displaying it — confirm against the Regressionizer docs.
obj2 = (
    Regressionizer(data)
    .quantile_regression_fit(funcs=chebyshev_polynomials, probs=probs)
    .plot(title="Chebyshev polynomials fit", template="plotly_dark")
)

Plot the obtained regression quantiles:

In [8]:
# NOTE(review): take_value() presumably returns the figure built by .plot() — confirm; .show() renders it.
obj2.take_value().show()

-------

## Fit B-splines

Instead of coming up with basis functions ourselves, we can use a B-spline basis:

In [9]:
# Quantile regression at probabilities 0.2, 0.5 and 0.8, followed by the plot.
# NOTE(review): knots=5 presumably sets the number of B-spline knots — confirm.
obj = (
    Regressionizer(data)
    .quantile_regression(knots=5, probs=[0.2, 0.5, 0.8])
    .plot(title="B-splines fit", template="plotly_dark")
)

In [10]:
# NOTE(review): take_value() presumably returns the figure built by .plot() — confirm; .show() renders it.
obj.take_value().show()

In [11]:
# Bare expression: the notebook displays the object's repr, a one-line summary
# of the record count and the fitted regression quantiles.
obj

Regressionizer object with data that has 240 records and 3 regression quantiles for [0.2, 0.5, 0.8].

------

## Weather temperature data

In [12]:
import pandas as pd

# Temperature readings for Atlanta, GA, hosted in the MathematicaVsR repository.
url = "https://raw.githubusercontent.com/antononcube/MathematicaVsR/master/Data/MathematicaVsR-Data-Atlanta-GA-USA-Temperature.csv"
dfTemperature = pd.read_csv(url)

# Parse the 'Date' strings (YYYY-MM-DD) so rows can be filtered by calendar year.
dfTemperature['DateObject'] = pd.to_datetime(dfTemperature['Date'], format='%Y-%m-%d')

# Keep the observations from 2020 through 2023, both end years included.
dfTemperature = dfTemperature[dfTemperature['DateObject'].dt.year.between(2020, 2023)]
dfTemperature

Unnamed: 0,Date,AbsoluteTime,Temperature,DateObject
2555,2020-01-01,3786825600,7.56,2020-01-01
2556,2020-01-02,3786912000,7.28,2020-01-02
2557,2020-01-03,3786998400,12.28,2020-01-03
2558,2020-01-04,3787084800,12.78,2020-01-04
2559,2020-01-05,3787171200,4.83,2020-01-05
...,...,...,...,...
4011,2023-12-27,3912624000,11.67,2023-12-27
4012,2023-12-28,3912710400,7.44,2023-12-28
4013,2023-12-29,3912796800,3.78,2023-12-29
4014,2023-12-30,3912883200,4.83,2023-12-30


In [13]:
# Two-column NumPy array: 'AbsoluteTime' in column 0, 'Temperature' in column 1.
temp_data = dfTemperature.loc[:, ['AbsoluteTime', 'Temperature']].to_numpy()
temp_data.shape

(1461, 2)

In [14]:
# Regression quantiles of the temperature series at probabilities 0.2, 0.5 and 0.8.
# NOTE(review): date_list_plot presumably interprets column 0 (AbsoluteTime)
# as timestamps for the x-axis — confirm against the Regressionizer docs.
obj = (
    Regressionizer(temp_data)
    .quantile_regression(knots=20, probs=[0.2, 0.5, 0.8])
    .date_list_plot(
        title="Atlanta, Georgia, USA, Temperature, ℃",
        template="plotly_dark",
        width=1200,
    )
)

In [15]:
# NOTE(review): take_value() presumably returns the figure built by .date_list_plot() — confirm; .show() renders it.
obj.take_value().show()

------

## Outliers

In [27]:
# Fit a low (0.02) and a high (0.96) regression quantile, then plot the outliers.
# NOTE(review): plot_outliers presumably flags points below the lower curve or
# above the upper one, and order=3 presumably sets the B-spline order — confirm both.
obj = (
    Regressionizer(temp_data)
    .quantile_regression(knots=24, probs=[0.02, 0.96], order=3)
    .plot_outliers(
        title="Outliers of Atlanta, Georgia, USA, Temperature, ℃",
        date_list_plot=True,
        template="plotly_dark",
        width=1200,
    )
)

In [28]:
# NOTE(review): take_value() presumably returns the figure built by .plot_outliers() — confirm; .show() renders it.
obj.take_value().show()