# Quantile Regression instead of ARIMA

Anton Antonov   
December 2024

-------

## Setup

Load the ["Regressionizer"](https://pypi.org/project/Regressionizer/) and other "standard" packages:

In [None]:
from Regressionizer import *
from OutlierIdentifiers import *

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp

In [None]:
# Shared styling for the plots in this notebook:
# color used for the raw data points, and the Plotly theme.
data_color = 'darkgray'
template = 'plotly_dark'

### Temperature data

In [None]:
# Download the temperature CSV, add a parsed datetime column built from the
# ISO-formatted 'Date' strings, and keep only the rows dated 2020 through 2023
# (both years included).
url = "https://raw.githubusercontent.com/antononcube/MathematicaVsR/master/Data/MathematicaVsR-Data-Atlanta-GA-USA-Temperature.csv"
dfTemperature = (
    pd.read_csv(url)
    .assign(DateObject=lambda df: pd.to_datetime(df['Date'], format='%Y-%m-%d'))
    .loc[lambda df: df['DateObject'].dt.year.between(2020, 2023)]
)
dfTemperature

Prepare the data for `Regressionizer` pipelines:

In [None]:
# Two-column NumPy array: 'AbsoluteTime' in column 0 and 'Temperature' in column 1
# (presumably regressor and value, respectively, for the pipelines below).
temp_data = dfTemperature.loc[:, ['AbsoluteTime', 'Temperature']].to_numpy()
temp_data.shape

-----

## First fit

Let us demonstrate:

1. Rescaling the data 
2. Using a function basis with infinite support

The latter allows Quantile Regression to be used in place of Autoregressive Integrated Moving Average (ARIMA) models, because such a fit can also be evaluated outside the range of the data.

In [None]:
# Function basis: a constant (intercept) term followed by phase-shifted sines
# and cosines. Frequencies h run over 20, 22, ..., 30 and phases b over
# [0, 2*pi) in steps of 0.25.
# b, h and the label f are bound as lambda defaults so that every function keeps
# its own parameters (no late binding); they can be read back via `__defaults__`.
bFuncs = [lambda x: 1] + [
    func
    for h in np.arange(20, 32, 2)
    for b in np.arange(0, 2 * np.pi, 0.25)
    for func in (
        lambda x, b=b, h=h, f='sin': np.sin(b + h * x),
        lambda x, b=b, h=h, f='cos': np.cos(b + h * x),
    )
]

len(bFuncs)

In [None]:
# Median (probability 0.5) quantile regression fit with the full basis `bFuncs`.
# Every step is invoked on the result of the previous one, as in a method chain.
obj = Regressionizer(temp_data.copy())  # work on a copy of the data array
obj = obj.echo_data_summary()  # summary before rescaling
obj = obj.rescale(regressor=True, value=True)
obj = obj.echo_data_summary()  # summary after rescaling
obj = obj.quantile_regression_fit(funcs=bFuncs, probs=[0.5])
obj = obj.plot(
    title="Atlanta, Georgia, USA, Temperature, ℃ (rescaled)",
    template=template,
    data_color=data_color,
    width=1200,
)

In [None]:
# Take the current pipeline value (presumably the Plotly figure made by `.plot(...)`) and render it.
firstFitValue = obj.take_value()
firstFitValue.show()

## Most significant components

In [None]:
# Absolute values of the fitted coefficients, taken from the first LP solution
# (presumably the one for the single fitted probability, 0.5).
yAbs = np.abs(obj.take_lp_solutions()[0])

# One integer index per coefficient; sized from the coefficients themselves
# so that x and y always have matching lengths.
x = np.arange(len(yAbs))

# Scatter plot of coefficient magnitude versus basis-function index
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=x,
        y=yAbs,
        mode='markers',
        marker=dict(color='SteelBlue', size=8),
        fill='tozeroy',
    )
)

# Descriptive axis titles instead of generic placeholders
fig.update_layout(
    title='Coefficients',
    xaxis_title='Basis function index',
    yaxis_title='Absolute coefficient value',
    template=template,
)

# Show the plot
fig.show()

In [None]:
# Indices of the six largest-magnitude coefficients among the sine/cosine terms.
# Index 0 is the constant function; it is left out of the ranking because the
# reduced fit adds its own constant term, so picking it here would duplicate it
# and leave only five trigonometric functions in the reduced basis.
pos = 1 + np.argsort(yAbs[1:])[-6:]
print("pos: ", pos)
bFuncsSmall = [bFuncs[i] for i in pos]

Here we can see which functions (i.e. with what parameters) are in the "top" basis:

In [None]:
# A bare lambda displays only as `<function <lambda> at 0x...>`, which does not
# reveal its parameters. Show the bound default arguments (b, h, f) instead,
# i.e. the phase, the frequency and the 'sin'/'cos' label of each function.
# A function without defaults (such as the constant term) shows as None.
[func.__defaults__ for func in bFuncsSmall]

### Extension

Fit with the smaller basis:

In [None]:
# Median (probability 0.5) fit using a constant term plus the selected functions in `bFuncsSmall`.
# Every step is invoked on the result of the previous one, as in a method chain.
objSmall = Regressionizer(temp_data.copy())  # work on a copy of the data array
objSmall = objSmall.echo_data_summary()  # summary before rescaling
objSmall = objSmall.rescale(regressor=True, value=True)
objSmall = objSmall.echo_data_summary()  # summary after rescaling
objSmall = objSmall.quantile_regression_fit(funcs=[lambda x: 1] + bFuncsSmall, probs=[0.5])
objSmall = objSmall.plot(
    title="Atlanta, Georgia, USA, Temperature, ℃ (rescaled)",
    template=template,
    data_color=data_color,
    width=1200,
)

In [None]:
# Take the current pipeline value (presumably the Plotly figure made by `.plot(...)`) and render it.
smallFitValue = objSmall.take_value()
smallFitValue.show()

Plot the regression quantile over an extended range of the regressor:

In [None]:
# Data held by the reduced-basis pipeline (rescaled in that pipeline) and the
# fitted regression quantile for probability 0.5
dfDataRescaled = objSmall.take_data()
qFuncRescaled = objSmall.take_regression_quantiles().get(0.5)

# 100 evenly spaced regressor values from 0 to 1.5
# (presumably extending past the rescaled regressor range)
xLonger = np.linspace(0, 1.5, 100)

# Trace of the data points
trace1 = go.Scatter(
    x=dfDataRescaled[:, 0],
    y=dfDataRescaled[:, 1],
    mode='markers',
    name='data',
    line={'color': 'gray'},
)

# Trace of the fitted curve, evaluated point by point over the longer grid
trace2 = go.Scatter(
    x=xLonger,
    y=list(map(qFuncRescaled, xLonger)),
    mode='lines',
    name='Largest Sin/Cos fit',
    line={'color': 'red'},
)

# Assemble the figure from the two traces
fig = go.Figure()
fig.add_trace(trace1)
fig.add_trace(trace2)

# Titles, theme and size
fig.update_layout(
    title='Extension plot',
    xaxis_title='x',
    yaxis_title='y',
    template=template,
    width=1000,
    height=400,
)

# Render the figure
fig.show()