# Rolling Linear Regression

### Loading Libraries

In [4]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt

# Warnings
import warnings

# Scikit-Learn
from sklearn.preprocessing import scale

# Theano
# import theano

# PyMC3 & ArviZ
import arviz
# import pymc3 as pm

# Path
from pathlib import Path

# yFinance
import yfinance as yf

In [5]:
# Plot styling: seaborn's white-grid theme for every figure in the notebook.
sns.set_style(style='whitegrid')

# model_path = Path('models')

# Silence every warning category from this point on.
warnings.simplefilter(action='ignore')

### Simple Linear Regression Demo

#### Artificial Data

In [7]:
# Simulate noisy observations around a known line so that the posterior
# estimated below can be compared against the true parameters.
size = 200
true_intercept = 1
true_slope = 2

# Seeded generator: with the unseeded global RNG every kernel restart produced
# a different sample, so the figures further down were not reproducible.
rng = np.random.default_rng(42)

x = np.linspace(0, 1, size)
true_regression_line = true_intercept + true_slope * x
# Gaussian noise with standard deviation 0.5 around the true line
y = true_regression_line + rng.normal(scale=.5, size=size)

# Wrap the predictor in a Theano shared variable; the model defined below
# uses it as the regression input.
# NOTE(review): `theano` is not in scope — `import theano` is commented out in
# the import cell, so this line raises NameError on a fresh kernel until that
# import is restored.
x_shared = theano.shared(x)

#### Model Definition

In [8]:
# Bayesian simple linear regression: y ~ Normal(intercept + slope * x, sigma).
# NOTE(review): relies on `pm`; the `import pymc3 as pm` line in the import
# cell is currently commented out.
with pm.Model() as linear_regression:

    # Priors: half-Cauchy on the noise scale, wide normals on both coefficients
    sd = pm.HalfCauchy('sigma', beta=10, testval=1)
    intercept = pm.Normal('intercept', mu=0, sd=20)
    slope = pm.Normal('slope', mu=0, sd=20)

    # Likelihood of the observed y around the regression line
    likelihood = pm.Normal(
        'y',
        mu=intercept + slope * x_shared,
        sd=sd,
        observed=y,
    )

In [9]:
# Render the dependency graph of the model; as the last expression of the
# cell, the returned object is shown as the cell output.
pm.model_to_graphviz(linear_regression)

#### HMC Inference

In [10]:
# Sample the posterior on a single core: 1000 tuning iterations followed by
# 2500 draws.
with linear_regression:
    trace = pm.sample(draws=2500, tune=1000, cores=1)

#### Posterior Parameter Distributions

In [12]:
# Posterior distribution of each parameter in the demo model.
arviz.plot_posterior(trace)
plt.show()

### Linear Regression for Pairs Trading

In [13]:
# Full available history for both tickers; rows with any missing field are
# dropped before the closing prices are selected.
prices = (
    yf.download('GFI GLD', period='max')
    .dropna()
    .loc[:, 'Close']
)

In [14]:
# Period-over-period percentage change of each price column; rows containing
# NaN (such as the first row, which has no prior price) are dropped.
returns = prices.pct_change().dropna()

In [15]:
# Summary of the price frame (index, columns, non-null counts, dtypes).
prices.info()

In [16]:
# Same summary for the returns frame.
returns.info()

In [17]:
# Standardise each price column with scikit-learn's `scale`; the rolling
# regression further down is fitted on these standardised series.
prices_normed = prices.apply(scale)

In [18]:
# Left panel: both price series over time (GFI on a secondary y-axis).
# Right panel: GFI against GLD, coloured by time, with a linear fit overlaid.
fig, axes = plt.subplots(figsize=(14, 4), ncols=2)

prices.plot(secondary_y='GFI', ax=axes[0])
axes[0].set_title('Asset Price Series')

# One colour value per observation, increasing with time.
time_colors = np.linspace(0.1, 1, len(prices))
points = axes[1].scatter(prices.GLD,
                         prices.GFI,
                         c=time_colors,
                         s=15,
                         cmap='winter')
axes[1].set_title('Price Correlation over Time')

# Pin the colorbar ticks to the colour values of every `tick_step`-th row and
# label each tick with that row's year. Previously the labels were taken from
# `returns` (one row shorter than the plotted `prices`) and attached to
# whatever ticks matplotlib picked automatically, so years and colours did not
# line up.
tick_step = max(len(prices) // 10, 1)
cbar = plt.colorbar(points, ax=axes[1], ticks=time_colors[::tick_step])
cbar.ax.set_yticklabels([str(ts.year) for ts in prices.index[::tick_step]])

sns.regplot(x='GLD', y='GFI',
            data=prices,
            scatter=False,
            color='k',
            line_kws={'lw': 1,
                      'ls': '--'},
            ax=axes[1])
sns.despine()
fig.tight_layout()
plt.show()

In [19]:
# Static Bayesian regression of GFI on GLD over the whole sample, specified
# through PyMC3's formula interface and sampled on a single core.
with pm.Model() as model_reg:
    pm.glm.GLM.from_formula('GFI ~ GLD', prices)
    trace_reg = pm.sample(draws=5000, tune=1000, cores=1)

In [20]:
# Scatter of GFI against GLD (coloured by time) with regression lines drawn
# from the posterior of the static model overlaid.
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111,
                     xlabel='GLD',
                     ylabel='GFI',
                     title='Posterior predictive regression lines')

# One colour value per observation, increasing with time.
time_colors = np.linspace(0.1, 1, len(prices))
points = ax.scatter(prices.GLD,
                    prices.GFI,
                    c=time_colors,
                    s=15,
                    cmap='winter')

# Skip the first 100 draws of the trace and plot 250 sampled regression lines
# over the observed GLD range.
pm.plot_posterior_predictive_glm(trace_reg[100:],
                                 samples=250,
                                 label='posterior predictive regression lines',
                                 lm=lambda x, sample: sample['Intercept'] + sample['GLD'] * x,
                                 eval=np.linspace(prices.GLD.min(), prices.GLD.max(), 100))

# Fix the colorbar ticks at the colour values of every `tick_step`-th row so
# that each year label sits on the colour of the row it was taken from; the
# labels used to be attached to automatically chosen ticks.
tick_step = max(len(prices) // 10, 1)
cb = plt.colorbar(points, ax=ax, ticks=time_colors[::tick_step])
cb.ax.set_yticklabels([str(ts.year) for ts in prices.index[::tick_step]])
ax.legend(loc=0)
plt.show()

### Rolling Regression

In [21]:
# Time-varying coefficients: intercept (alpha) and slope (beta) each follow a
# Gaussian random walk with one value per price observation; the step sizes
# get exponential priors.
with pm.Model() as model_randomwalk:
    sigma_alpha = pm.Exponential('sigma_alpha', lam=50.)
    alpha = pm.GaussianRandomWalk('alpha', sd=sigma_alpha, shape=len(prices))

    sigma_beta = pm.Exponential('sigma_beta', lam=50.)
    beta = pm.GaussianRandomWalk('beta', sd=sigma_beta, shape=len(prices))

In [22]:
with model_randomwalk:
    # Regression mean with a separate intercept and slope at every time step
    regression = alpha + beta * prices_normed['GLD']

    # Observation noise and likelihood of the standardised GFI prices
    sd = pm.HalfNormal('sd', sd=.1)
    likelihood = pm.Normal(
        'y',
        mu=regression,
        sd=sd,
        observed=prices_normed['GFI'],
    )

In [23]:
# Sample the random-walk model on one core: 2000 tuning iterations, 200 draws,
# with the target acceptance rate set to 0.9.
with model_randomwalk:
    trace_rw = pm.sample(draws=200, tune=2000, cores=1, target_accept=.9)

### Results Analysis

In [24]:
# Sampled paths of the time-varying intercept (left) and slope (right); each
# line is one posterior draw plotted against the observation number.
fig, axes = plt.subplots(figsize=(15, 5), ncols=2, sharex=True)

# Tick every `tick_step`-th observation and label it with that observation's
# date. The labels were previously set without fixing the tick positions, so
# they were attached to automatically located ticks and did not correspond to
# the rows they were taken from.
tick_step = max(len(prices) // 9, 1)
tick_positions = np.arange(len(prices))[::tick_step]
tick_labels = [str(ts.date()) for ts in prices.index[::tick_step]]

panels = [('alpha', 'r', 'Alpha', 'Intercept'),
          ('beta', 'b', 'Beta', 'Slope')]

for panel_ax, (var_name, line_color, y_label, panel_title) in zip(axes, panels):
    panel_ax.plot(trace_rw[var_name].T, line_color, alpha=.05)
    panel_ax.set_xlabel('Time')
    panel_ax.set_ylabel(y_label)
    panel_ax.set_title(panel_title)
    panel_ax.set_xticks(tick_positions)
    # Full dates are long; rotate them so neighbouring labels do not overlap.
    panel_ax.set_xticklabels(tick_labels, rotation=45, ha='right')

fig.suptitle('Changes of coefficients over time', fontsize=14)
sns.despine()
fig.tight_layout()
fig.subplots_adjust(top=.9)
plt.show()

In [25]:
# Inputs shared with the regression-line plot in the next cell.
cmap = plt.get_cmap('winter')

# Evaluation grid spanning the observed range of standardised GLD prices
x = np.linspace(prices_normed['GLD'].min(), prices_normed['GLD'].max())

# Year of every (len(prices) // 9)-th observation, used as colorbar labels
dates = [str(ts.year) for ts in prices.index[::len(prices)//9]]

# Colour values in [0.1, 1]: one per price observation, and one per time step
# of the sampled `alpha` paths
colors = np.linspace(0.1, 1, len(prices))
colors_sc = np.linspace(0.1, 1, trace_rw[::10]['alpha'].shape[1])

In [26]:
# Regression lines implied by the time-varying coefficients, coloured by time
# step and drawn over the standardised price scatter.
fig, ax = plt.subplots(figsize=(14, 8))

# Iterate over time steps (columns of the thinned trace). The loop variables
# are deliberately NOT named `alpha` / `beta`: those names hold the model's
# random variables, and overwriting them here would break a later re-run of
# the cell that builds `regression = alpha + beta * ...`.
for i, (alpha_t, beta_t) in enumerate(zip(trace_rw[::25]['alpha'].T,
                                          trace_rw[::25]['beta'].T)):
    # Thin the draws once more and plot one faint line per remaining draw
    for a, b in zip(alpha_t[::25], beta_t[::25]):
        ax.plot(x,
                a + b*x,
                alpha=.01,
                lw=.5,
                c=cmap(colors_sc[i]))

points = ax.scatter(prices_normed.GLD,
                    prices_normed.GFI,
                    c=colors,
                    s=5,
                    cmap=cmap)

# `dates` holds the year of every (len(prices) // 9)-th row; pin the colorbar
# ticks to the colour values of those same rows so each label sits on the
# right colour instead of on an automatically chosen tick.
cbar = plt.colorbar(points, ax=ax, ticks=colors[::len(prices)//9])
cbar.ax.set_yticklabels(dates)

ax.set_xlabel('GLD')
ax.set_ylabel('GFI')
ax.set_title('Posterior predictive regression lines')
sns.despine()
fig.tight_layout()
plt.show()