Spread Modeling - ML Approach #25

Merged: 37 commits, merged on Feb 15, 2021.

Changes shown are from 9 of the 37 commits.

Commits
c87c461
Init spread modeling commit
AaronDeb Nov 23, 2020
83c6265
misc stash
AaronDeb Nov 25, 2020
dc777af
Added latest changes
AaronDeb Nov 30, 2020
f016d50
Fixed filter indexing issue
AaronDeb Dec 1, 2020
9d80cf2
Set full Coverage/test commit
AaronDeb Dec 5, 2020
bb59564
Added keras requirement
AaronDeb Dec 5, 2020
952daf7
Added tensorflow requirement
AaronDeb Dec 5, 2020
7731607
Merge branch 'develop' into spread_modeling
PanPip Dec 10, 2020
9aaf52b
Update license links in Spread Modeling - ML Approach
PanPip Dec 10, 2020
a96bcc5
Merge branch 'develop' into spread_modeling
PanPip Dec 15, 2020
c3a67bb
Slightly messy commit, showcases docs
AaronDeb Dec 15, 2020
03fdff4
Merge
AaronDeb Dec 15, 2020
abe6537
removed deprecated files
AaronDeb Dec 15, 2020
18bf05b
Added new tests and full coverage
AaronDeb Dec 21, 2020
4183742
Fixed lint/coverage issues
AaronDeb Dec 21, 2020
9dee091
Reversed some changes
AaronDeb Dec 21, 2020
b263050
Added option to dynamically set the Open/Close columns in the dataset…
AaronDeb Dec 22, 2020
74dd683
Added more docstrings/comments and tidied up some sections
AaronDeb Dec 29, 2020
97bac50
Improve pylint for Spread Modelling
PanPip Jan 21, 2021
cfd4fe3
Minor code style fixes for Spread Modelling
PanPip Jan 21, 2021
e463d19
Improve docs style for Spread Modelling
PanPip Jan 21, 2021
2eb8faf
Small config file fix
PanPip Jan 22, 2021
7bd6cc6
Added latest changes.
AaronDeb Feb 10, 2021
223646e
Merge branch 'spread_modeling' of https://github.com/hudson-and-thame…
AaronDeb Feb 10, 2021
c7da3b0
pylint fix
AaronDeb Feb 10, 2021
8ea404c
another minor pylint fix
AaronDeb Feb 10, 2021
44b382d
Docs fixes
AaronDeb Feb 11, 2021
53b1d0d
Merge branch 'develop' into spread_modeling
PanPip Feb 12, 2021
eb0ee51
Minor code adjustments Spread Modelling
PanPip Feb 12, 2021
cfa318b
Minor docs adjustments Spread Modelling
PanPip Feb 12, 2021
31462b4
fixes for PR comments
AaronDeb Feb 13, 2021
e82445b
pylint fixes
AaronDeb Feb 13, 2021
1c3ee2f
Added changelog
AaronDeb Feb 13, 2021
4ea2ac1
Merge branch 'develop' into spread_modeling
PanPip Feb 15, 2021
7f72a8b
Update versions in Spread Modeling
PanPip Feb 15, 2021
180cfe7
Minor docs adjustments Spread Modelling
PanPip Feb 15, 2021
f025d1b
Added installation warning note
PanPip Feb 15, 2021
4 changes: 4 additions & 0 deletions arbitragelab/ml_approach/__init__.py
@@ -3,3 +3,7 @@
"""

from arbitragelab.ml_approach.pairs_selector import PairsSelector
from arbitragelab.ml_approach.tar import TAR
from arbitragelab.ml_approach.pi_sigma import PiSigmaNeuralNetwork
from arbitragelab.ml_approach.mlp import MultiLayerPerceptron
from arbitragelab.ml_approach.rnn import RecurrentNeuralNetwork
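
With this change, the new models are exposed at the package level. A minimal import sketch (not part of the diff, shown only to illustrate the added import path):

from arbitragelab.ml_approach import TAR, PiSigmaNeuralNetwork, MultiLayerPerceptron, RecurrentNeuralNetwork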
47 changes: 47 additions & 0 deletions arbitragelab/ml_approach/base.py
@@ -0,0 +1,47 @@
# Copyright 2019, Hudson and Thames Quantitative Research
# All rights reserved
# Read more: https://hudson-and-thames-arbitragelab.readthedocs-hosted.com/en/latest/additional_information/license.html
"""
This is the base class for all the neural network implementations in this module.
"""

import matplotlib.pyplot as plt
from keras.callbacks.callbacks import History


class BaseNeuralNetwork:
"""
Skeleton Class to be inherited by child
neural network implementations.
"""

def __init__(self):
"""
Initializing variables.
"""

self.fitted_model = None

def fit(self, *args, **kwargs) -> History:
"""
Wrapper over the keras model fit function.
"""

fitted_model = self.model.fit(*args, **kwargs)
self.fitted_model = fitted_model

return fitted_model

def predict(self, *args, **kwargs):
"""
Wrapper over the keras model predict function.
"""

return self.model.predict(*args, **kwargs)

def plot_loss(self) -> list:
"""
Method that returns a visual plot of the loss trajectory.
"""

return plt.plot(self.fitted_model.history['loss'])
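
A minimal usage sketch (not part of the PR) of how a child class might plug a Keras model into BaseNeuralNetwork; the TinyRegressor class, its layer sizes, and the random data below are hypothetical:

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from arbitragelab.ml_approach.base import BaseNeuralNetwork

class TinyRegressor(BaseNeuralNetwork):
    # Hypothetical child class: builds a small Keras model and hands it to the base wrapper.
    def __init__(self, frame_size):
        super().__init__()
        model = Sequential()
        model.add(Dense(8, input_dim=frame_size, activation="relu"))
        model.add(Dense(1, activation="linear"))
        model.compile(loss="mse", optimizer="adam")
        self.model = model

features = np.random.rand(100, 4)
target = np.random.rand(100, 1)

net = TinyRegressor(frame_size=4)
net.fit(features, target, epochs=5, verbose=0)  # History object is kept for plot_loss()
predictions = net.predict(features)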
113 changes: 113 additions & 0 deletions arbitragelab/ml_approach/correlation_filter.py
@@ -0,0 +1,113 @@
# Copyright 2019, Hudson and Thames Quantitative Research
# All rights reserved
# Read more: https://hudson-and-thames-arbitragelab.readthedocs-hosted.com/en/latest/additional_information/license.html
"""
This module implements the Correlation Filter described in Dunis et al. (2005).
"""

import pandas as pd
from sklearn.preprocessing import MinMaxScaler


class CorrelationFilter:
"""
Correlation Filter implementation.
"""

def __init__(self, buy_threshold: float = 0.4, sell_threshold: float = 0.8, lookback: int = 30):
"""
Initialization of trade parameters. The buy/sell thresholds are expressed in terms
of the change in correlation.

:param buy_threshold: (float) If the change in correlation is larger than this value, buy.
:param sell_threshold: (float) If the change in correlation is smaller than this value, sell.
:param lookback: (int) Number of lookback days for rolling correlation.
"""

self.lookback = lookback
self.buy_threshold = buy_threshold
self.sell_threshold = sell_threshold
self.corr_series = None

def fit(self, frame: pd.DataFrame) -> "CorrelationFilter":
"""
Sets the correlation benchmark inside the class object.

:param frame: (pd.DataFrame) Time series consisting of both legs of the spread.
:return: (CorrelationFilter) Class itself.
"""

# Making a copy of the input data
frame = frame.copy()

# Calculating the correlation delta series
two_legged_df = frame.iloc[:, 0:2]
corr_series = self._get_rolling_correlation(
two_legged_df, lookback=self.lookback).diff().dropna()

self.corr_series = corr_series

return self

def transform(self, frame: pd.DataFrame) -> pd.DataFrame:
"""
Marks trade signals based on the correlation benchmark generated in the fit
method.

:param frame: (pd.DataFrame) Spread time series.
:return: (pd.DataFrame) Time series augmented with the trade side
information.
"""

# Making a copy of the input data
working_frame = frame.copy()

# Generating signals
buy_signal = working_frame.index.isin(
self.corr_series[self.corr_series > self.buy_threshold].index)
sell_signal = working_frame.index.isin(
self.corr_series[self.corr_series < self.sell_threshold].index)

working_frame['side'] = 0
working_frame.loc[buy_signal, 'side'] = 1
working_frame.loc[sell_signal, 'side'] = -1
working_frame['side'] = working_frame['side'].shift(1)

return working_frame

@staticmethod
def _get_rolling_correlation(frame: pd.DataFrame, lookback: int) -> pd.Series:
"""
Calculates the rolling correlation between the first two columns of the frame,
assuming that they are the opposing legs of the spread. The resulting correlation
series is rescaled from the usual [-1, 1] range to [0, 1].

:param frame: (pd.DataFrame) DataFrame representing both legs of the spread.
:param lookback: (int) The lookback window of the rolling correlation.
:return: (pd.Series) Rolling correlation series of the input frame.
"""

two_legged_df = frame.iloc[:, 0:2]
two_legged_df.index.name = '_index_'

# Rolling correlation calculation
daily_corr = two_legged_df.rolling(
lookback, min_periods=lookback).corr()
daily_corr = daily_corr.iloc[:, 0].reset_index().dropna()

final_corr = daily_corr[daily_corr['level_1']
== two_legged_df.columns[1]]
final_corr.set_index('_index_', inplace=True)
final_corr.drop(['level_1'], axis=1, inplace=True)
final_corr.dropna(inplace=True)

# Scaling to [0,1] if needed
scaler = MinMaxScaler()
scaled_corr = scaler.fit_transform(
final_corr.iloc[:, 0].values.reshape(-1, 1)) # .diff()
corr_series = pd.Series(data=scaled_corr.reshape(
1, -1)[0], index=final_corr.index)
corr_series.dropna(inplace=True)

return corr_series
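
A minimal usage sketch (not part of the PR) of the fit/transform flow; the column names, random-walk prices, and lookback below are hypothetical:

import numpy as np
import pandas as pd
from arbitragelab.ml_approach.correlation_filter import CorrelationFilter

dates = pd.date_range("2020-01-01", periods=250, freq="B")
legs = pd.DataFrame({"leg_one": np.random.normal(0, 1, 250).cumsum() + 50,
                     "leg_two": np.random.normal(0, 1, 250).cumsum() + 52}, index=dates)

corr_filter = CorrelationFilter(lookback=30)
corr_filter.fit(legs)                   # stores the change-in-correlation series
signals = corr_filter.transform(legs)   # adds a 'side' column with 1/-1/0 trade signals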
93 changes: 93 additions & 0 deletions arbitragelab/ml_approach/feature_expander.py
@@ -0,0 +1,93 @@
# Copyright 2019, Hudson and Thames Quantitative Research
# All rights reserved
# Read more: https://hudson-and-thames-arbitragelab.readthedocs-hosted.com/en/latest/additional_information/license.html
"""
This module implements the FeatureExpander class.
"""

import numpy as np

# pylint: disable=W0102

class FeatureExpander:
"""
Higher order term Feature Expander implementation.
"""

def __init__(self, methods=[], n_orders=1):
"""

:param methods: (list) Possible expansion methods [chebyshev, legendre, laguerre, power].
:param n_orders: (int) Number of orders.
"""
self.methods = methods
self.n_orders = n_orders
self.dataset = None

@staticmethod
def _chebyshev(series, degree):
"""

:param series: (pd.Series)
:param degree: (int)
"""

return np.polynomial.chebyshev.chebvander(series, degree)

@staticmethod
def _legendre(series, degree):
"""

:param series: (pd.Series)
:param degree: (int)
"""

return np.polynomial.legendre.legvander(series, degree)

@staticmethod
def _laguerre(series, degree):
"""

:param series: (pd.Series)
:param degree: (int)
"""

return np.polynomial.laguerre.lagvander(series, degree)

@staticmethod
def _power(series, degree):
"""

:param series: (pd.Series)
:param degree: (int)
"""

return np.polynomial.polynomial.polyvander(series, degree)

def fit(self, frame):
"""


:param frame: (np.array) dataset
"""
self.dataset = frame
return self

def transform(self) -> list:
"""
Transforms the stored dataset into its higher-order feature expansion.

:return: (list) List of lists of the expanded values.
"""
new_dataset = []

for row in self.dataset.values:
expanded_row = list(row)
for degree in range(1, self.n_orders):
for meth in self.methods:
expanded_row.extend(
np.ravel(getattr(self, '_' + meth)(row, degree)))

new_dataset.append(np.ravel(expanded_row).tolist())

return new_dataset
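
A minimal usage sketch (not part of the PR) of the expander; the feature names and values below are hypothetical:

import pandas as pd
from arbitragelab.ml_approach.feature_expander import FeatureExpander

features = pd.DataFrame({"x1": [0.1, 0.2, 0.3], "x2": [1.0, 0.5, 0.2]})

expander = FeatureExpander(methods=["chebyshev", "power"], n_orders=3)
expanded = expander.fit(features).transform()  # each row: original values plus the expanded terms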