In [228]:
import numpy as np
import pandas as pd
from scipy.stats.stats import pearsonr # used to calculate correlation coefficient
from pymongo import MongoClient
import json
from datetime import datetime, timedelta, date
from pprint import pprint
from enum import Enum
import copy
from collections import deque
from time import time

# Read timeseries of benchmark and coin

In [119]:
filename_benchmark = 'testing/BTC_Bitfinex_USD.csv'
filename_coin      = 'testing/XRP_Bitfinex_USD.csv'

# --- read benchmark ---
df_benchmark = pd.read_csv(filename_benchmark, header=None, sep=";", 
                            names=['date', 'timestamp', 'open', 'high', 'low', 'close', 'volFrom', 'volTo'])
    
# this makes indexing via date faster
df_benchmark = df_benchmark.set_index(['date'])         # index: string
df_benchmark.index = pd.to_datetime(df_benchmark.index) # index: datetime

pprint(df_benchmark)

# --- read coin ---
df_coin = pd.read_csv(filename_coin, header=None, sep=";", 
                      names=['date', 'timestamp', 'open', 'high', 'low', 'close', 'volFrom', 'volTo'])
    
# this makes indexing via date faster
df_coin = df_coin.set_index(['date'])         # index: string
df_coin.index = pd.to_datetime(df_coin.index) # index: datetime

                        timestamp      open      high       low     close  \
date                                                                        
2018-01-01 00:00:00  1.514765e+09   9830.51   9830.51   9830.51   9830.51   
2018-01-01 01:00:00  1.514768e+09   9830.42   9830.42   9830.42   9830.42   
2018-01-01 02:00:00  1.514772e+09   9874.93   9874.93   9874.93   9874.93   
2018-01-01 03:00:00  1.514776e+09   9894.21   9894.21   9894.21   9894.21   
2018-01-01 04:00:00  1.514779e+09   9900.78   9900.78   9900.78   9900.78   
2018-01-01 05:00:00  1.514783e+09  10032.37  10032.37  10032.37  10032.37   
2018-01-01 06:00:00  1.514786e+09  10120.35  10120.35  10120.35  10120.35   
2018-01-01 07:00:00  1.514790e+09  10120.25  10120.25  10120.25  10120.25   
2018-01-01 08:00:00  1.514794e+09  10070.72  10070.72  10070.72  10070.72   
2018-01-01 09:00:00  1.514797e+09  10091.04  10091.04  10091.04  10091.04   
2018-01-01 10:00:00  1.514801e+09  10170.63  10170.63  10170.63  10170.63   

# Calculate Multiplier and Correlation

If there is no pre-defined libraries to compute (Pearson) correlation, it can also be calculated by its definition: <br>
https://en.wikipedia.org/wiki/Pearson_correlation_coefficient

This relies on calculating the covariance between two vectors, and the variance of each vector indidually. <br>
If you need further clarification please ask.

In [4]:
#

# input variables

#input parameter
dt_benchmark_startTime    = datetime.datetime.strptime("2018-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")

#always current time
dt_benchmark_endTime      = datetime.datetime.strptime("2018-01-31 23:00:00", "%Y-%m-%d %H:%M:%S")

#input parameter
ReturnFrequency = "daily"

#list of currencies

#END input variables
    
dt_currentTime = dt_benchmark_startTime

# add first interval
dt_previousTime = dt_currentTime
if ReturnFrequency == "hourly":
    dt_currentTime += datetime.timedelta(hours=1)
elif ReturnFrequency == "daily":
    dt_currentTime += datetime.timedelta(days=1)
else:
    print('ERROR. Need to implment other frequencies')
    assert(False)


arr_PnL_benchmark  = np.array([])
arr_PnL_coin       = np.array([])
   
#-----------------------------------------#
#          calculate return timeseries    #   
#-----------------------------------------#

while (dt_currentTime <= dt_benchmark_endTime):
    # calculate return of benchmark in period [t-1, t]
    PnL_benchmark = df_benchmark.loc[dt_currentTime]['close'] / \
                    df_benchmark.loc[dt_previousTime]['close'] -1.0
    arr_PnL_benchmark = np.append(arr_PnL_benchmark, PnL_benchmark)
    
    # calculate return of strategy in period [t-1, t] (based on equity, i.e. MtM value of positions)
    PnL_coin = df_coin.loc[dt_currentTime]['close']  / \
               df_coin.loc[dt_previousTime]['close'] -1.0
    arr_PnL_coin = np.append(arr_PnL_coin, PnL_coin)
     
    # move to next timepoint
    if ReturnFrequency == "hourly":
        dt_previousTime += datetime.timedelta(hours=1)
        dt_currentTime  += datetime.timedelta(hours=1)
    elif ReturnFrequency == "daily":
        dt_previousTime += datetime.timedelta(days=1)
        dt_currentTime += datetime.timedelta(days=1)
    else:
        print('ERROR. Need to implment other frequencies')
        assert(False)

#-----------------------------------------#
#          calculate multiplier           #   
#-----------------------------------------#
arr_x = arr_PnL_benchmark
arr_y = arr_PnL_coin

# least square regression (linear): y = alpha + beta*x
linReg = np.polyfit(x=arr_PnL_benchmark, y=arr_PnL_coin, deg=1)

alpha = linReg[1] # this is the y-intercept, not needed
beta  = linReg[0] # this is the slope, which also is the multiplier
multiplier = beta
print("multiplier            : ", multiplier)

#-----------------------------------------#
#          calculate correlation          #   
#-----------------------------------------#
correlation = pearsonr(arr_PnL_benchmark, arr_PnL_coin)
print("correlation            :", correlation[0])

multiplier            :  -0.6945408340863843
correlation            : -0.15598094157469428


# Multiplier and Correlation class calculator

Class calculates multiplier and correlation matrix


In [2]:
from flask import Flask
from flask import request
import numpy as np
import pandas as pd
from scipy.stats.stats import pearsonr # used to calculate correlation coefficient
from pymongo import MongoClient
import json
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from calendar import monthrange
from pprint import pprint
from enum import Enum
import copy
from collections import deque
import time

MONGO_DB_NAME       = 'bitcoin'
MONGO_HOST       = 'localhost'
MONGO_COLLECTIONS        = ['daily_data_test', 'hourly_data_test']
MONGO_DB_DEFAULT_COLLECTION = 'daily_data_test'

class RequestFrequency(Enum):
    DAILY  = 0
    HOURLY = 1

class HourlyTimeIntervals(Enum):
    A_DAY       = 1
    FIVE_DAYS   = 5
    WEEK        = 7
    TEN_DAYS    = 10
    TWO_WEEKS   = 14

class DailyTimeIntervals(Enum):
    A_MONTH      = 1
    THREE_MONTHS = 3
    HALF_YEAR    = 6
    NINE_MONTHS  = 9
    A_YEAR       = 12

class MongoConnector:
    def __init__(self,
                host=MONGO_HOST,
                db_name=MONGO_DB_NAME):
        self._mongo_connection = MongoClient(host=host,
                                             authSource=db_name)
        self.db = self._mongo_connection[db_name]
        
        
    def find_one(self, 
                *params,
                collection=MONGO_DB_DEFAULT_COLLECTION):
        collection = self.db[collection]
        if not collection:
            raise Exception('collection not found')
        return collection.find_one(*params)

    def find(self, 
             *params,
             colleciton=MONGO_DB_DEFAULT_COLLECTION):
        collection = self.db[colleciton]
        if not collection:
            raise Exception('collection not found')
        return collection.find(*params)
        

class MultiplierCorrelationCalculator:
    FREQUENCY_LIST        = RequestFrequency.__members__.keys()
    HOURLY_TIME_INTERVALS = list(map(lambda x: x.value, HourlyTimeIntervals.__members__.values()))
    DAILY_TIME_INTERVALS  = list(map(lambda x: x.value, DailyTimeIntervals.__members__.values()))
    TIME_INTERVALS_DICT   = {
        'hourly': HOURLY_TIME_INTERVALS,
        'daily': DAILY_TIME_INTERVALS
    }

    TIME_INTERVALS_CALCULATOR = {
        'daily' : lambda x: x * 30,
        'hourly': lambda x: x * 24
    }

    def __init__(self,
                 horizon,
                 currencies_list=['all'],
                 return_frequency='daily',
                 db_name=MONGO_DB_NAME):
        if return_frequency.upper() not in self.FREQUENCY_LIST:
            raise Exception('Only [daily, hourly] values supports for return_frequency parameter yet...')
        if horizon not in self.TIME_INTERVALS_DICT[return_frequency]:
            msg = 'Only %s values supports for %s collection' % (','.join(self.time_points), 
                                                                 return_frequency)
            raise Exception(msg)
        bounds_normalizer     = self.TIME_INTERVALS_CALCULATOR[return_frequency]
        self.horizon          = bounds_normalizer(horizon)
        self.collection       = "%s_data_test" % return_frequency
        mongo_c               = MongoClient(host=MONGO_HOST,
                                            authSource=db_name)
        self.connector        = mongo_c[db_name][self.collection]
        self.currencies_list  = currencies_list
        if currencies_list == ['all']:
            params         = {}, {'Ccy': 1, '_id': 0}
            currencies_collection = self.connector.find(*params).limit(15)
            self.currencies_list  = [x['Ccy'] for x in currencies_collection]
    
    def calculate_pairs(self):
        df_prices = []
        params    = {'Ccy': {'$in': self.currencies_list}}, {'history.close': 1, 'Ccy': 1}
        for data in self.connector.find(*params):
            da_data = [history['close'] for history in list(reversed(data['history']))[1:self.horizon]]
            df_prices.append(da_data)
        df_prices = pd.DataFrame(list(zip(*df_prices)), columns=self.currencies_list)
        df_returns=df_prices / df_prices.shift(1) - 1
        df_correl=df_returns.corr()
        corel = pd.DataFrame.to_dict(df_correl)
        df_beta=df_returns.cov()/df_returns.var()
        beta = pd.DataFrame.to_dict(df_beta)
        return {'multiplier': beta, 'correlation': corel }

In [638]:
coins = ['BTC', 'ETH', 'BCH']
horizon = 1
data = MultiplierCorrelationCalculator(horizon=horizon,
                                    currencies_list=['all'],
                                    return_frequency='daily',
                                    db_name=MONGODB_NAME
                                    )
pprint(data.calculate_pairs())

{'correlation': {'BCH': {'BCH': 1.0,
                         'BTC': 0.8597257267610219,
                         'EOS': 0.8574235781291275,
                         'ETH': 0.8240069225092205,
                         'XRP': 0.6322640108113927},
                 'BTC': {'BCH': 0.8597257267610219,
                         'BTC': 1.0,
                         'EOS': 0.9318901555860447,
                         'ETH': 0.934233793603264,
                         'XRP': 0.7028519906887352},
                 'EOS': {'BCH': 0.8574235781291275,
                         'BTC': 0.9318901555860447,
                         'EOS': 1.0,
                         'ETH': 0.938985666891507,
                         'XRP': 0.7575719766267578},
                 'ETH': {'BCH': 0.8240069225092205,
                         'BTC': 0.934233793603264,
                         'EOS': 0.938985666891507,
                         'ETH': 1.0,
                         'XRP': 0.7846656733575303},
                 'XR

In [36]:
# selected_params   = {}, {'Ccy': 1, '_id': 0}
connector  = MongoClient(host=MONGO_HOST,
                         authSource=MONGO_DB_NAME)
collection = connector['bitcoin']['daily_data_test']
df_prices = []
horizon = 30 * 48
currencies_list = [ "ETH",
            "XRP",
            "BTC",
            "BCH",
            "EOS"]
params    = {'Ccy': {'$in': currencies_list}}, {'history.close': 1, 'Ccy': 1}
for data in collection.find(*params):
    da_data = [history['close'] for history in list(data['history'])[1:horizon]]
    df_prices.append(da_data)
df_prices = pd.DataFrame(list(zip(*df_prices)), columns=currencies_list)
df_prices.head()


Unnamed: 0,ETH,XRP,BTC,BCH,EOS
0,551.59,0.08584,0.912,1.2,0.01602
1,405.92,0.0808,1.05,1.2,0.016
2,253.13,0.07474,3.0,1.2,0.01739
3,220.3,0.07921,4.2,0.99,0.017
4,241.87,0.0505,3.24,1.29,0.01469


In [37]:
df_returns=df_prices / df_prices.shift(1) - 1
pprint(df_returns)

          ETH       XRP       BTC       BCH       EOS
0         NaN       NaN       NaN       NaN       NaN
1   -0.264091 -0.058714  0.151316  0.000000 -0.001248
2   -0.376404 -0.075000  1.857143  0.000000  0.086875
3   -0.129696  0.059807  0.400000 -0.175000 -0.022427
4    0.097912 -0.362454 -0.228571  0.303030 -0.135882
5    0.333774  0.240000 -0.117284  0.457364  0.055140
6    0.064569 -0.129032  0.129371 -0.047872 -0.032258
7   -0.123053 -0.074074 -0.179567  0.000000 -0.100000
8   -0.088621  0.108911 -0.105660 -0.234637  0.111111
9    0.188648  0.071429 -0.063291 -0.051095 -0.000667
10  -0.031447 -0.018333 -0.234234  0.046154 -0.098065
11  -0.065981  0.186757 -0.100000 -0.088235 -0.018491
12   0.016534 -0.103004  0.281046  0.225806  0.115298
13  -0.008899  0.082137 -0.127551 -0.052632 -0.054054
14   0.007802 -0.099484 -0.116959 -0.027778  0.123571
15   0.485551 -0.018003 -0.145695 -0.035714 -0.014622
16   0.549518  0.000000 -0.054264 -0.081481 -0.052903
17   0.070612 -0.050000  0.2

In [38]:
df_correl=df_returns.corr()
pprint(df_correl)

          ETH       XRP       BTC       BCH       EOS
ETH  1.000000 -0.031343 -0.173781 -0.015197  0.001925
XRP -0.031343  1.000000  0.005862  0.032282  0.040747
BTC -0.173781  0.005862  1.000000  0.009353 -0.015637
BCH -0.015197  0.032282  0.009353  1.000000  0.015914
EOS  0.001925  0.040747 -0.015637  0.015914  1.000000


In [40]:
df_beta=df_returns.cov()/df_returns.var()
pprint(df_beta)

          ETH       XRP       BTC       BCH       EOS
ETH  1.000000 -0.030977 -0.124584 -0.015890  0.001069
XRP -0.031712  1.000000  0.004252  0.034152  0.022893
BTC -0.242404  0.008081  1.000000  0.013641 -0.012112
BCH -0.014534  0.030514  0.006413  1.000000  0.008451
EOS  0.003467  0.072524 -0.020188  0.029966  1.000000
