<a href="https://colab.research.google.com/github/alexandreib/QuantDesign/blob/main/QD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [31]:
def is_gpu():
    """Return True when TensorFlow lists at least one GPU among the local devices.

    The previous implementation only counted devices (``>= 2``), which also
    returned True on a CPU-only runtime that exposes two CPU entries.
    """
    # Imported inside the function so TensorFlow is only loaded when this helper is called.
    from tensorflow.python.client import device_lib

    return any(device.device_type == 'GPU' for device in device_lib.list_local_devices())

def in_colab():
    """Return True when the active IPython shell belongs to Google Colab.

    Returns False (instead of raising NameError) when the code runs outside
    an IPython session, where ``get_ipython`` is not defined.
    """
    try:
        shell = get_ipython()
    except NameError:
        # get_ipython is injected by IPython; a plain interpreter does not have it.
        return False
    return 'google.colab' in str(shell)

# Google Drive is only mounted when the notebook runs on Colab; the data
# cells below read their files from paths under /content/drive.
if in_colab():
    from google.colab import drive

    drive.mount("/content/drive", force_remount=False)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [32]:
import copy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import scipy as sp
import seaborn as sns
from plotly.subplots import make_subplots
from scipy.optimize import minimize

In [33]:
# Notebook-wide configuration.
CFG = {
    # When True, cells display intermediate frames / shapes.
    'verbose': True,
    # pandas resample rules; one OHLC frame is built per rule, in this order.
    'l_resamples': ['30min', '60min', 'D'],
}

In [None]:
# One-off conversion of the raw 1-minute text export into parquet.
# Kept commented out for provenance: it documents how the parquet file read below was produced.
# raw =[]
# with open('/content/drive/MyDrive/QuantDesign/GCESIE_1min.txt') as file:
#     for i, line in enumerate(file):
#         if i > 2 :
#             line = line.split(' ')
#             idx_date = -7
#             idx_time = -6
#             idx_close= -2
#             raw.append({'index' : i, 'date' : line[idx_date], 'time' : line[idx_time][:-1], 'close' : line[idx_close][:-1],})
#         else :
#             print(line)
# raw = pd.DataFrame(raw).set_index('index')
# raw['datetime'] = pd.to_datetime(raw['date'] + ' ' + raw['time'])
# raw['close'] = raw['close'].astype(float)
# raw.to_parquet('/content/drive/MyDrive/QuantDesign/GCESIE_1min.parquet')

# Load the pre-converted 1-minute data.
raw = pd.read_parquet('/content/drive/MyDrive/QuantDesign/GCESIE_1min.parquet')

if CFG['verbose']:
    display(raw.head())
    print(raw.shape)

In [None]:
# Build one open/close/high/low/mean frame per resample rule in CFG['l_resamples'].
ldf = []

# Index the close prices by timestamp once; every rule resamples this same series
# (previously the select / set_index / resample chain was repeated five times per rule).
close_by_time = raw[['datetime', 'close']].set_index('datetime')['close']

for i, resample in enumerate(CFG['l_resamples']) :
    bars = close_by_time.resample(resample)
    candles = pd.DataFrame({
        'open': bars.first(),
        'close': bars.last(),
        'high': bars.max(),
        'low': bars.min(),
        'mean': bars.mean(),
    })
    # Bins with no observation produce NaN rows; drop them and expose 'datetime' as a column again.
    ldf.append(candles.dropna().reset_index())
    if CFG['verbose'] : display(ldf[i].head())

In [None]:
def calculate_twap(df, column = 'mean', episode = 2):
    """Rolling average of ``df[column]`` over ``episode`` consecutive rows.

    Returns the result as a pandas array (not a Series); the first
    ``episode - 1`` entries are NaN because the window is not yet full.
    """
    rolling_window = df[column].rolling(episode)
    averaged = rolling_window.mean()
    return averaged.array

def ATR(df, window = 14):
    """Exponentially smoothed true range of a frame with 'high', 'low' and 'close' columns.

    The true range of a row is the largest of: high - low, |high - previous close|
    and |low - previous close|. It is then smoothed with ``ewm(alpha=1 / window)``.
    """
    previous_close = df['close'].shift(1)
    candidates = pd.concat(
        [
            df['high'] - df['low'],
            (df['high'] - previous_close).abs(),
            (df['low'] - previous_close).abs(),
        ],
        axis=1,
    )
    # Row-wise max ignores the NaNs of the first row (no previous close there).
    true_range = candidates.max(axis=1)
    return true_range.ewm(alpha=1 / window).mean()

# Add the TWAP_10/20/30/40 and ATR columns to every resampled frame, then drop
# the warm-up rows that still contain NaN and store the result back into ldf.
for i, df in enumerate(ldf) :
    twap_columns = {
        f'TWAP_{episode}': calculate_twap(df, episode = episode)
        for episode in (10, 20, 30, 40)
    }
    # Other option for the ATR : talib.ATR(df['High'], df['Low'], df['Close'], timeperiod=14)
    df = df.assign(**twap_columns, ATR = ATR(df))

    df = df.dropna().reset_index(drop = True)
    ldf[i] = df
    if CFG['verbose'] : display(df.tail())

In [None]:
# Line colour of each price / TWAP series drawn in the top row.
dict_colors = dict(
    close='brown',
    TWAP_10='black',
    TWAP_20='green',
    TWAP_30='red',
    TWAP_40='blue',
)

# Only the most recent rows of each frame are plotted.
last_x_point = 1000

# One subplot column per resampled frame: prices + TWAPs on row 1, ATR on row 2.
fig = make_subplots(rows=2, cols=len(ldf))
for i, df in enumerate(ldf) :
    recent = df.tail(last_x_point)
    subplot_col = i + 1

    for col in dict_colors :
        price_trace = go.Scatter(
            x = recent['datetime'],
            y = recent[col],
            mode = 'lines',
            name = col,
            line = dict(color=dict_colors[col]),
        )
        fig.add_trace(price_trace, row = 1, col = subplot_col)

    atr_trace = go.Scatter(
        x = recent['datetime'],
        y = recent['ATR'],
        mode = 'lines',
        name = 'ATR',
        line = dict(color='orange', width=2),
    )
    fig.add_trace(atr_trace, row = 2, col = subplot_col)

fig.update_layout(
    title='Spread Gold',
    height=600,
    margin=dict(l=10, r=10, t=30, b=10),
    showlegend=True,
)

fig.show()

In [None]:
# Left: scatter of the per-bar mean price; right: its kernel density estimate.
# Every resampled frame is drawn onto the same pair of axes.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15, 6))
for df in ldf :
    mean_price = df['mean']
    sns.scatterplot(mean_price, ax = ax1)
    sns.kdeplot(mean_price, ax = ax2)
plt.show()

In [None]:
# Start from a copy of the first (finest) frame; every column gets the rule name as suffix.
base_rule = CFG['l_resamples'][0]
df = ldf[0].copy(deep=True).add_suffix('_' + base_rule)

# Attach the indicator columns of each coarser frame to the base frame.
for i, dft in enumerate(ldf[1:]) :
    rule = CFG['l_resamples'][i + 1]
    datetime_key = f"datetime_{rule}"

    # Only datetime + indicator columns are kept from the coarser frame.
    dft = dft.drop(columns = ['open', 'close', 'high', 'low', 'mean']).add_suffix('_' + rule)
    # Each coarse row is re-labelled with the timestamp of the following coarse row
    # before joining (the last row therefore gets no timestamp and never matches).
    dft[datetime_key] = dft[datetime_key].shift(-1)
    df = df.merge(dft, how = 'left', left_on = f"datetime_{base_rule}", right_on = datetime_key)

if CFG['verbose'] :
    display(df.head())
    display(df.tail())

In [None]:
# Distance between the 30-minute close and every TWAP (4 windows x 3 timeframes),
# plus the 30-minute log return.
#
# Two fixes compared with the previous version of this cell:
#   * The frame is forward-filled BEFORE the diffs are computed. After the left merge the
#     60min / D columns are NaN on every 30-minute row whose timestamp did not match, so
#     computing the diff first and filling afterwards carried a stale diff forward instead
#     of comparing the current close with the last known TWAP.
#   * The module-level Series read by calculate_returns are taken from the cleaned frame.
#     They used to be captured before ffill()/dropna(), so they still held the merge NaNs
#     and were not aligned with df.
df = df.ffill()

for resample in ('30min', '60min', 'D') :
    for episode in (10, 20, 30, 40) :
        df[f'diff_{episode}_{resample}'] = df['close_30min'] - df[f'TWAP_{episode}_{resample}']

df['Log_Return'] = np.log(df['close_30min'] / df['close_30min'].shift(1))

# Drops the first row (no previous close) and the leading rows without coarse-timeframe data.
df = df.dropna()

# Module-level aliases consumed by calculate_returns.
diff_10_30min, diff_20_30min, diff_30_30min, diff_40_30min = (df[f'diff_{n}_30min'] for n in (10, 20, 30, 40))
diff_10_60min, diff_20_60min, diff_30_60min, diff_40_60min = (df[f'diff_{n}_60min'] for n in (10, 20, 30, 40))
diff_10_D, diff_20_D, diff_30_D, diff_40_D = (df[f'diff_{n}_D'] for n in (10, 20, 30, 40))
Log_Return = df['Log_Return']

In [None]:
# 3x3 grid, one column per resample rule:
#   row 0 - 30-minute close, row 1 - the four close-vs-TWAP diffs, row 2 - their densities.
recent = df.tail(last_x_point)

fig, ax = plt.subplots(3, 3, figsize=(25, 8))
for i, resample in enumerate(CFG['l_resamples']) :
    price_ax, diff_ax, density_ax = ax[0][i], ax[1][i], ax[2][i]

    sns.lineplot(data = recent, x = 'datetime_30min', y = 'close_30min', ax = price_ax)

    for episode in (10, 20, 30, 40) :
        sns.lineplot(data = recent, x = 'datetime_30min', y = f'diff_{episode}_{resample}', ax = diff_ax)

    # Densities use the full frame, not only the most recent rows.
    for episode in (10, 20, 30, 40) :
        sns.kdeplot(df[f'diff_{episode}_{resample}'], ax = density_ax)

plt.show()

In [None]:
def calculate_returns(weights, threshold = None):
    """Objective for the optimiser: the negated total log return of the weighted signal.

    The signal is the weighted sum of the twelve module-level ``diff_*`` Series, delayed
    by one row. Entries whose absolute value is below ``threshold`` are set to 0, the
    rest is squashed with tanh and multiplied by the module-level ``Log_Return``.

    Parameters
    ----------
    weights : sequence of 12 floats
        Order: 30min (10, 20, 30, 40), then 60min, then D.
    threshold : float, optional
        Minimum absolute signal kept. When omitted, the module-level ``threshold``
        is used, which is what this function always did before the parameter existed.

    Returns
    -------
    float
        ``-sum(tanh(signal) * Log_Return)``; lower is better.
    """
    if threshold is None:
        threshold = globals()['threshold']

    signals = (
        diff_10_30min, diff_20_30min, diff_30_30min, diff_40_30min,
        diff_10_60min, diff_20_60min, diff_30_60min, diff_40_60min,
        diff_10_D, diff_20_D, diff_30_D, diff_40_D,
    )
    # shift(): the signal of a row is applied to the return of the following row.
    index = sum(weights[k] * signal for k, signal in enumerate(signals)).shift().fillna(0)
    index = index.mask(np.abs(index) < threshold, 0)
    return -1 * np.sum(np.tanh(index) * Log_Return)

# Four TWAP windows per resample rule.
number_of_weights = len(CFG['l_resamples']) * 4
# Equal-weight starting point.
weights = [1 / number_of_weights] * number_of_weights
# Default threshold used when calculate_returns is called without one.
threshold = 2

def optimize_weights(threshold = 0) :
    """Minimise calculate_returns over the weights for the given ``threshold``.

    The threshold is now forwarded to the objective through ``args``; it used to be
    ignored, the objective silently reading the module-level ``threshold`` instead.

    Weights start equal, are each bounded to [0.01, 0.2] and must sum to 1.
    Returns the scipy ``OptimizeResult``.
    """
    result = minimize(
        fun = calculate_returns,
        x0 = [1 / number_of_weights] * number_of_weights,
        args = (threshold,),
        method = 'SLSQP',
        bounds = tuple((0.01, 0.2) for _ in range(number_of_weights)),
        constraints = ({'type': 'eq', 'fun': lambda w: np.sum(w) - 1}),
    )
    return result

# Sweep the signal threshold and keep the best optimisation run.
#
# optimize_weights minimises calculate_returns, which is the NEGATED total log return,
# so the best run is the one with the smallest `fun`. The previous comparison
# (`fun > best_result`) kept the worst run instead.
# The stand-alone optimize_weights() call that used to precede the sweep was removed:
# its result was discarded.
results = {}
l_results = []
best_result = np.inf
best_weights = None
best_threshold = None

# The loop variable is deliberately the module-level `threshold`.
for threshold in np.linspace(0, 15, 30, endpoint=False):
    result = optimize_weights(threshold)
    results[threshold] = result
    l_results.append(result.fun)
    if result.fun < best_result :
        best_result = result.fun
        best_weights = result.x
        best_threshold = threshold
    print(threshold, result.fun, result.x)

In [None]:
# Objective value reached for each swept threshold. The x axis now shows the threshold
# itself (the keys of `results`) instead of the position in the list.
objective_ax = sns.lineplot(x = list(results), y = l_results)
objective_ax.set(xlabel = 'threshold',
                 ylabel = 'objective (negated total log return)',
                 title = 'Optimised objective per threshold')
plt.show()

In [None]:
# Rebuild the trading signal with the result of the sweep and plot it.
#
# This cell previously used `weights` (the equal-weight starting point, never updated)
# and `threshold` (whatever value the sweep loop left behind) instead of
# `best_weights` / `best_threshold`. The signal is also built from the columns of the
# cleaned frame `df`, so it is aligned with `df` and free of NaN.
signal_columns = [f'diff_{episode}_{resample}'
                  for resample in ('30min', '60min', 'D')
                  for episode in (10, 20, 30, 40)]  # same order as the weights

index = df[signal_columns].mul(np.asarray(best_weights), axis = 'columns').sum(axis = 1)
index = index.shift().fillna(0)  # the signal of a row is applied to the following row
index = index.mask(index.abs() < best_threshold, 0)
index = np.tanh(index)

df['index'] = index
# Direction of the last non-zero signal (NaN until the first non-zero signal).
df['sign_index'] = np.sign(index).replace(0, np.nan).ffill()
df['cumsum_index'] = index.cumsum()

fig, ax = plt.subplots(2, 2, figsize=(25, 12))
ax2 = ax[0][0].twinx()
sns.lineplot(data = df, x='datetime_30min', y='close_30min', ax = ax[0][0])
sns.lineplot(data = df, x='datetime_30min', y='cumsum_index', ax = ax2, color = 'orange')
ax[0][0].set_title('30min close and cumulative signal')

sns.lineplot(data = df, x='datetime_30min', y='index', ax = ax[1][0], color = 'orange')
ax[1][0].set_title('Signal')

sns.lineplot((df['Log_Return'] * df['index']).cumsum(), ax = ax[0][1])
ax[0][1].set_title('Cumulative log return, signal-weighted')
sns.lineplot((df['Log_Return'] * df['sign_index']).cumsum(), ax = ax[1][1])
ax[1][1].set_title('Cumulative log return, signal sign only')

plt.show()