In [1]:
import pandas as pd
import plotly.graph_objects as go

# Raw Data

In [2]:
# Load the raw parquet dataset, flatten whatever index it was stored with into
# ordinary columns, then key the rows by their "timestamp" column.
raw = (
    pd.read_parquet("../data/raw")
    .reset_index()
    .set_index("timestamp")
)

In [3]:
# List the column labels left in the raw frame once "timestamp" is the index.
raw.columns

Index(['symbol', 'open', 'high', 'low', 'close', 'volume', 'trade_count',
       'vwap'],
      dtype='object')

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
raw.describe()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap
count,1257.0,1257.0,1257.0,1257.0,1257.0,1257.0,1257.0
mean,355.711789,357.984437,353.240624,355.771625,85999360.0,536061.1,355.680489
std,64.499568,64.754104,64.195442,64.51069,43988500.0,382878.6,64.471478
min,228.19,229.6833,218.26,222.68,20538060.0,91922.0,224.675822
25%,291.54,293.16,290.4,291.82,57958750.0,301908.0,291.962582
50%,363.13,364.38,359.17,362.58,75513770.0,442250.0,361.703984
75%,411.86,414.15,409.8795,412.2,99651010.0,623749.0,411.997087
max,479.22,479.98,476.06,477.71,392221700.0,3709928.0,477.868706


In [5]:
# Check the index range, per-column non-null counts, and dtypes.
raw.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1257 entries, 2018-06-07 04:00:00+00:00 to 2023-06-05 04:00:00+00:00
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   symbol       1257 non-null   object 
 1   open         1257 non-null   float64
 2   high         1257 non-null   float64
 3   low          1257 non-null   float64
 4   close        1257 non-null   float64
 5   volume       1257 non-null   float64
 6   trade_count  1257 non-null   float64
 7   vwap         1257 non-null   float64
dtypes: float64(7), object(1)
memory usage: 88.4+ KB


In [6]:
# Report the calendar dates of the first and last rows of the timestamp index.
first_day = raw.index[0].date()
last_day = raw.index[-1].date()
f"the start date is {first_day} and the end date is {last_day}"

'the start date is 2018-06-07 and the end date is 2023-06-05'

In [7]:
# OHLC candlestick chart of the raw price data, titled with the upper-cased
# symbol taken from the first row.
candles = go.Candlestick(
    x=raw.index,
    open=raw["open"],
    high=raw["high"],
    low=raw["low"],
    close=raw["close"],
)
fig = go.Figure(data=[candles])
fig.update_layout(title=raw["symbol"].iloc[0].upper(), width=1200)
fig.show()

# Features Data

In [8]:
# Read the features parquet data stored under ../data/features/.
features = pd.read_parquet("../data/features/")

In [9]:
# Preview the first rows of the features frame.
features.head()

Unnamed: 0_level_0,symbol,vwap,SPY_NATR_RANK,SPY_AROON_RANK,SPY_RSI_RANK,SPY_ROC_RANK,SPY_RTNS_RANK,SPY_VWAP_RANK
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-08-17 04:00:00+00:00,SPY,284.606982,1.0,0.290323,1.0,1.0,0.72,0.941176
2018-08-20 04:00:00+00:00,SPY,285.568037,0.5,0.296875,1.0,0.5,0.627451,0.980769
2018-08-21 04:00:00+00:00,SPY,286.583994,0.333333,0.469697,1.0,1.0,0.596154,1.0
2018-08-22 04:00:00+00:00,SPY,286.230403,0.25,0.470588,0.75,0.25,0.415094,0.981481
2018-08-23 04:00:00+00:00,SPY,285.995389,0.2,0.471429,0.4,1.0,0.314815,0.963636


In [10]:
# Check the index range, per-column non-null counts, and dtypes of the features.
features.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1207 entries, 2018-08-17 04:00:00+00:00 to 2023-06-05 04:00:00+00:00
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   symbol          1207 non-null   object 
 1   vwap            1207 non-null   float64
 2   SPY_NATR_RANK   1207 non-null   float64
 3   SPY_AROON_RANK  1207 non-null   float64
 4   SPY_RSI_RANK    1207 non-null   float64
 5   SPY_ROC_RANK    1207 non-null   float64
 6   SPY_RTNS_RANK   1207 non-null   float64
 7   SPY_VWAP_RANK   1207 non-null   float64
dtypes: float64(7), object(1)
memory usage: 84.9+ KB


In [11]:
# Plot every column whose name starts with "SPY" as its own line against the
# features index, titled with the upper-cased symbol from the first row.
title = f"{features['symbol'].iloc[0].upper()} Features"

feature_columns = [column for column in features.columns if column.startswith("SPY")]

scatter = go.Figure()
for feature in feature_columns:
    scatter.add_trace(go.Scatter(x=features.index, y=features[feature], name=feature))

# Horizontal legend anchored by its bottom edge at y=1.02 (just above the plot area).
scatter.update_layout(
    title=title,
    width=1200,
    legend=dict(orientation="h", yanchor="bottom", y=1.02),
)

scatter.show()

# Results

In [15]:
import os

In [28]:
# Locate the two result files by name prefix: one "best*" and one "results*".
RESULTS_DIR = "../data/brute_results/"


def _first_match(directory, prefix):
    """Return the path of the alphabetically first entry in `directory` starting with `prefix`.

    os.listdir returns entries in arbitrary order, so the candidates are sorted
    to make the choice deterministic when several files share the prefix.

    Args:
        directory: Directory path, including its trailing slash.
        prefix: Leading characters the file name must start with.

    Returns:
        The directory path concatenated with the selected file name.

    Raises:
        FileNotFoundError: If no entry in `directory` starts with `prefix`.
    """
    matches = sorted(name for name in os.listdir(directory) if name.startswith(prefix))
    if not matches:
        raise FileNotFoundError(f"no file starting with {prefix!r} found in {directory}")
    return f"{directory}{matches[0]}"


bestpath = _first_match(RESULTS_DIR, "best")
resultspath = _first_match(RESULTS_DIR, "results")

In [29]:
# Parse the two result files: `bestpath` is read as JSON, `resultspath` as CSV
# with its first column used as the row index.
best = pd.read_json(bestpath)
results = pd.read_csv(resultspath, index_col=0)

In [30]:
# Display the frame loaded from the "best*" JSON file.
best

Unnamed: 0,Fast,Slow,CAGR,Sharpe,Drawdown,Returns
0,10,124,0.1,0.82,-0.18,1.619843


In [31]:
# Display the results table (pandas abbreviates long frames in the rendered output).
results

Unnamed: 0_level_0,Fast,Slow,CAGR,Sharpe,Drawdown,Returns
Trial,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,10,124,0.10,0.82,-0.18,1.619843
0,10,123,0.10,0.79,-0.18,1.596511
0,10,122,0.10,0.79,-0.18,1.596511
0,11,122,0.10,0.79,-0.18,1.593044
0,11,124,0.10,0.78,-0.19,1.587862
...,...,...,...,...,...,...
0,43,69,-0.01,0.02,-0.27,1.012153
0,41,68,-0.01,0.01,-0.27,1.007658
0,43,71,-0.01,0.01,-0.27,1.003885
0,43,72,-0.01,0.00,-0.27,1.001032
