data from https://stooq.com/db/h/

Timestamps in the raw file are in CEST (Central European Summer Time, UTC+2).

### Initialization

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import timedelta, time, datetime


import warnings
# `display` and `HTML` are imported from the public IPython.display module;
# importing them from IPython.core.display is deprecated.
from IPython.display import display, HTML
# Silence every warning for the rest of the session.
# NOTE(review): this also hides pandas/numpy deprecation notices raised by later cells.
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns',100) # show up to 100 columns when a frame is displayed
np.set_printoptions(linewidth = 220) # allow wider lines when printing numpy arrays
display(HTML("<style>.container { width:80% !important; }</style>")) # widen the notebook cell container to 80% of the page

In [3]:
# Folder with the 5-minute NASDAQ files (absolute, machine-specific path).
work_dir = 'D:/5_us_txt/data/5 min/us/nasdaq stocks/2/'
# Read the TSLA file with pandas' default CSV settings.
tsla_path = ''.join([work_dir, 'tsla.us.txt'])
df = pd.read_csv(tsla_path)

In [4]:
# Short lowercase names, assigned by position to the columns read from the file.
# The assignment fails unless the frame has exactly ten columns.
cols = [
    'ticker', 'period', 'date', 'time',
    'open', 'high', 'low', 'close',
    'vol', 'openint',
]
df.columns = cols

In [5]:
# Build a naive timestamp from the `date` (YYYYMMDD) and `time` (HHMMSS) columns and
# move it back 9 hours: CEST (UTC+2) -> US Pacific daylight time (PDT, UTC-7).
# The time part is zero-padded to six digits before parsing; an unpadded value such as
# 13000 (01:30:00) does not match the fixed-width format and can be mis-parsed or rejected.
# NOTE(review): a constant 9-hour offset is off by one hour during the weeks when the
# US and EU daylight-saving switches are out of step — confirm the data never covers them.
df['datetime'] = pd.to_datetime(
    df['date'].astype(str) + df['time'].astype(str).str.zfill(6),
    format = '%Y%m%d%H%M%S',
) - timedelta(hours = 9)

### Data Mining

In [5]:
# Volume of each of the previous six 5-minute bars (30 minutes of history).
# Rows with fewer than i predecessors get 0 instead of NaN.
# NOTE(review): shift() is positional, so the first bars of a day pick up the last
# bars of the previous day in the file — confirm that this is intended.
for i in range(1, 7):
    df['vol_lag_' + str(i)] = df['vol'].shift(periods = i).fillna(0)

# Total volume over the previous five bars (25 minutes); vol_lag_6 is not part of this sum.
# A vectorised column-wise sum replaces the slower row-by-row apply(sum, axis = 1).
df['tot_vol_5_lag'] = df[['vol_lag_' + str(k) for k in range(1, 6)]].sum(axis = 1)

In [6]:
# Opening price six rows (6 x 5 min = 30 min) after the current bar.
# NOTE(review): the shift is positional, so the last six bars of a day are paired with
# the next day's first bars rather than a price 30 minutes later — confirm intended.
df['val_30_min_ahead'] = df['open'].shift(periods = -6)
# Drop rows without a look-ahead value (the final six rows, plus any row whose look-ahead
# open is missing). Missing values are detected directly instead of through a 0 placeholder,
# so a genuine 0.0 price is no longer discarded.
# Re-running this cell trims another six rows because `df` is overwritten.
df = df.dropna(subset = ['val_30_min_ahead']).copy()

In [15]:
# Per-date total, minimum and maximum of the 5-minute bar volumes.
# Aggregations are given by name: passing np.sum / np.min / np.max callables is deprecated
# in recent pandas, and named aggregation sets the output column names directly instead of
# relying on a positional rename.
piv = df.groupby('date')['vol'].agg(sum_ = 'sum', min_ = 'min', max_ = 'max')
# Lookup tables keyed by the integer date (YYYYMMDD).
daily_vol_tot_dict = piv['sum_'].to_dict()
daily_vol_min_dict = piv['min_'].to_dict()
daily_vol_max_dict = piv['max_'].to_dict()

In [11]:
# NOTE(review): this cell recomputes the same per-date volume lookups as the cell directly
# before it; one of the two can be removed.
# Per-date total, minimum and maximum of the 5-minute bar volumes.
# Aggregations are given by name: passing np.sum / np.min / np.max callables is deprecated
# in recent pandas, and named aggregation sets the output column names directly instead of
# relying on a positional rename.
piv = df.groupby('date')['vol'].agg(sum_ = 'sum', min_ = 'min', max_ = 'max')
# Lookup tables keyed by the integer date (YYYYMMDD).
daily_vol_tot_dict = piv['sum_'].to_dict()
daily_vol_min_dict = piv['min_'].to_dict()
daily_vol_max_dict = piv['max_'].to_dict()

Unnamed: 0,ticker,period,date,time,open,high,low,close,vol,openint,datetime,vol_lag_1,vol_lag_2,vol_lag_3,vol_lag_4,vol_lag_5,vol_lag_6,tot_vol_5_lag,val_30_min_ahead
0,TSLA.US,5,20200706,153000,1276.69,1284.27,1266.04,1281.71,520562,0,2020-07-06 06:30:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1303.90
1,TSLA.US,5,20200706,153500,1281.72,1282.45,1272.13,1274.68,138299,0,2020-07-06 06:35:00,520562.0,0.0,0.0,0.0,0.0,0.0,520562.0,1306.66
2,TSLA.US,5,20200706,154000,1275.18,1294.00,1270.07,1292.54,286518,0,2020-07-06 06:40:00,138299.0,520562.0,0.0,0.0,0.0,0.0,658861.0,1302.17
3,TSLA.US,5,20200706,154500,1292.94,1298.00,1288.35,1293.00,315377,0,2020-07-06 06:45:00,286518.0,138299.0,520562.0,0.0,0.0,0.0,945379.0,1300.27
4,TSLA.US,5,20200706,155000,1293.48,1297.81,1286.67,1297.38,203295,0,2020-07-06 06:50:00,315377.0,286518.0,138299.0,520562.0,0.0,0.0,1260756.0,1294.95
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2875,TSLA.US,5,20200825,210500,2002.06,2004.00,2000.00,2002.00,35000,0,2020-08-25 12:05:00,37968.0,47671.0,31029.0,43056.0,25066.0,32136.0,184790.0,2009.54
2876,TSLA.US,5,20200825,211000,2002.18,2011.97,2001.00,2010.81,55399,0,2020-08-25 12:10:00,35000.0,37968.0,47671.0,31029.0,43056.0,25066.0,194724.0,2011.17
2877,TSLA.US,5,20200825,211500,2010.83,2020.00,2010.70,2015.17,133251,0,2020-08-25 12:15:00,55399.0,35000.0,37968.0,47671.0,31029.0,43056.0,207067.0,2017.09
2878,TSLA.US,5,20200825,212000,2014.40,2016.00,2011.52,2011.64,55384,0,2020-08-25 12:20:00,133251.0,55399.0,35000.0,37968.0,47671.0,31029.0,309289.0,2017.75
