In [43]:
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from plotly.subplots import make_subplots

import pytz

In [44]:
def plot_candlestick(df, anomaly, title):
    """Show a candlestick chart with a volume overlay and shaded anomaly bars.

    Parameters
    ----------
    df : pandas.DataFrame
        Bars to plot. Must provide the columns 'datetime', 'open', 'high',
        'low', 'close' and 'volume'. The index must be unique, but it does not
        have to be contiguous (a filtered frame is fine).
    anomaly : iterable
        Index labels of `df` identifying the bars to highlight.
    title : str
        Figure title.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.

    Raises
    ------
    KeyError
        If a label in `anomaly` is not present in ``df.index``.
    """
    # Price goes on the primary y axis, volume on the secondary one.
    fig = make_subplots(specs=[[{'secondary_y': True}]])

    fig.add_trace(
        go.Candlestick(
            x=df['datetime'],
            open=df['open'],
            low=df['low'],
            high=df['high'],
            close=df['close'],
            showlegend=False,
        )
    )

    fig.add_trace(
        go.Bar(
            x=df['datetime'],
            y=df['volume'],
            opacity=0.5,
            showlegend=False,
            name='Volume'
        ),
        secondary_y=True
    )

    timestamps = df['datetime']
    last_pos = len(df) - 1
    for idx in anomaly:
        # Shade from the previous bar to the next bar. Neighbours are looked up
        # by position, not by `idx - 1` / `idx + 1`, because those labels do not
        # exist when rows have been filtered out of `df`. Clamping keeps the
        # first and last bars inside the frame.
        pos = df.index.get_loc(idx)
        x0 = timestamps.iloc[max(pos - 1, 0)]
        x1 = timestamps.iloc[min(pos + 1, last_pos)]

        fig.add_vrect(
            x0=x0,
            x1=x1,
            fillcolor='salmon',
            opacity=0.2,
            line_width=0,
        )

    fig.update_layout(
        title=f'{title}',
        xaxis_title='Datetime',
        yaxis_title='Price ($)',
        xaxis_rangeslider_visible=False,
        width=2000,
        height=500,
        template='plotly'
    )

    # Collapse the Saturday-to-Monday gap so weekends do not leave empty space.
    fig.update_xaxes(rangebreaks = [{'bounds': ['sat', 'mon']}])

    fig.show()

In [45]:
# Read the semicolon-separated bar file. header=0 discards the file's own
# header row; the explicit names below are used instead.
column_names = ['date', 'time', 'open', 'high', 'low', 'close', 'volume']
df_wti = pd.read_csv('./data/backtestmarket/cl-15m.csv', sep=';', header=0, names=column_names)

# The file stores dates as dd/mm/YYYY. Rewrite them as mm-dd-YYYY text so the
# combined "date time" string below is parsed month-first.
day_first_dates = pd.to_datetime(df_wti['date'], format='%d/%m/%Y')
df_wti['date'] = day_first_dates.dt.strftime('%m-%d-%Y')

# Build the timestamp, tag it with the source zone, then convert to New York.
# NOTE(review): in the IANA database 'Etc/GMT+6' is a fixed UTC-6 offset
# (POSIX-style inverted sign) - confirm this matches the data vendor's clock.
ny_tz = pytz.timezone('America/New_York')
stamp_text = df_wti['date'] + ' ' + df_wti['time']
df_wti['datetime'] = pd.to_datetime(
    pd.to_datetime(stamp_text).dt.tz_localize('Etc/GMT+6').dt.tz_convert(ny_tz)
)

# Re-derive the text columns from the New York timestamp.
ny_stamps = df_wti['datetime']
for column, pattern in (('date', '%Y-%m-%d'), ('time', '%H%M'), ('day', '%A')):
    df_wti[column] = ny_stamps.dt.strftime(pattern)

# Keep bars stamped 09:00 through 15:59 New York time ...
df_wti = df_wti[df_wti['datetime'].dt.hour.between(9, 15)]

# ... and from those, the Wednesday bars (weekday == 2) for 2010-2019 inclusive.
session_stamps = df_wti['datetime'].dt
df = df_wti[(session_stamps.weekday == 2) & session_stamps.year.between(2010, 2019)]
df.head()

Unnamed: 0,date,time,open,high,low,close,volume,datetime,day
65918,2010-01-06,900,81.84,81.92,81.67,81.8,5818,2010-01-06 09:00:00-05:00,Wednesday
65919,2010-01-06,915,81.8,81.92,81.67,81.76,3187,2010-01-06 09:15:00-05:00,Wednesday
65920,2010-01-06,930,81.76,81.9,81.67,81.73,4522,2010-01-06 09:30:00-05:00,Wednesday
65921,2010-01-06,945,81.75,81.87,81.62,81.79,5741,2010-01-06 09:45:00-05:00,Wednesday
65922,2010-01-06,1000,81.79,81.97,81.74,81.87,4907,2010-01-06 10:00:00-05:00,Wednesday


Check for volume anomalies that occur between 10:30 and 11:30 (New York time)

In [46]:
# Scan each day in `df` for the first volume spike between 10:30 and 11:30,
# score the spike's close against the surrounding 30-day window of `df_wti`,
# and record when (if ever) the close later returns to that window's mean.

# Loop-invariant values, computed once instead of once per day.
window_start = pd.to_datetime('10:30').time()
window_end = pd.to_datetime('11:30').time()
wti_dates = df_wti['datetime'].dt.date  # calendar date of every bar in df_wti

anomaly_rows = []
for day, df_day in df.groupby(pd.Grouper(key='datetime', freq='D')):
    # Volume anomaly: the bar's volume exceeds its 4-bar rolling mean (one hour
    # of 15-minute bars, current bar included) by more than two standard
    # deviations of that rolling mean over the day.
    sma = df_day['volume'].rolling(window=4).mean()
    threshold = sma.std() * 2
    df_anomaly = df_day[df_day['volume'] > (sma + threshold)]
    bar_times = df_anomaly['datetime'].dt.time
    df_anomaly = df_anomaly[(bar_times >= window_start) & (bar_times <= window_end)]

    # skip if no anomalous volume spikes
    if df_anomaly.empty:
        continue

    # Consider only the earliest anomaly in the window. Copy the row so the
    # assignments below write to an independent Series, not a view of the frame.
    anomaly_row = df_anomaly.iloc[0].copy()

    # Window used for the mean / std of the close price.
    # NOTE(review): the upper bound is the whole anomaly day, so bars printed
    # after the spike are included - confirm this look-ahead is intended.
    day_date = day.date()
    date_30_days_ago = day_date - pd.Timedelta(days=30)
    df_30 = df_wti[(wti_dates >= date_30_days_ago) & (wti_dates <= day_date)]

    mean = df_30['close'].mean()
    std = df_30['close'].std()

    # z score of the close price at the volume spike
    z_score = (anomaly_row['close'] - mean) / std

    # First later bar whose close is back within +- 0.2 of the window mean.
    df_future = df_wti[df_wti['datetime'] > anomaly_row['datetime']]
    df_settle = df_future[df_future['close'].between(mean - 0.2, mean + 0.2)]
    if df_settle.empty:
        # The price never returned to the band in the available data: record
        # missing values instead of failing on .iloc[0].
        settled_price, settled_datetime = np.nan, pd.NaT
    else:
        first_settle = df_settle.iloc[0]
        settled_price = first_settle['close']
        settled_datetime = first_settle['datetime']

    # Record the stats. Time-to-revert can be derived later as
    # settled_datetime - datetime.
    anomaly_row['z_score'] = z_score
    anomaly_row['past_30_mean'] = mean
    anomaly_row['past_30_std'] = std
    anomaly_row['settled_price'] = settled_price
    anomaly_row['settled_datetime'] = settled_datetime
    anomaly_rows.append(anomaly_row)

# One row per anomaly, keeping each row's original index label.
df_result = pd.DataFrame(anomaly_rows)
df_result

Unnamed: 0,date,time,open,high,low,close,volume,datetime,day,z_score,past_30_mean,past_30_std,settled_price,settled_datetime
65924,2010-01-06,1030,82.28,82.32,80.85,81.24,35422,2010-01-06 10:30:00-05:00,Wednesday,1.453802,75.398273,4.018240,75.59,2010-01-22 09:15:00-05:00
66390,2010-01-13,1030,79.32,79.32,78.37,78.87,27764,2010-01-13 10:30:00-05:00,Wednesday,0.261756,77.793454,4.112779,77.92,2010-01-15 13:30:00-05:00
67300,2010-01-27,1030,74.58,74.73,73.90,74.15,28633,2010-01-27 10:30:00-05:00,Wednesday,-1.809115,79.046479,2.706560,79.05,2010-02-18 14:15:00-05:00
67765,2010-02-03,1030,77.27,77.69,76.70,77.11,27000,2010-02-03 10:30:00-05:00,Wednesday,-0.314959,78.126430,3.227186,78.20,2010-02-18 09:30:00-05:00
69141,2010-02-24,1030,78.83,79.44,78.44,79.30,21033,2010-02-24 10:30:00-05:00,Wednesday,1.476071,75.509858,2.567724,75.67,2010-05-06 15:45:00-04:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298683,2019-10-30,1100,55.24,55.38,54.89,55.20,26044,2019-10-30 11:00:00-04:00,Wednesday,0.971911,53.994907,1.239921,53.89,2019-10-31 11:00:00-04:00
300065,2019-11-20,1030,55.75,56.39,55.75,56.18,48110,2019-11-20 10:30:00-05:00,Wednesday,0.028458,56.145932,1.197161,56.13,2019-11-29 10:30:00-05:00
300525,2019-11-27,1030,58.36,58.54,58.04,58.11,38736,2019-11-27 10:30:00-05:00,Wednesday,1.183581,56.807220,1.100710,56.63,2019-11-29 09:45:00-05:00
301416,2019-12-11,1030,59.10,59.10,58.21,58.23,70795,2019-12-11 10:30:00-05:00,Wednesday,0.599592,57.601059,1.048948,57.54,2020-01-15 10:30:00-05:00


In [50]:
# Export only the anomalies whose close was at least 1.5 standard deviations
# from the 30-day mean (in either direction). The index is not written.
(
    df_result
    .loc[lambda frame: frame['z_score'].abs() >= 1.5]
    .to_csv('./outputs/wednesday_anomaly.csv', index=False)
)