# Motivation and Goals

When backtesting stat-arb strategies, they eventually fall apart. Sometimes the predictor slowly drifts off, but often a single stock's price shoots upwards - a blowup.

Motivating Question: Can we predict blowups?
- If so, can we profit from it?
- If not, is our pairs trading strategy equally likely to be long/short the stock in question when it blows up?

# Approach

## Visualize
Graph stock prices before blowup. Look for trends

Ideas:
1. Pre-blowup rise (double blowup: small increase --> much larger increase)
2. Volume indicators (increase in volume --> blowup imminent)
3. Post-blowup descent (blowup --> slow decrease, right-tailed log-returns)

To Graph:
1. Find the std. of each stock's hourly log returns
2. Define blowup = an hourly log return greater than 10 * that std
3. Graph full 3-month window for each stock where blowup is identified
4. Graph prices of non-blowup stocks
5. Market-normalize beforehand? How many blowups are due to BTCUSDT?

In [1]:
""" 
Imports
"""

import glob
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import re
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import coint
from itertools import combinations
from tqdm import tqdm
import typing
from typing import Dict, Union
import collections

In [2]:
# Load the hourly bar data: one "<TICKER>_<field>" column per instrument field
# (open/high/low/close/volume/turnover), one row per timestamp.
data_path = './crypto_data.csv'

# Window boundaries referenced by name elsewhere in the notebook.
# NOTE(review): presumably train-window start/end and test-window end --
# confirm against the cells that consume them.
start_time = datetime(year=2024, month=6, day=1)
end_time = datetime(year=2024, month=8, day=25)
test_end_time = datetime(year=2024, month=10, day=7)

# NOTE(review): 'time' is used as the index exactly as read_csv returns it
# (no parse_dates), so comparing it against the datetime bounds above would
# need an explicit conversion -- verify downstream usage.
df = pd.read_csv(data_path).set_index('time')
df.head()

Unnamed: 0_level_0,ARKUSDT_open,ARKUSDT_high,ARKUSDT_low,ARKUSDT_close,ARKUSDT_volume,ARKUSDT_turnover,AXLUSDT_open,AXLUSDT_high,AXLUSDT_low,AXLUSDT_close,...,ZRXUSDT_low,ZRXUSDT_close,ZRXUSDT_volume,ZRXUSDT_turnover,GRTUSDT_open,GRTUSDT_high,GRTUSDT_low,GRTUSDT_close,GRTUSDT_volume,GRTUSDT_turnover
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-06-01 00:00:00,0.8052,0.8294,0.8031,0.8268,576568.0,470695.6409,1.034,1.0613,1.0318,1.0531,...,0.519,0.5205,58775.0,30580.0258,0.29844,0.29984,0.29794,0.2992,230545.7,68941.066258
2024-06-01 01:00:00,0.8268,0.8437,0.8127,0.8153,1026640.0,852475.3372,1.0531,1.0654,1.0429,1.0586,...,0.5192,0.5195,25026.0,13013.342,0.2992,0.29975,0.29857,0.29926,223235.0,66801.134862
2024-06-01 02:00:00,0.8153,0.8325,0.8124,0.8266,350101.0,287436.0344,1.0586,1.0653,1.0527,1.0611,...,0.5186,0.5199,63868.0,33181.7229,0.29926,0.29959,0.2986,0.29929,161906.8,48417.999107
2024-06-01 03:00:00,0.8266,0.8318,0.822,0.8247,391127.0,323734.8,1.0611,1.0699,1.0496,1.0508,...,0.5171,0.5174,96745.0,50157.0848,0.29929,0.29968,0.29788,0.29795,447063.9,133582.787508
2024-06-01 04:00:00,0.8247,0.8257,0.8184,0.8235,128711.0,105733.7135,1.0508,1.0529,1.0405,1.0458,...,0.5173,0.5214,95808.0,49866.4207,0.29795,0.29931,0.29761,0.29906,195045.3,58186.611414


In [3]:
# Each "<TICKER>_close" column identifies one instrument.
tickers = [name.split('_close')[0] for name in df.columns if '_close' in name]

# Per-bar log return for every ticker: ln(close_t / close_{t-1}).
new_columns = {
    f'{symbol}_logreturn': np.log(df[f'{symbol}_close'] / df[f'{symbol}_close'].shift(1))
    for symbol in tickers
}
new_columns_df = pd.DataFrame(new_columns, index=df.index)

# Append the return columns to the raw bars, then:
#   1. drop the first timestamp (its return is undefined: no previous close),
#   2. drop rows that are entirely NaN,
#   3. drop columns that are entirely NaN.
final_df = (
    pd.concat([df, new_columns_df], axis=1)
    .pipe(lambda frame: frame.drop(index=frame.index[0]))
    .dropna(how='all', axis=0)
    .dropna(how='all', axis=1)
)
final_df

Unnamed: 0_level_0,ARKUSDT_open,ARKUSDT_high,ARKUSDT_low,ARKUSDT_close,ARKUSDT_volume,ARKUSDT_turnover,AXLUSDT_open,AXLUSDT_high,AXLUSDT_low,AXLUSDT_close,...,FUNUSDT_logreturn,HMSTRUSDT_logreturn,RENUSDT_logreturn,GRASSUSDT_logreturn,PENDLEUSDT_logreturn,REQUSDT_logreturn,SAGAUSDT_logreturn,ETH-08NOV24_logreturn,ZRXUSDT_logreturn,GRTUSDT_logreturn
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-06-01 01:00:00,0.8268,0.8437,0.8127,0.8153,1026640.0,852475.3372,1.0531,1.0654,1.0429,1.0586,...,0.000398,,-0.005552,,-0.002384,-0.001516,0.002456,,-0.001923,0.000201
2024-06-01 02:00:00,0.8153,0.8325,0.8124,0.8266,350101.0,287436.0344,1.0586,1.0653,1.0527,1.0611,...,-0.002986,,-0.004258,,-0.001202,0.000217,0.010923,,0.000770,0.000100
2024-06-01 03:00:00,0.8266,0.8318,0.8220,0.8247,391127.0,323734.8000,1.0611,1.0699,1.0496,1.0508,...,-0.002995,,-0.000441,,-0.005282,-0.003691,-0.001906,,-0.004820,-0.004487
2024-06-01 04:00:00,0.8247,0.8257,0.8184,0.8235,128711.0,105733.7135,1.0508,1.0529,1.0405,1.0458,...,-0.000200,,0.005285,,0.012039,0.003763,0.014518,,0.007701,0.003719
2024-06-01 05:00:00,0.8235,0.8242,0.8177,0.8208,154720.0,126929.7107,1.0458,1.0914,1.0443,1.0777,...,0.001399,,-0.005285,,0.000048,0.003245,0.001584,,0.000384,-0.001874
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-29 20:00:00,0.5255,0.5257,0.5162,0.5175,786018.0,409075.5295,0.7898,0.7907,0.7807,0.7829,...,-0.006037,-0.009967,-0.003517,0.018570,-0.017631,-0.005047,-0.012133,-0.006883,-0.007453,-0.007818
2024-10-29 21:00:00,0.5175,0.5177,0.5106,0.5138,680436.0,349295.7233,0.7829,0.7929,0.7765,0.7923,...,-0.001595,-0.002075,-0.002985,0.002682,0.011935,0.001444,0.005918,0.001597,-0.000898,0.002947
2024-10-29 22:00:00,0.5138,0.5175,0.5136,0.5152,453657.0,233699.3327,0.7923,0.7965,0.7883,0.7935,...,0.005411,0.003455,0.008391,-0.026025,0.001138,0.000206,0.004890,0.001272,0.004184,-0.000438
2024-10-29 23:00:00,0.5152,0.5185,0.5150,0.5167,248880.0,128563.9955,0.7935,0.8011,0.7909,0.8005,...,0.003170,0.004818,0.000000,-0.066067,0.010558,0.002162,0.014107,0.004939,0.001490,0.001815


In [None]:
# Plot the log-return series of every ticker that has at least one blowup.
#
# A blowup is a single-bar log return greater than BLOWUP_STD_MULT times the
# standard deviation (np.std, i.e. ddof=0) of that ticker's own log returns.
# Only upward moves are flagged; each one is marked with a vertical line.
BLOWUP_STD_MULT = 10
ncols = 3

# Pass 1: detect blowups first, so the subplot grid can be sized to the number
# of tickers that are actually drawn instead of to every ticker.
blowups_by_ticker = {}
for ticker in tickers:
    column = f'{ticker}_logreturn'
    if column not in final_df.columns:
        # All-NaN columns are removed by dropna(how='all', axis=1) when
        # final_df is built, so a ticker may have no return column left.
        continue

    logreturn = final_df[column]
    return_std = np.std(logreturn)

    blowup = logreturn[logreturn > return_std*BLOWUP_STD_MULT]
    if len(blowup) > 0:
        blowups_by_ticker[ticker] = blowup

# Number of tickers plotted (name kept for any later cell that reads it).
actualCount = len(blowups_by_ticker)

# Pass 2: build a grid with just enough rows. squeeze=False keeps `axs` 2-D
# even when there is a single row, so indexing/flattening never breaks.
nrows = max(1, (actualCount + ncols - 1)//ncols)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 4*nrows), squeeze=False)
flat_axes = axs.ravel()

# Roughly five evenly spaced x tick labels; max(1, ...) avoids a zero slice
# step when final_df has fewer than four rows.
tick_step = max(1, len(final_df)//4)
tick_locs = final_df.index[::tick_step]

for ax, (ticker, blowup) in zip(flat_axes, blowups_by_ticker.items()):
    ax.plot(final_df[f'{ticker}_logreturn'])

    ax.set_xticks(tick_locs, tick_locs, rotation=45)
    for day in blowup.index:
        ax.axvline(x=day)
    ax.set_title(ticker)
    ax.set_ylabel('log return')

# Hide the unused cells in the last row of the grid.
for ax in flat_axes[actualCount:]:
    ax.set_visible(False)

fig.tight_layout()