# Turning point analysis
- Work flow
    1. turn tick data into daily data
        - findataflow
    1. For each maturity
        - detect turning points (N)
            - N = 1, 2, 3, ...
        - combine turning points
    1. summarize turning points by day-to-maturity
        - frequency
        - slope
        - duration
        - optimal lambda


In [2]:
# Re-import edited project modules automatically before each cell executes.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [3]:
# Display the value of every top-level expression in a cell (not only the
# last one), so several DataFrames can be shown from a single cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt

## turn tick data into daily data

In [65]:
import config as cfg
from datetime import datetime

def get_maturity(series_date, settlements=None):
    """Label each trading date with the settlement date it rolls into.

    Parameters
    ----------
    series_date : pd.Series
        Trading dates as ``datetime.date`` objects. The back-fill below is
        positional, so the series is assumed to be sorted ascending.
    settlements : iterable of date-like, optional
        Settlement dates. When omitted, they are read from the ``settlement``
        column of the CSV at ``cfg.PATH_SETTLEMENTS``.

    Returns
    -------
    pd.Series
        Same index as ``series_date``. Each entry is the settlement date found
        on that row or on the nearest later row of ``series_date``. Entries are
        NaN when no later settlement row exists, and also when the matched
        settlement is the earliest one in ``settlements``.
    """
    if settlements is None:
        settlements = pd.read_csv(cfg.PATH_SETTLEMENTS).settlement
    settlements = pd.to_datetime(pd.Series(settlements)).dt.date

    # Keep the date only on settlement days, then pull it backwards so every
    # earlier row carries the upcoming settlement date.
    on_settlement_day = series_date.isin(settlements)
    maturities = series_date.where(on_settlement_day).bfill()

    # Rows leading into the earliest settlement are masked out
    # (presumably because that first period is only partially observed).
    return maturities.where(maturities > settlements.min())

#TODO: need more input for settlement dates

In [56]:
from findataflow.dataprocs import resample

In [63]:
# Daily OHLCV bars for TXF; the datetime index becomes a 'tx_datetime' column.
df_OHLC = (
    resample.get_OHLCV_given_frequency(symbol='TXF', freq='1D')
    .reset_index()
    .rename(columns={'index': 'tx_datetime'})
)
# Keep only the calendar date, then tag each row with its contract maturity.
df_OHLC['tx_datetime'] = df_OHLC['tx_datetime'].dt.date
df_OHLC['maturity'] = get_maturity(df_OHLC['tx_datetime'])
df_OHLC.head()

transformed OHLCV at frequency 1D
reading cache of TXF in 1D OHLCV


Unnamed: 0,tx_datetime,open,high,low,close,vol,maturity
0,2011-01-03,9000.0,9030.0,8995.0,9020.0,122732,
1,2011-01-04,9030.0,9030.0,8974.0,8978.0,124896,
2,2011-01-05,8967.0,9005.0,8791.0,8838.0,331662,
3,2011-01-06,8859.0,8876.0,8815.0,8869.0,165020,
4,2011-01-07,8866.0,8883.0,8727.0,8753.0,257504,


In [66]:
df_OHLC.tail()

Unnamed: 0,tx_datetime,open,high,low,close,vol,maturity
1995,2019-02-14,10044.0,10124.0,10034.0,10080.0,193056,2019-02-20
1996,2019-02-15,10040.0,10116.0,10022.0,10042.0,223160,2019-02-20
1997,2019-02-18,10111.0,10161.0,10098.0,10135.0,193704,2019-02-20
1998,2019-02-19,10123.0,10158.0,10104.0,10142.0,143182,2019-02-20
1999,2019-02-20,10183.0,10262.0,10177.0,10256.0,136116,2019-02-20


## turning points at maturity level
- detect turning points (N)
    - N = 1, 2, 3, ...
- combine turning points

In [30]:
# https://github.com/datasci-info/iin-risky-strategies/blob/master/research/2018_task_turnpt_combination.ipynb
# step 5: detect turning points
def get_marked_turnpt(prc_ohlc, N):
    """Flag rows whose close is a local extremum of a centred rolling window.

    The window spans ``N`` rows on each side of the current row
    (``2 * N + 1`` rows in total). Near the ends of the frame the window is
    truncated; a value is still produced as long as at least ``N + 1`` closes
    are available, otherwise both flags are False for that row.

    Parameters
    ----------
    prc_ohlc : pd.DataFrame
        Must contain the columns ``tx_datetime``, ``low``, ``high`` and
        ``close``. Only ``close`` is used for the detection.
    N : int
        Number of rows on each side of the current row.

    Returns
    -------
    pd.DataFrame
        Copy of the ``tx_datetime`` column (same index as ``prc_ohlc``) plus:
        ``is_turnpt_upward``   - close equals the window minimum,
        ``is_turnpt_downward`` - close equals the window maximum.
        Ties count as extrema, so a flat window sets both flags.

    Raises
    ------
    AssertionError
        If any required column is missing.
    """
    # 'close' is the column actually used below, so it is validated too.
    # The 'low'/'high' checks are kept so existing failure cases still raise.
    for col in ('tx_datetime', 'low', 'high', 'close'):
        assert col in prc_ohlc.columns, col + ' is not in column name list'

    close = prc_ohlc['close']
    window = close.rolling(window=2 * N + 1, center=True, min_periods=N + 1)

    df_prc = prc_ohlc[['tx_datetime']].copy()
    # Assign raw arrays so the flags are written positionally, without index
    # alignment.
    df_prc['is_turnpt_upward'] = (close == window.min()).to_numpy()
    df_prc['is_turnpt_downward'] = (close == window.max()).to_numpy()
    return df_prc

In [70]:
# Smoke check on the full daily frame with a one-row window on each side.
get_marked_turnpt(prc_ohlc=df_OHLC, N=1)

Unnamed: 0,tx_datetime,is_turnpt_upward,is_turnpt_downward
0,2011-01-03,False,True
1,2011-01-04,False,False
2,2011-01-05,True,False
3,2011-01-06,False,True
4,2011-01-07,True,False
...,...,...,...
1995,2019-02-14,False,False
1996,2019-02-15,True,False
1997,2019-02-18,False,False
1998,2019-02-19,False,False


In [73]:
# Try the detector on the first maturity group only.
first_group = next(iter(df_OHLC.groupby('maturity')), None)
if first_group is not None:
    grp, df_grp = first_group
    df_tp = get_marked_turnpt(df_grp, N=1)

df_tp