# Find spike days

## Load all companies

In [1]:
import numpy as np
import pandas as pd
import ta_lib
import matplotlib.pyplot as plt

In [2]:
from db_operation import DBService

# Open the project's database service; connect() must run before any query.
db = DBService()

db.connect()

# Company master table. The displayed output shows the columns
# symbol, name, ipo_year, sector and industry.
companies = db.get_companies()

# Preview the first rows (notebook display only).
companies.head()

Unnamed: 0,symbol,name,ipo_year,sector,industry
0,AAPC,Atlantic Alliance Partnership Corp.,2015,Consumer Services,Real Estate Investment Trusts
1,ABTL,Autobytel Inc.,1999,Technology,"Computer Software: Programming, Data Processing"
2,ACCP,"Accelerated Pharma, Inc.",0,,
3,AAN,"Aaron&#39;s, Inc.",0,Technology,Diversified Commercial Services
4,AAWW,Atlas Air Worldwide Holdings,0,Transportation,Transportation Services


## Get all quotes

In [3]:
%%time
# all_quotes maps symbol -> quotes table; later cells iterate
# all_quotes.items() and index it by symbol.
all_quotes = db.get_all_quotes()
# Number of symbols loaded.
print('Load success:', len(all_quotes))

Load success: 4114
Wall time: 4min 16s


## Reports of the broad market (大盘)


In [None]:
# Per-sector market breadth: for each of the last `period` trading days,
# count how many symbols of a sector closed above / not above the prior close.
reports = []
target_dt = -1   # most recent row (negative positional offset from the end)
period = 300     # number of trailing days to report
# Group by the scalar key 'sector' (not ['sector']) so `sector` is a plain
# value rather than a 1-tuple on recent pandas versions.
for sector, group in companies[['symbol', 'sector']].groupby('sector'):

    # Resolve the usable quote tables once per sector instead of once per day.
    # A table needs more than `period` rows because the oldest day compared
    # is at offset target_dt - (period - 1) - 1.
    sector_quotes = [
        all_quotes[symbol]
        for symbol in group['symbol']
        if symbol in all_quotes and len(all_quotes[symbol]) > period
    ]
    if not sector_quotes:
        # No data for this sector: skipping avoids a division by zero and
        # avoids reporting a date left over from a previous sector.
        continue

    for i in range(period):
        cur_dt = target_dt - i
        up_count = 0
        down_count = 0
        for sym_quotes in sector_quotes:
            close_p = sym_quotes['close']
            # Unchanged closes are counted as "down", as before.
            if close_p.iloc[cur_dt] > close_p.iloc[cur_dt - 1]:
                up_count += 1
            else:
                down_count += 1

        # The date label comes from the last qualifying symbol of the sector.
        quote_dt = sector_quotes[-1]['quote_date'].iloc[cur_dt]
        total = up_count + down_count
        reports.append([quote_dt, sector, up_count, down_count, total, up_count / total])

reports = pd.DataFrame(reports, columns = ['quote_dt', 'sector', 'up_count', 'down_count', 'total', 'prec'])
print(reports)
print(reports['total'].sum(), reports['up_count'].sum(), reports['down_count'].sum())

## Find Z-Pattern

In [7]:
def basic_filter(quotes, index):
    '''Return True when the 20 rows before `index` pass the price and volume floors.

    The window is quotes.iloc[index - 20 : index] (row `index` itself is
    excluded). A symbol is rejected when the lowest `low` in the window is
    below 3 or the mean `volume` in the window is below 100000.
    '''
    LOOKBACK = 20
    window = quotes.iloc[index - LOOKBACK : index]

    too_cheap_or_thin = (
        np.min(window.low) < 3              # price floor
        or np.mean(window.volume) < 100000  # liquidity floor
    )
    return not too_cheap_or_thin
    

In [16]:
def z_pattern(quotes, index):
    '''Detect an up trend with several strong positive-volume days that is not
    followed by strong negative-volume days.

    quotes : table with open / high / close / volume columns, addressed by
             position through .iloc.
    index  : positional row to test.

    Returns True when every filter passes, otherwise False.

    NOTE: the "future filter" reads rows at and after `index` (up to
    index + 100), so this check looks ahead and is only usable on
    historical data.
    '''
    PERIOD = 14
    # spike volume multiple (relative to the previous period's mean volume)
    VOLUME_MIN_THRESHOLD = 3
    VOLUME_MAX_THRESHOLD = 8
    # minimum number of positive spikes in the period
    UP_TIMES = 3

    # ---------------- past filter -----------------
    period_quotes = quotes.iloc[index - PERIOD + 1: index + 1]
    prev_sma_vol = np.mean(quotes.iloc[index - PERIOD * 2 + 1: index - PERIOD + 1].volume)

    # volume filter: too little volume is dangerous
    if prev_sma_vol < 100000:
        return False

    # price filter: the recent high must not be below the earlier closing high
    past_price = np.max(quotes.iloc[index - PERIOD * 2 : index - PERIOD].close)
    cur_price = np.max(quotes.iloc[index - PERIOD : index + 1].high)

    if past_price > cur_price:
        return False

    # Days whose volume spiked above the minimum multiple.
    target_quotes = period_quotes[period_quotes.volume > prev_sma_vol * VOLUME_MIN_THRESHOLD]
    # Combine both conditions into one mask; chaining two [] lookups applied a
    # mask built on target_quotes to an already-filtered frame.
    pos_mask = (
        (target_quotes.volume < prev_sma_vol * VOLUME_MAX_THRESHOLD)
        & (target_quotes.close > target_quotes.open)
    )
    pos_vol = target_quotes[pos_mask].volume
    neg_vol = target_quotes[target_quotes.close < target_quotes.open].volume

    # need enough positive spikes, and at least twice the negative spike volume
    if len(pos_vol) < UP_TIMES or np.sum(pos_vol) < np.sum(neg_vol) * 2:
        return False

    # -------------- future filter -----------------
    future_quotes = quotes.iloc[index : min(index + PERIOD, len(quotes))]
    future_target_quotes = future_quotes[future_quotes.volume > prev_sma_vol * VOLUME_MIN_THRESHOLD]
    future_neg_vols = future_target_quotes[future_target_quotes.close < future_target_quotes.open].volume

    if np.sum(future_neg_vols) * 2 > np.sum(pos_vol):
        return False

    # Most recent resistance point at or before index (None when there is none).
    res_i = back_angular(quotes, index, 0)
    if res_i is not None:
        res_high = quotes.iloc[res_i].high
        # reject when the following closes fall more than 3% below resistance
        if res_high * 0.97 > np.min(future_quotes.close):
            return False

        # reject when closes from index + 10 onward all stay more than 10% above resistance
        if res_high * 1.1 < np.min(quotes.iloc[index + 10 : min(index + 100, len(quotes))].close):
            return False

    # Every filter passed. The original fell off the end here and returned
    # None, so callers testing `if z_pattern(...)` could never see a match.
    return True


In [23]:
def angular(quotes, index, direc):
    '''Tell whether row `index` is a local extreme of its neighbourhood.

    direc : 0 - resist angular (row's high is the highest high in the window),
            1 - support angular (row's low is the lowest low in the window).

    The window is quotes.iloc[index - 5 : index + 5], i.e. five rows before
    and the row itself plus four rows after. Returns False for any other
    `direc` and when `index` is outside the table.
    '''
    HALF_OB_PERIOD = 5

    try:
        pivot = quotes.iloc[index]
    except IndexError:
        # Position outside the table: not an extreme. Only this expected
        # failure is caught so KeyboardInterrupt and real bugs still surface.
        return False

    ob_quos = quotes.iloc[index - HALF_OB_PERIOD : index + HALF_OB_PERIOD]

    if direc == 0:
        return bool(np.max(ob_quos.high) <= pivot.high)
    if direc == 1:
        return bool(np.min(ob_quos.low) >= pivot.low)

    return False

def back_angular(quotes, index, direc):
    '''Search backwards from `index` down to row 0 for the nearest angular point.

    Returns the first position (starting at `index` itself) for which
    angular(quotes, position, direc) is true, or None when there is none.
    '''
    for pos in range(index, -1, -1):
        if angular(quotes, pos, direc):
            return pos
    return None
        


def sp_plot(quotes, index, period):
    '''Plot close/high/low for the `period` rows before `index` and mark the
    resistance (red) and support (green) points found by angular().

    Plotting is best effort: a failure is reported and skipped instead of
    aborting the caller. Always returns False.
    '''
    try:
        # x-coordinates are relative to the first plotted row
        base_i = index - period

        res_index = []
        res_price = []
        sup_index = []
        sup_price = []

        # find support and resist points in [index - period, index]
        for i in range(base_i, index + 1):
            if angular(quotes, i, 0):
                res_index.append(i - base_i)
                res_price.append(quotes.iloc[i].high)
            if angular(quotes, i, 1):
                sup_index.append(i - base_i)
                sup_price.append(quotes.iloc[i].low)

        window = quotes.iloc[base_i : index]
        x_axis = list(range(period))
        plt.plot(x_axis, window.close)
        plt.plot(x_axis, window.high, 'y:')
        plt.plot(x_axis, window.low, 'y:')
        plt.plot(res_index, res_price, "ro")
        plt.plot(sup_index, sup_price, "go")
        plt.show()
    except Exception as exc:
        # Was a bare `except: pass`, which also swallowed KeyboardInterrupt
        # and hid every error. Keep best-effort behaviour but say why.
        print('sp_plot skipped:', exc)

    return False

In [17]:
%%time

# Running number of reported spike days.
count = 0
# Observation window, as negative offsets from the end of each quotes table:
# rows [len + start_dt, len + target_dt) are tested.
start_dt = -300
target_dt = -250

for symbol, quotes in all_quotes.items():
    n_rows = len(quotes)
    # Only tables with at least 20 rows before the window start are scanned.
    if n_rows >= abs(start_dt) + 20:
        # Position of the previous hit; -6 so the first hit is always reported.
        last_i = -6
        for i in range(n_rows + start_dt, n_rows + target_dt):
            if basic_filter(quotes, i) and z_pattern(quotes, i):
                # Report only hits more than 5 rows after the previous hit,
                # but every hit moves the marker forward.
                is_new_cluster = i - last_i > 5
                last_i = i
                if is_new_cluster:
                    count += 1
                    print(count, symbol, quotes.iloc[i].quote_date, sep = '\t')
                    # optional visual check: sp_plot(quotes, i + 100, 200)
            





20170309.0
1	TTD	20161118.0
20170309.0
20170309.0
20170309.0
20170309.0
20170309.0
20170309.0




20161215.0
2	OCFC	20161215.0
20161216.0
20161117.0
3	VRX	20161117.0
20161202.0
4	ARA	20161201.0
20170504.0
5	CONN	20161215.0
20170504.0
20170504.0
20170504.0
20170504.0
20170504.0
20170504.0
20161216.0
6	BNED	20161216.0
20161219.0
20161220.0
20161221.0
20170327.0
7	AXTI	20170113.0
20170327.0
20170327.0
20170327.0
20170327.0
20170327.0
20161215.0
8	LADR	20161215.0
20161219.0
20170307.0
20170307.0
20161118.0
9	TISI	20161118.0
20161121.0
20161122.0
10	PZE	20161109.0
20161122.0
20161122.0
20161122.0
20161115.0
11	MWA	20161111.0
20161115.0
20161115.0
20161116.0
20161118.0
12	TGH	20161111.0
20161118.0
20161118.0
20161118.0
20161118.0
20161121.0
20161125.0
20161125.0
20161128.0
20161121.0
13	SHAK	20161116.0
20161121.0
20161121.0
20161121.0
20161115.0
14	AMKR	20161109.0
20161115.0
20161115.0
20161115.0
20161115.0
20161116.0
20161117.0
20161117.0
15	HRI	20161109.0
20161117.0
20161117.0
20161117.0
20161122.0
16	WLB	20161116.0
20161122.0
20161122.0
20161122.0
20161121.0
17	LOPE	20161109.0
2016112

KeyboardInterrupt: 

##  Find consolidation pattern

In [None]:
import ta_lib
import matplotlib.pyplot as plt

In [None]:
def volitility(quotes, index, period):
    '''Return ta_lib.ATR(quotes, index, period) divided by the mean close of
    the `period` rows ending at `index` (inclusive).

    ATR is presumably the average true range - confirm in ta_lib.
    '''
    range_measure = ta_lib.ATR(quotes, index, period)
    closes = quotes.iloc[index - period + 1 : index + 1].close
    return range_measure / np.mean(closes)

def is_consolidation(quotes, index, period = 20):
    '''Return True when volatility has contracted at `index`.

    Compares volitility() over the 2 * period rows ending at index - period
    with volitility() over the period rows ending at index; the earlier value
    must exceed 1.5 times the recent one.
    '''
    # A range-based alternative ((max close - min close) / mean close < 0.1)
    # was left disabled after the return in the original and never ran.
    earlier = volitility(quotes, index - period, period * 2)
    recent = volitility(quotes, index, period)
    return True if earlier > (recent * 1.5) else False

In [None]:
def consolidation_days(quotes, index, min_period = 20):
    '''Return the longest window length (in rows) ending at `index` that still
    looks like a consolidation, or 0 when even `min_period` does not.

    Window lengths are tried upwards from `min_period` while the window still
    fits before `index`. A length qualifies when the volitility() of the
    preceding window of the same length is at least 0.1 and lies strictly
    between 1.5 and 3 times the volitility() of the window ending at `index`.
    The search stops at the first length that does not qualify.
    '''
    VOLITILITY_RATE = 1.5

    days = 0
    for span in range(min_period, index + 1):
        before = volitility(quotes, index - span, span)
        now = volitility(quotes, index, span)
        contracted = now * VOLITILITY_RATE < before < now * 3
        if before < 0.1 or not contracted:
            break
        days = span
    return days

In [None]:
# find reverse bulge
def reverse_bulge_list(quotes):
    '''Return the row positions where ta_lib.MI completes a "reverse bulge".

    A position is recorded when MI is at or below 26.5 for the first time
    after having been at or above 27. Rows for which ta_lib.MI returns None
    are skipped.
    '''
    armed = False   # True once MI has reached 27 and not yet fallen back
    hits = []

    for pos in range(len(quotes)):
        mi = ta_lib.MI(quotes, pos)
        if mi is None:
            continue
        if mi >= 27:
            armed = True
        elif armed and mi <= 26.5:
            armed = False
            hits.append(pos)
    return hits


In [None]:
def draw(symbol, quotes):
    '''Print MFI / MI for every row, then show one three-panel figure
    (MI, close price, MFI) per reverse-bulge position of `quotes`.

    symbol : used as the figure title.
    quotes : quotes table passed to ta_lib.MFI / ta_lib.MI and sliced by
             position for the price panel.
    '''
    EXPAND_SIZE = 100
    mi_list = []
    mfi_list = []

    for i in range(len(quotes)):
        dt = quotes.iloc[i].quote_date
        mfi = ta_lib.MFI(quotes, i)
        mi = ta_lib.MI(quotes, i)

        mi_list.append(mi)
        mfi_list.append(mfi)

        print(i, dt, mfi, mi, sep = '\t')

    mfi_list = np.array(mfi_list)
    long_period = 16
    short_period = 8

    # NOTE(review): SMA_LIST is not defined anywhere in the visible notebook;
    # it presumably comes from ta_lib or an earlier session - confirm and
    # qualify the name, otherwise this raises NameError.
    long_ema_mfi = SMA_LIST(mfi_list, long_period)
    short_ema_mfi = SMA_LIST(mfi_list, short_period)

    # Length of the MI threshold lines; `rang` was never assigned in the
    # original. Presumably meant to span the whole MI series - confirm.
    rang = len(mi_list)

    # The original called undefined `reverse_bulge`; the function defined in
    # this notebook is reverse_bulge_list.
    rb_list = reverse_bulge_list(quotes)
    for index in rb_list:
        # Clamp the window start: a negative start would wrap to the end.
        lo = max(index - EXPAND_SIZE, 0)
        hi = index + EXPAND_SIZE

        plt.figure(1)
        plt.subplot(311)
        plt.plot(mi_list)
        plt.plot([27] * rang, color = 'k')
        plt.plot([26.5] * rang, 'k:')
        plt.title(symbol)
        plt.ylabel('MI')

        plt.subplot(312)
        # The original slice had no subject (syntax error); the closes come
        # from the quotes table.
        view_quotes = list(quotes.iloc[lo : hi].close)
        plt.plot(view_quotes)
        plt.ylabel('stock')

        plt.subplot(313)
        plt.plot(mfi_list)
        plt.plot(long_ema_mfi[lo : hi], color = 'r')
        plt.plot(short_ema_mfi[lo : hi], color = 'g')
        plt.plot([80] * EXPAND_SIZE * 2, color = 'k')
        plt.plot([20] * EXPAND_SIZE * 2, color = 'k')
        plt.plot([60] * EXPAND_SIZE * 2, 'y:')
        plt.plot([40] * EXPAND_SIZE * 2, 'y:')
        plt.plot([50] * EXPAND_SIZE * 2, 'y:')
        plt.ylabel('MFI')
        plt.ylim(0, 100)

        plt.show()
        

In [None]:
def PNV(quote):
    '''Split one bar's volume into a positive and a negative part.

    quote : object with high, low, open, close and volume attributes.

    The split is proportional to two lengths derived from the bar: for a
    rising bar (close > open) the positive length is the full high-low range
    and the negative length is the range minus the body; for a falling bar it
    is the mirror image; for an unchanged bar the volume is split evenly.

    Returns (pos_volume, neg_volume); the two always sum to quote.volume.
    '''
    full_range = quote.high - quote.low
    body = quote.close - quote.open

    if body > 0:
        up_len, down_len = full_range, full_range - body
    elif body < 0:
        # body is negative here, so adding it shortens the positive length
        up_len, down_len = full_range + body, full_range
    else:
        up_len = down_len = 1

    up_share = up_len / (up_len + down_len)
    pos_volume = quote.volume * (up_share)
    return pos_volume, quote.volume - pos_volume
    

In [None]:
def MFR(quotes, index, period):
    '''Money Flow Rate: share of positive volume in total volume.

    Sums the PNV() split of each of the `period` rows ending at `index`
    (inclusive) and returns positive / (positive + negative).
    '''
    first_row = index - period + 1
    totals = [0, 0]   # [positive, negative]
    for row in range(first_row, index + 1):
        up_part, down_part = PNV(quotes.iloc[row])
        totals[0] += up_part
        totals[1] += down_part

    up_total, down_total = totals
    return up_total / (up_total + down_total)


In [None]:
# Scan every symbol for consolidations whose money flow is clearly one-sided.
period = 200
for key, value in all_quotes.items():

    length = len(value)
    # Rows from `period` before the end up to 100 before the end, so that
    # 100 following rows exist for the high/low look-ahead below.
    for i in range(length - period, length - 100):
        # need `period` rows of history and a low of at least 2
        if i < period or value.iloc[i].low < 2:
            continue

        days = consolidation_days(value, i, 30)
        if days <= 0:
            continue

        volumes = np.mean(value.iloc[i - days + 1 : i + 1].volume)
        mfr = MFR(value, i, days)
        # liquid enough, and money flow at least 5 points away from balanced
        if volumes > 30000 and abs(mfr - 0.5) > 0.05:
            high_p = np.max(value.iloc[i : i + 100].high)
            low_p = np.min(value.iloc[i : i + 100].low)
            print(key, '%.f' % value.iloc[i].quote_date,'%.2f' % mfr, days, value.iloc[i].close, high_p, low_p, sep = '\t')

In [None]:
# Find breakout days: volume above 5x the 50-row mean, a rising bar, and a
# close above the previous 50-row high. Then report how the next 50 rows behaved.
period = 200
for key, value in all_quotes.items():
    # Skip Health Care. Symbols missing from `companies` have no known
    # sector; they are scanned instead of raising IndexError on .iloc[0].
    sector = companies[companies.symbol == key].sector
    if not sector.empty and sector.iloc[0] == 'Health Care':
        continue

    length = len(value)
    for i in range(length - period, length - 100):
        if i < period:
            continue

        if value.iloc[i].low < 2:
            continue

        # reference window: the 50 rows before the candidate day
        quotes = value.iloc[i - 50 : i]
        sma_volume = np.mean( quotes.volume )
        pre_high_p = np.max(quotes.high)

        today = value.iloc[i]
        if today.volume > sma_volume * 5 and today.close > today.open and today.close > pre_high_p:
            # examination window: the 50 rows after the candidate day
            exam_quotes = value.iloc[i + 1 : i + 51]
            high_p = np.max(exam_quotes.high)
            low_p = np.min(exam_quotes.low)
            # Days after the breakout on which the extreme occurred. Computed
            # from the position inside the window (+1 because the window
            # starts at i + 1) so it does not depend on the index labels or
            # on what np.argmax returns for a Series.
            high_day = int(np.argmax(exam_quotes.high.values)) + 1
            low_day = int(np.argmin(exam_quotes.low.values)) + 1
            # keep only breakouts that never fell more than 5% below the old high
            if low_p > pre_high_p * 0.95:
                print(key, '%.f' % today.quote_date, '%.2f'% (today.close / pre_high_p - 1), '%.2f(%d)' % (high_p / pre_high_p - 1, high_day) , '%.2f(%d)' % (low_p / pre_high_p - 1, low_day), sep = '\t')

In [None]:
# Inspect the `open` column of the SNAP quotes (notebook display only).
all_quotes['SNAP'].open