# Find spike days

## Load all companies

In [1]:
import numpy as np
import pandas as pd

In [2]:
from db_operation import DBService

db = DBService(host = "202.118.75.113")

db.connect()

companies = db.get_companies()

companies.head()

Unnamed: 0,symbol,name,ipo_year,sector,industry
0,AAPC,Atlantic Alliance Partnership Corp.,2015,Consumer Services,Real Estate Investment Trusts
1,ABTL,Autobytel Inc.,1999,Technology,"Computer Software: Programming, Data Processing"
2,ACCP,"Accelerated Pharma, Inc.",0,,
3,ABMD,"ABIOMED, Inc.",0,Health Care,Medical/Dental Instruments
4,ABUS,Arbutus Biopharma Corporation,0,Health Care,Major Pharmaceuticals


## Get all quotes

In [3]:
%%time
#< symbol, quote list>
all_quotes = db.get_all_quotes()
print('Load success:', len(all_quotes))

Load success: 4105
Wall time: 2min 20s


## Reports of  大盘


In [97]:
reports = []
target_dt =  -1
period = 300
for sector, group in companies[['symbol', 'sector']].groupby(['sector']):
    
    for i in range(period):
        cur_dt = target_dt - i
        up_count = 0
        down_count = 0        
        for symbol in group['symbol']:
            if symbol in all_quotes and len(all_quotes[symbol]) > period:
                quote_dt = all_quotes[symbol]['quote_date'].iloc[cur_dt]
                close_p = all_quotes[symbol]['close']

                if close_p.iloc[cur_dt] > close_p.iloc[cur_dt - 1]:
                    up_count += 1
                else:
                    down_count += 1
        reports.append([quote_dt ,sector, up_count, down_count, up_count + down_count, up_count / (up_count + down_count)])

reports = pd.DataFrame(reports, columns = ['quote_dt', 'sector', 'up_count', 'down_count', 'total', 'prec'])
print(reports)
print(reports['total'].sum(), reports['up_count'].sum(), reports['down_count'].sum())

      quote_dt            sector  up_count  down_count   total      prec
0     20171113  Basic Industries        93         119     212  0.438679
1     20171110  Basic Industries       176         248     424  0.415094
2     20171109  Basic Industries       234         402     636  0.367925
3     20171108  Basic Industries       335         513     848  0.395047
4     20171107  Basic Industries       400         660    1060  0.377358
5     20171106  Basic Industries       527         745    1272  0.414308
6     20171103  Basic Industries       617         867    1484  0.415768
7     20171102  Basic Industries       713         983    1696  0.420401
8     20171101  Basic Industries       818        1090    1908  0.428721
9     20171031  Basic Industries       940        1180    2120  0.443396
10    20171030  Basic Industries      1008        1324    2332  0.432247
11    20171027  Basic Industries      1113        1431    2544  0.437500
12    20171026  Basic Industries      1230        1

## Find spike

In [13]:
def basic_filter(quotes, index):
    PERIOD = 20
    # filter price
    period_quotes = quotes.iloc[index - PERIOD : index]
    
    if np.min(period_quotes.low) < 2:
        return False
    
    # filter volume
    if np.mean(period_quotes.volume) < 100000:
        return False
    
    return True
    

In [26]:
def up_period(quotes, index):
    SMA_PERIOD = 20
    UP_THRESHOLD = 5
    UP_TIMES = 3
    
    period_quotes = quotes.iloc[index - SMA_PERIOD:index]
    sma_vol = np.mean(quotes.iloc[index - SMA_PERIOD * 2 : index - SMA_PERIOD].volume)
    
    prev_sma_price = np.mean(quotes.iloc[index - SMA_PERIOD * 2 : index - SMA_PERIOD].close)
    cur_sma_price = np.mean(period_quotes.close)
    
    if prev_sma_price > cur_sma_price:
        return False
    
    target_quotes = period_quotes[period_quotes.volume > sma_vol * UP_THRESHOLD]
    pos_vol = target_quotes[target_quotes.close > target_quotes.open].volume
    neg_vol = target_quotes[target_quotes.close < target_quotes.open].volume
    
    if len(pos_vol) >= UP_TIMES and np.sum(pos_vol) > np.sum(neg_vol) * 2:
        return True
    else:
        return False

In [27]:
%%time

# count of spike day
count = 0
# observing date period and sma period
start_dt = -100
target_dt = -50

for symbol, quotes in all_quotes.items():
    if len(quotes) < abs(start_dt) + sma_period:
        continue
    cur_dt = start_dt
    
    while cur_dt < target_dt:
        
        if not basic_filter(quotes, cur_dt):
            cur_dt += 1
            continue
        if(up_period(quotes, cur_dt)):
            count += 1
            print(count, symbol, quotes.iloc[cur_dt].quote_date, sep = '\t')
            
        cur_dt += 1



1	ABT	20170801.0
2	ABT	20170802.0
3	ABT	20170803.0
4	ABT	20170804.0
5	ABT	20170807.0
6	ABT	20170808.0
7	ABT	20170809.0
8	ABT	20170810.0
9	ABT	20170811.0
10	ABT	20170814.0
11	ABT	20170815.0
12	ABT	20170816.0
13	ABT	20170817.0
14	ABT	20170818.0
15	ABT	20170821.0
16	ABT	20170822.0
17	ABT	20170823.0
18	ABT	20170824.0
19	ABT	20170905.0
20	ABT	20170906.0
21	ABT	20170907.0
22	ABT	20170908.0
23	ABT	20170911.0
24	ABT	20170912.0
25	ABT	20170913.0
26	ABT	20170914.0
27	ABT	20170915.0
28	ABT	20170918.0
29	ABT	20170919.0
30	ABT	20170920.0
31	ABT	20170921.0
32	ABT	20170922.0
33	ABT	20170925.0
34	ABT	20170926.0
35	ABT	20170927.0
36	ABT	20170928.0
37	ABT	20170929.0
38	ABT	20171002.0
39	ABT	20171003.0
40	ABT	20171004.0
41	ABT	20171005.0
42	ABT	20171006.0
43	ABT	20171009.0
44	ABT	20171010.0
45	ACCO	20170801.0
46	ACCO	20170802.0
47	ACCO	20170803.0
48	ACCO	20170804.0
49	ACCO	20170807.0
50	ACCO	20170808.0
51	ACCO	20170809.0
52	ACCO	20170810.0
53	ACCO	20170922.0
54	ACCO	20170925.0
55	ACCO	20170926.0
56	ACCO	

433	AAWW	20170913.0
434	AAWW	20170914.0
435	AAWW	20170915.0
436	AAWW	20170918.0
437	AAWW	20170919.0
438	AAWW	20170920.0
439	AAWW	20170921.0
440	AAWW	20170922.0
441	AAWW	20170925.0
442	AAWW	20170926.0
443	AAWW	20170927.0
444	AAWW	20170928.0
445	AAWW	20170929.0
446	AAWW	20171002.0
447	AAWW	20171003.0
448	AAWW	20171004.0
449	AAWW	20171005.0
450	AAWW	20171006.0
451	AAWW	20171009.0
452	AAWW	20171010.0
453	AAP	20171006.0
454	AAP	20171009.0
455	AAP	20171010.0
456	AB	20170801.0
457	AB	20170802.0
458	AB	20170803.0
459	AB	20170804.0
460	AB	20170807.0
461	AB	20170808.0
462	AB	20170809.0
463	AB	20170810.0
464	AB	20170811.0
465	AB	20170814.0
466	AB	20170815.0
467	AB	20170816.0
468	AB	20170817.0
469	AB	20170818.0
470	AB	20170821.0
471	AB	20170822.0
472	AB	20170928.0
473	AB	20170929.0
474	AB	20171002.0
475	AB	20171003.0
476	AB	20171004.0
477	AB	20171005.0
478	AB	20171006.0
479	AB	20171009.0
480	AB	20171010.0
481	ABBV	20170801.0
482	ABBV	20170802.0
483	ABBV	20170803.0
484	ABBV	20170804.0
485	ABBV	2017

989	AET	20171004.0
990	AET	20171005.0
991	AET	20171006.0
992	AET	20171009.0
993	AET	20171010.0
994	ADNT	20170801.0
995	ADNT	20170802.0
996	ADNT	20170803.0
997	ADNT	20170804.0
998	ADNT	20170807.0
999	ADNT	20170808.0
1000	ADNT	20170809.0
1001	ADNT	20170810.0
1002	ADNT	20170811.0
1003	ADNT	20170814.0
1004	ADNT	20170815.0
1005	ADNT	20170816.0
1006	ADNT	20170906.0
1007	ADNT	20170907.0
1008	ADNT	20170908.0
1009	ADNT	20170911.0
1010	ADNT	20170912.0
1011	ADNT	20170913.0
1012	ADNT	20170914.0
1013	ADNT	20170915.0
1014	ADNT	20170918.0
1015	ADNT	20170919.0
1016	ADNT	20170920.0
1017	ADNT	20170921.0
1018	ADNT	20170922.0
1019	ADNT	20170925.0
1020	ADNT	20170926.0
1021	ADNT	20170927.0
1022	ADNT	20170928.0
1023	ADNT	20170929.0
1024	ADNT	20171002.0
1025	ADNT	20171003.0
1026	ADNT	20171004.0
1027	ADNT	20171005.0
1028	ADNT	20171006.0
1029	ADNT	20171009.0
1030	ADNT	20171010.0
1031	AES	20170817.0
1032	AES	20170818.0
1033	AES	20170822.0
1034	AES	20170823.0
1035	AES	20170824.0
1036	AES	20170825.0
1037	AES	20170

1588	ADM	20170817.0
1589	ADM	20170818.0
1590	ADM	20170821.0
1591	ADM	20170822.0
1592	ADM	20170823.0
1593	ADM	20170824.0
1594	ADM	20170825.0
1595	ADM	20170828.0
1596	ADM	20170829.0
1597	ADM	20170830.0
1598	ADM	20170831.0
1599	ADM	20170901.0
1600	ADM	20170905.0
1601	ADM	20170906.0
1602	ADM	20170914.0
1603	ADM	20170915.0
1604	ADM	20170918.0
1605	ADM	20170919.0
1606	ADM	20170920.0
1607	ADM	20170921.0
1608	ADM	20170922.0
1609	ADM	20170925.0
1610	ADM	20170926.0
1611	ADM	20170927.0
1612	ADM	20170928.0
1613	ADM	20170929.0
1614	ADM	20171002.0
1615	ADM	20171003.0
1616	ADM	20171004.0
1617	ADM	20171005.0
1618	ADM	20171006.0
1619	ADM	20171009.0
1620	ADM	20171010.0
1621	ADMS	20170801.0
1622	ADMS	20170802.0
1623	ADMS	20170803.0
1624	ADMS	20170810.0
1625	ADMS	20170908.0
1626	ADMS	20170911.0
1627	ADMS	20170912.0
1628	ADMS	20170913.0
1629	ADMS	20170914.0
1630	ADMS	20170915.0
1631	ADMS	20170918.0
1632	ADMS	20170919.0
1633	ADMS	20170920.0
1634	ADMS	20170921.0
1635	ADMS	20170922.0
1636	ADMS	20170925.0
1637

2081	AFMD	20170803.0
2082	AFMD	20170804.0
2083	AFMD	20170807.0
2084	AFMD	20170808.0
2085	AFMD	20170809.0
2086	AFMD	20170810.0
2087	AFMD	20170811.0
2088	AFMD	20170814.0
2089	AIN	20170801.0
2090	AIN	20170802.0
2091	AIN	20170803.0
2092	AIN	20170804.0
2093	AIN	20170807.0
2094	AIN	20170808.0
2095	AIN	20170809.0
2096	AIN	20170810.0
2097	AIN	20170811.0
2098	AIN	20170814.0
2099	AIN	20170918.0
2100	AIN	20170920.0
2101	AIN	20170921.0
2102	AIN	20170922.0
2103	AIN	20170925.0
2104	AIN	20170926.0
2105	AIN	20170927.0
2106	AIN	20170928.0
2107	AIN	20170929.0
2108	AIN	20171002.0
2109	AIN	20171003.0
2110	AIN	20171004.0
2111	AIN	20171005.0
2112	AIN	20171006.0
2113	AIN	20171009.0
2114	AIN	20171010.0
2115	AFL	20170801.0
2116	AFL	20170802.0
2117	AFL	20170803.0
2118	AFL	20170804.0
2119	AFL	20170807.0
2120	AFL	20170808.0
2121	AFL	20170809.0
2122	AFL	20170810.0
2123	AFL	20170811.0
2124	AFL	20170814.0
2125	AFL	20170815.0
2126	AFL	20170816.0
2127	AFL	20170817.0
2128	AFL	20170818.0
2129	AFL	20170821.0
2130	AFL	201

KeyboardInterrupt: 

##  Find consolidation pattern

In [4]:
import ta_lib
import matplotlib.pyplot as plt

In [5]:
def volitility(quotes, index, period):
    atr = ta_lib.ATR(quotes, index, period)
    avg = np.mean(quotes.iloc[index - period + 1 : index + 1].close)
    return atr / avg

def is_consolidation(quotes, index, period = 20):
    # range threhold
    RANGE_THREHOLD = 0.1
    
    pre_volitility = volitility(quotes, index - period, period * 2)
    cur_volitility = volitility(quotes, index, period)
    
    if pre_volitility > (cur_volitility * 1.5):
        return True
    else:
        return False
    '''
    quotes_period = quotes.iloc[index - period + 1: index + 1]
    max_high_price = np.max(quotes_period.close)
    min_low_price = np.min(quotes_period.close)
    
    sma = np.mean(quotes_period.close)
    
    return (max_high_price - min_low_price) / sma < RANGE_THREHOLD
    '''

In [17]:
def consolidation_days(quotes, index, min_period = 20):
    VOLITILITY_RATE = 1.5
    
    days = 0
    i = min_period
    while index - i >= 0:
        pre_volitility = volitility(quotes, index - i, i)
        cur_volitility = volitility(quotes, index, i)
        if pre_volitility < 0.1:
            break
        if pre_volitility > (cur_volitility * VOLITILITY_RATE) and pre_volitility < (cur_volitility * 3):
            days = i
            i += 1
        else:
            break
    return days

In [6]:
# find reverse bulge
def reverse_bulge_list(quotes):
    ''' Get reverse_bulge day in period [ index - period + 1, index ] '''
    # 0: in; 1: out
    status = 1
    result_list = []
    
    for i in range(0, len(quotes)):
        mi = ta_lib.MI(quotes, i)
        if mi is None:
            continue
        if mi >= 27:
            status = 0
        else:
            if status == 0 and mi <= 26.5:
                status = 1
                result_list.append(i)
    return result_list


In [7]:
def draw(symbol, quotes):
    EXPAND_SIZE = 100
    mi_list = []
    mfi_list = []

    for i in range(len(quotes)):
        index = i
        dt = quotes.iloc[index].quote_date
        mfi = ta_lib.MFI(quotes, index)
        mi = ta_lib.MI(quotes, index)

        mi_list.append(mi)
        mfi_list.append(mfi)

        print(i, dt, mfi, mi, sep = '\t')
        
    mfi_list = np.array(mfi_list)
    long_period = 16
    short_period = 8

    long_ema_mfi = SMA_LIST(mfi_list, long_period)
    short_ema_mfi = SMA_LIST(mfi_list, short_period)   
        
    rb_list = reverse_bulge(quotes)
    for index in rb_list:

        #ta_lib.EMA_LIST(mfi_list, period, len(mfi_list), period)

        plt.figure(1)
        plt.subplot(311)
        plt.plot(mi_list)
        plt.plot([27] * rang, color = 'k')
        plt.plot([26.5] * rang, 'k:')
        plt.title(symbol)
        plt.ylabel('MI')

        plt.subplot(312)
        view_quotes = list([index - EXPAND_SIZE : index + EXPAND_SIZE].close)
        plt.plot(view_quotes)
        plt.ylabel('stock')

        plt.subplot(313)
        plt.plot(mfi_list)
        plt.plot(long_ema_mfi[index - EXPAND_SIZE : index + EXPAND_SIZE], color = 'r')
        plt.plot(short_ema_mfi[index - EXPAND_SIZE : index + EXPAND_SIZE], color = 'g')
        plt.plot([80] * EXPAND_SIZE * 2, color = 'k')
        plt.plot([20] * EXPAND_SIZE * 2, color = 'k')
        plt.plot([60] * EXPAND_SIZE * 2, 'y:')
        plt.plot([40] * EXPAND_SIZE * 2, 'y:')
        plt.plot([50] * EXPAND_SIZE * 2, 'y:')
        plt.ylabel('MFI')
        plt.ylim(0, 100)

        plt.show()
        

SyntaxError: invalid syntax (<ipython-input-7-2982e26962d5>, line 38)

In [8]:
def PNV(quote):
    ''' Positive volume and Negative volume'''
    all_len = quote.high - quote.low
    candle_len = quote.close - quote.open
    if candle_len > 0:
        pos_len = all_len
        neg_len = all_len - candle_len
    elif candle_len < 0:
        pos_len = all_len + candle_len
        neg_len = all_len
    else:
        pos_len = neg_len = 1
    
    pos_volume = quote.volume * (pos_len / (pos_len + neg_len))
    neg_volume = quote.volume - pos_volume
    return pos_volume, neg_volume
    

In [9]:
def MFR(quotes, index, period):
    ''' Money Flow Rate: rate of positive money divided by negitive money'''
    pos_volume = 0
    neg_volume = 0
    for i in range(index - period + 1, index + 1):
        p, n = PNV(quotes.iloc[i])
        pos_volume += p
        neg_volume += n
    
    return pos_volume / (pos_volume + neg_volume)


In [18]:
# find fitting MFR
period = 200
for key, value in all_quotes.items():
    
    length = len(value)
    for i in range(length - period, length - 100):
        if i < period:
            continue
            
        if value.iloc[i].low < 2:
            continue
        days = consolidation_days(value, i, 30)
        
        if days > 0:
            volumes = np.mean(value.iloc[i - days + 1 : i + 1].volume)
            mfr = MFR(value, i, days)
            if volumes > 30000 and abs(mfr - 0.5) > 0.05:
                high_p = np.max(value.iloc[i : i + 100].high)
                low_p = np.min(value.iloc[i : i + 100].low)
                print(key, '%.f' % value.iloc[i].quote_date,'%.2f' % mfr, days, value.iloc[i].close, high_p, low_p, sep = '\t')

EYES	20170501	0.41	30	1.17	1.46	0.9
EYES	20170503	0.45	30	1.27	1.46	0.9
CBAK	20170628	0.55	34	1.35	2.35	1.2
CLSN	20170427	0.39	30	3.7799	4.96	1.24
CLSN	20170428	0.39	31	3.8079	4.96	1.24
CLSN	20170501	0.39	32	3.6609	4.96	1.24
CLSN	20170502	0.39	33	3.3599	4.96	1.24
CLSN	20170503	0.39	34	3.4299	4.96	1.24
CLSN	20170504	0.39	34	3.4145	4.96	1.24
CLSN	20170505	0.39	35	3.4509	4.96	1.24
CLSN	20170508	0.39	36	3.3529	4.96	1.24
CLSN	20170509	0.39	37	3.2983	4.96	1.24
CLSN	20170510	0.39	38	3.3179	4.96	1.24
CLSN	20170511	0.41	43	3.4145	4.96	1.24
FCSC	20170223	0.45	33	2.085	4.6359	1.8
FCSC	20170224	0.44	34	2.1	4.6359	1.8
FCSC	20170227	0.44	35	2.091	4.6359	1.8
FCSC	20170228	0.44	36	2.1	4.6359	1.8
FCSC	20170301	0.44	37	2.13	4.6359	1.8
SGY	20170412	0.42	30	21.76	26.24	16.76
SGY	20170413	0.42	31	20.26	26.24	16.76
SGY	20170417	0.42	32	20.15	26.35	16.76
SGY	20170418	0.42	33	20.59	27.3	16.76
SGY	20170419	0.42	34	20.78	27.3	16.76
SGY	20170420	0.43	35	21.1	27.3	16.76
SGY	20170421	0.43	36	20.93	27.3	16.76
SGY	2

In [40]:
# find breakout volume
period = 200
for key, value in all_quotes.items():
    if companies[companies.symbol == key].sector.iloc[0] == 'Health Care':
        continue
    
    length = len(value)
    for i in range(length - period, length - 100):
        if i < period:
            continue
            
        if value.iloc[i].low < 2:
            continue
            
        quotes = value.iloc[i - 50 : i]
        sma_volume = np.mean( quotes.volume )
        pre_high_p = np.max(quotes.high)
        
        today = value.iloc[i]
        tomorrow = value.iloc[i + 1]
        if today.volume > sma_volume * 5 and today.close > today.open and today.close > pre_high_p:
            exam_quotes = value.iloc[i + 1 : i + 51]
            high_p = np.max(exam_quotes.high)
            high_day = np.argmax(exam_quotes.high)
            low_p = np.min(exam_quotes.low)
            low_day = np.argmin(exam_quotes.low)
            if low_p > pre_high_p * 0.95:   
                print(key, '%.f' % today.quote_date, '%.2f'% (today.close / pre_high_p - 1), '%.2f(%d)' % (high_p / pre_high_p - 1, high_day - i) , '%.2f(%d)' % (low_p / pre_high_p - 1, low_day - i), sep = '\t')

AABA	20170608	0.09	0.24(50)	0.01(5)
DECK	20170526	0.08	0.17(6)	0.01(40)
CVLT	20170503	0.06	0.12(37)	0.03(11)
WYN	20170426	0.08	0.14(42)	0.01(15)
UCTT	20170223	0.07	0.63(50)	0.01(6)
VAC	20170223	0.07	0.38(50)	0.00(9)
EQBK	20170717	0.06	0.13(6)	0.03(44)
BREW	20170504	0.03	0.13(39)	-0.02(11)
VWR	20170503	0.17	0.20(1)	0.14(11)
AJRD	20170302	0.07	0.15(38)	0.00(50)
GAIA	20170323	0.06	0.35(16)	0.01(2)
GAIA	20170328	0.01	0.25(13)	-0.02(2)
VEEV	20170301	0.00	0.26(50)	-0.03(3)
BATRA	20170313	0.01	0.12(31)	-0.03(7)
HOFT	20170406	0.10	0.31(22)	0.06(1)
SNBR	20170420	0.28	0.41(49)	0.09(28)
PLW	20170411	0.00	0.02(50)	-0.03(10)
MOMO	20170307	0.08	0.60(49)	0.06(2)
MOMO	20170308	0.13	0.48(48)	-0.02(1)
INST	20170502	0.00	0.19(39)	-0.03(2)
INST	20170623	0.04	0.22(26)	0.02(8)
ADSK	20170519	0.12	0.18(47)	0.01(31)
ATHN	20170518	0.05	0.28(44)	0.05(1)
NYT	20170503	0.03	0.15(28)	0.03(1)
RAIL	20170504	0.07	0.24(6)	0.06(1)
DGRW	20170619	0.00	0.01(22)	-0.02(8)
ANGI	20170502	0.53	1.11(48)	0.55(1)
ANGI	20170503	0.04

In [32]:
all_quotes['SNAP'].open

0      24.00
1      26.39
2      28.17
3      22.21
4      22.03
5      23.15
6      23.36
7      22.05
8      20.90
9      20.08
10     20.65
11     19.80
12     19.94
13     20.04
14     20.65
15     22.69
16     23.05
17     23.09
18     23.31
19     21.77
20     22.55
21     22.05
22     22.70
23     22.21
24     22.15
25     20.76
26     20.68
27     21.09
28     21.01
29     20.70
       ...  
175    13.47
176    12.64
177    12.50
178    12.83
179    12.41
180    12.32
181    12.50
182    12.60
183    13.10
184    12.52
185    12.35
186    12.67
187    12.87
188    13.37
189    13.54
190    13.41
191    13.67
192    13.95
193    14.40
194    14.58
195    14.65
196    14.64
197    15.14
198    16.00
199    15.80
200    15.92
201    16.00
202    15.95
203    15.91
204    15.69
Name: open, Length: 205, dtype: float64