# Find spike days

## Load all companies

In [1]:
import numpy as np
import pandas as pd

In [2]:
from db_operation import DBService

# Open a handle to the quotes database through the project-local DBService wrapper.
db = DBService()

# Establish the connection before issuing any queries.
db.connect()

# Load the company listing; later cells read its `symbol` and `sector` columns.
# NOTE(review): presumably a pandas DataFrame (it is used with .head()/.groupby) — confirm in db_operation.
companies = db.get_companies()

# Preview the first rows as the cell output.
companies.head()

Unnamed: 0,symbol,name,ipo_year,sector,industry
0,AAPC,Atlantic Alliance Partnership Corp.,2015,Consumer Services,Real Estate Investment Trusts
1,ABTL,Autobytel Inc.,1999,Technology,"Computer Software: Programming, Data Processing"
2,ACCP,"Accelerated Pharma, Inc.",0,,
3,ACIU,AC Immune SA,2016,Health Care,Major Pharmaceuticals
4,ACIA,"Acacia Communications, Inc.",2016,Technology,Semiconductors


## Get all quotes

In [5]:
# In-memory cache of loaded quotes: maps symbol -> quotes table for that symbol.
all_quotes = dict()

In [6]:
%%time
# Fill the all_quotes cache, fetching only symbols that are not cached yet so
# that re-running this cell does not reload everything from the database.
for symbol in companies.symbol.values:
    if symbol not in all_quotes:
        quotes = db.get_quotes_by_symbol(symbol)
        # Keep a symbol only if every row has a volume of at least 100;
        # symbols with any thinner trading day are left out of the cache.
        if not (quotes['volume'] < 100).any():
            all_quotes[symbol] = quotes
    else:
        quotes = all_quotes[symbol]

print('Load success:', len(all_quotes))
all_quotes['SNAP'].head()

Load success: 4091
Wall time: 3min 55s


## Reports of the overall market (大盘), by sector


In [97]:
# Per-sector daily breadth report: for each of the `period` most recent rows
# (walking back from `target_dt`), count how many symbols closed above their
# previous close ("up") versus not ("down" — this includes unchanged closes).
reports = []
target_dt = -1   # negative iloc offset of the most recent row to report on
period = 300     # number of consecutive rows to walk back from target_dt
# The oldest row read is iloc[target_dt - period], so a symbol needs at least
# this many rows; for target_dt = -1 this equals the old `len > period` check.
min_rows = period - target_dt

# Group by the plain column name (not a one-element list) so that `sector` is
# the sector label itself rather than a 1-tuple on recent pandas versions.
for sector, group in companies[['symbol', 'sector']].groupby('sector'):
    # Resolve the eligible quote tables once per sector instead of once per day.
    sector_quotes = [
        all_quotes[symbol]
        for symbol in group['symbol']
        if symbol in all_quotes and len(all_quotes[symbol]) >= min_rows
    ]
    if not sector_quotes:
        # No loaded symbol with enough history: there is nothing to count, and
        # the up/total ratio would otherwise divide by zero.
        continue

    for i in range(period):
        cur_dt = target_dt - i
        up_count = 0
        down_count = 0
        for sym_quotes in sector_quotes:
            close_p = sym_quotes['close']
            if close_p.iloc[cur_dt] > close_p.iloc[cur_dt - 1]:
                up_count += 1
            else:
                down_count += 1

        # The row is labelled with the date of the last eligible symbol.
        # NOTE(review): this assumes all symbols share the same trading calendar
        # at a given offset — confirm, otherwise rows mix different dates.
        quote_dt = sector_quotes[-1]['quote_date'].iloc[cur_dt]
        total = up_count + down_count
        reports.append([quote_dt, sector, up_count, down_count, total, up_count / total])

reports = pd.DataFrame(reports, columns = ['quote_dt', 'sector', 'up_count', 'down_count', 'total', 'prec'])
print(reports)
print(reports['total'].sum(), reports['up_count'].sum(), reports['down_count'].sum())

      quote_dt            sector  up_count  down_count   total      prec
0     20171113  Basic Industries        93         119     212  0.438679
1     20171110  Basic Industries       176         248     424  0.415094
2     20171109  Basic Industries       234         402     636  0.367925
3     20171108  Basic Industries       335         513     848  0.395047
4     20171107  Basic Industries       400         660    1060  0.377358
5     20171106  Basic Industries       527         745    1272  0.414308
6     20171103  Basic Industries       617         867    1484  0.415768
7     20171102  Basic Industries       713         983    1696  0.420401
8     20171101  Basic Industries       818        1090    1908  0.428721
9     20171031  Basic Industries       940        1180    2120  0.443396
10    20171030  Basic Industries      1008        1324    2332  0.432247
11    20171027  Basic Industries      1113        1431    2544  0.437500
12    20171026  Basic Industries      1230        1

Unnamed: 0,sector,up_count,down_count,total
0,Basic Industries,97,127,224
1,Capital Goods,116,150,266
2,Consumer Durables,56,36,92
3,Consumer Non-Durables,75,82,157
4,Consumer Services,304,323,627
5,Energy,46,193,239
6,Finance,311,192,503
7,Health Care,220,234,454
8,Miscellaneous,43,56,99
9,Public Utilities,96,103,199


## Find volume spikes

In [7]:
%%time

# Scan every cached symbol for volume-spike days inside an observation window
# and print, for each spike, how the close moved over the following rows.

# running count of spike days found (also printed as the row number)
count = 0
# a day is a spike when its volume exceeds the trailing average by this factor
spike_multiple = 50
# observation window as negative row offsets: start_dt inclusive, target_dt exclusive
start_dt = -150
target_dt = -100
# number of preceding rows used for the trailing average volume
sma_period = 20
# number of rows after the spike day used to measure the max/min close
forward_period = 50

for symbol, quotes in all_quotes.items():
    n_rows = len(quotes)
    # Need enough history for the earliest observed day plus its trailing window.
    if n_rows < abs(start_dt) + sma_period:
        continue

    # Hoist the column lookups out of the per-day loop.
    volume = quotes['volume']
    close = quotes['close']
    open_ = quotes['open']
    quote_date = quotes['quote_date']

    for cur_dt in range(start_dt, target_dt):
        # Work with absolute positions: a slice built from negative offsets
        # silently turns empty/wrong once its end reaches 0, which happened to
        # the forward window whenever target_dt was moved closer to the end.
        pos = n_rows + cur_dt

        sma_vol = volume.iloc[pos - sma_period: pos].mean()
        vol = volume.iloc[pos]
        if vol > sma_vol * spike_multiple:
            count += 1
            # Direction of the spike day itself: close above open counts as 'Up'.
            if close.iloc[pos] > open_.iloc[pos]:
                trend = 'Up'
            else:
                trend = 'Down'
            print(count, '\t', symbol, '\t', quote_date.iloc[pos],'\t',  int(vol / sma_vol), end = '\t')

            # Closes over the rows after the spike; the slice is clipped at the
            # end of the series when fewer than forward_period rows remain.
            price_period = close.iloc[pos + 1 : pos + 1 + forward_period]
            max_price = price_period.max()
            min_price = price_period.min()
            cur_price = close.iloc[pos]
            # Best and worst close after the spike, relative to the spike-day close.
            print(round(max_price / cur_price - 1, 2), '\t', round(min_price / cur_price - 1, 2), '\t', trend)



1 	 GTXI 	 20170612 	 247	-0.05 	 -0.32 	 Up
2 	 SNDX 	 20170517 	 51	0.09 	 -0.28 	 Up
3 	 SPI 	 20170525 	 252	-0.03 	 -0.75 	 Up
4 	 WSTC 	 20170510 	 54	0.01 	 -0.0 	 Down
5 	 WHF 	 20170627 	 81	0.09 	 -0.01 	 Down
6 	 PRTO 	 20170510 	 102	-0.03 	 -0.28 	 Down
7 	 MBRX 	 20170621 	 79	2.15 	 -0.03 	 Down
8 	 OCX 	 20170522 	 144	-0.14 	 -0.51 	 Up
9 	 AKBA 	 20170426 	 111	0.32 	 -0.03 	 Up
10 	 PTGX 	 20170530 	 186	0.18 	 -0.11 	 Down
11 	 OB 	 20170503 	 69	0.01 	 -0.0 	 Down
12 	 CCIH 	 20170601 	 50	0.26 	 -0.43 	 Up
13 	 FOGO 	 20170512 	 68	0.03 	 -0.03 	 Down
14 	 AVEO 	 20170623 	 89	2.07 	 0.56 	 Up
15 	 JMP 	 20170705 	 72	-0.22 	 -0.27 	 Up
16 	 MDGL 	 20170523 	 58	0.01 	 -0.19 	 Down
17 	 EMMS 	 20170509 	 77	0.05 	 -0.14 	 Up
18 	 ZPIN 	 20170406 	 66	0.06 	 -0.0 	 Up
19 	 CLSN 	 20170605 	 191	-0.03 	 -0.61 	 Down
20 	 ANGI 	 20170502 	 57	0.36 	 0.09 	 Up
21 	 TCS 	 20170524 	 62	0.14 	 -0.21 	 Down
22 	 FORD 	 20170512 	 152	-0.1 	 -0.2 	 Up
23 	 GLYC 	 20170518

In [51]:
# Sanity check: does SNAP have at least one row with volume above 110?
(all_quotes['SNAP']['volume'] > 110).any()

True

Unnamed: 0,quote_date,open,close,high,low,volume
0,20170302,24.0,24.48,26.05,23.5,216978300
1,20170303,26.39,27.09,29.44,26.06,148099300
2,20170306,28.17,23.77,28.25,23.77,72804120
3,20170307,22.21,21.44,22.5,20.64,71810660
4,20170308,22.03,22.81,23.43,21.31,49784130
