In [1]:
import numpy as np
import pandas as pd

In [2]:
# !pip install yfinance --upgrade --no-cache-dir
import yfinance as yf

In [8]:
# read in the list of stocks in S&P 500
stock_list = pd.read_csv("sp500_list.csv")
stock_list.head()
# print(stock_list.Sector.unique())

Unnamed: 0,Symbol,Name,Sector
0,MMM,3M Company,Industrials
1,AOS,A.O. Smith Corp,Industrials
2,ABT,Abbott Laboratories,Health Care
3,ABBV,AbbVie Inc.,Health Care
4,ACN,Accenture plc,Information Technology


In [58]:
# create an empty result table
result = pd.DataFrame({
    "stock": [],
    "gap_date": [],
    "gap_loss": [],
    "revert_time": [],
    "u_curve_effect": [],
    "u_curve_profit": [],
    "lowest_day": []
})
result["stock"] = result["stock"].astype(object)
result["gap_date"] = pd.to_datetime(result["gap_date"])
result["gap_loss"] = result["gap_loss"].astype(float)
result["revert_time"] = result["revert_time"].astype(int)
result["u_curve_effect"] = result["u_curve_effect"].astype(bool)
result["u_curve_profit"] = result["u_curve_profit"].astype(float)
result["lowest_day"] = result["lowest_day"].astype(float)

In [59]:
spy = yf.download("SPY", start="2000-01-01", end="2019-07-21")
spy.head()

[*********************100%***********************]  1 of 1 downloaded


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-03,148.25,148.25,143.88,145.44,100.93,8164300
2000-01-04,143.53,144.06,139.64,139.75,96.99,8089800
2000-01-05,139.94,141.53,137.25,140.0,97.16,12177900
2000-01-06,139.62,141.5,137.75,137.75,95.6,6227200
2000-01-07,140.31,145.75,140.06,145.75,101.15,8066500


In [60]:
# scan all stocks in sp500 to find all "gap over ma250" opportunities
for stock in stock_list["Symbol"]:
    data = yf.download(stock, start="2000-01-01", end="2019-07-21")
    
    # Calculate adjusted ratio to adjust open/high/low price
    data["adjusted_ratio"] = data["Adj Close"] / data["Close"]
    data["Adj Open"] = data["adjusted_ratio"] * data["Open"]
    data["Adj High"] = data["adjusted_ratio"] * data["High"]
    data["Adj Low"] = data["adjusted_ratio"] * data["Low"]
    
    # get 250 moving average
    data["MA250"] = data["Adj Close"].rolling(window = 250).mean()
                                                     
    # get daily adjusted average price: (open + close) / 2
    data["Adj Average"] = (data["Adj Open"] + data["Adj Close"]) * 0.5
                                                     
    # pick up trading opportunity days which satisfied: 
    # 1. have a gap
    # 2. closed price decreased more than 10%
    # 3. gap across 250 moving avergae
    gap_days = []
    for row in range(data.shape[0]):
        if np.isnan(data['MA250'].iloc[row]):
            continue
        curr_ma250 = data["MA250"].iloc[row]
        prev_close = data["Adj Close"].iloc[row - 1]
        curr_open = data["Adj Open"].iloc[row]
        curr_close = data["Adj Close"].iloc[row]

        gap = prev_close - curr_open
        daily_change = curr_close / prev_close - 1
        if prev_close > curr_ma250 and curr_ma250 > curr_open and curr_ma250 > curr_close:
            across_250 = True
        else:
            across_250 = False
        # print(gap, daily_change, across_250)

        if gap > 0 and daily_change <= -0.1 and across_250 == True:
            gap_days.append(row)
    
    gap_happened = []
    gap_loss = []
    revert_time = []
    u_curve_exist = []
    lowest_days = []
    u_curve_profit = []
    # highest_days = []
    # j_curve_profit = []
    
    for i in gap_days:
        break_sign = False
        first_day = i
        first_date = data.index[i]
        days = 0

        curr_close = data["Adj Close"].iloc[i]

        initial_open = data["Adj Open"].iloc[first_day]
        initial_close = data["Adj Close"].iloc[first_day]
        
        # calculate the gap day loss
        loss = max(initial_open, initial_close) / data["Adj Close"].iloc[first_day - 1] - 1
        
        while curr_close <= max(initial_open, initial_close):
            days += 1
            i += 1
            if i >= data.shape[0]:
                break_sign = True
                break
            # curr_open = data["Adj Open"].iloc[i]
            curr_close = data["Adj Close"].iloc[i]

        if break_sign == False:
            gap_happened.append(first_date)
            gap_loss.append(loss)
            revert_time.append(days)

            if days <= 30:
                u_curve_exist.append(True)
                lowest_price = min(data["Adj Average"].iloc[first_day:(first_day + days + 1)]) 
                # suppose enter into the stock on the average price of the "lowest price day"
                u_profit = (curr_close - lowest_price) / lowest_price 
                # calculate the profit from lowest price day to revert day

                which_min = data["Adj Average"].iloc[first_day:(first_day + days + 1)] == lowest_price
                min_day = list(which_min).index(True) # which day is the lowest day
                lowest_days.append(min_day)

                u_curve_profit.append(u_profit)
            else:
                u_curve_exist.append(False)
                lowest_days.append(None)
                u_curve_profit.append(None)

    counts = len(gap_happened)
    temp = pd.DataFrame({
        "stock": [stock] * counts,
        "gap_date": gap_happened,
        "gap_loss": gap_loss,
        "revert_time": revert_time,
        "u_curve_effect": u_curve_exist,
        "u_curve_profit": u_curve_profit,
        "lowest_day": lowest_days
    })
    temp["stock"] = temp["stock"].astype(object)
    temp["gap_date"] = pd.to_datetime(temp["gap_date"])
    temp["gap_loss"] = temp["gap_loss"].astype(float)
    temp["revert_time"] = temp["revert_time"].astype(int)
    temp["u_curve_effect"] = temp["u_curve_effect"].astype(bool)
    temp["u_curve_profit"] = temp["u_curve_profit"].astype(float)
    temp["lowest_day"] = temp["lowest_day"].astype(float)
    
    result = result.append(temp, ignore_index=True)

[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*******************

Exception in thread Thread-1131:
Traceback (most recent call last):
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\multitasking\__init__.py", line 102, in _run_via_pool
    return callee(*args, **kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 470, in _download_one_threaded
    period, interval, prepost, proxy)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 483, in _download_one
    proxy=proxy)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 304, in history
    raise ValueError(self.ticker, err_msg)
ValueError: ('BRK.B', 'No data found, symbol may be delisted')



[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded




[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded


Exception in thread Thread-1144:
Traceback (most recent call last):
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 313, in history
    quotes = self._parse_quotes(data["chart"]["result"][0])
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 162, in _parse_quotes
    timestamps = data["timestamp"]
KeyError: 'timestamp'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\multitasking\__init__.py", line 102, in _run_via_pool
    return callee(*args, **kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 470, in _download_one_threaded
    period, interval, prepost, proxy)
  File "C:\Use

[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*******************

Exception in thread Thread-1194:
Traceback (most recent call last):
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 313, in history
    quotes = self._parse_quotes(data["chart"]["result"][0])
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 162, in _parse_quotes
    timestamps = data["timestamp"]
KeyError: 'timestamp'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\multitasking\__init__.py", line 102, in _run_via_pool
    return callee(*args, **kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 470, in _download_one_threaded
    period, interval, prepost, proxy)
  File "C:\Use

[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*******************

Exception in thread Thread-1216:
Traceback (most recent call last):
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 313, in history
    quotes = self._parse_quotes(data["chart"]["result"][0])
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 162, in _parse_quotes
    timestamps = data["timestamp"]
KeyError: 'timestamp'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\multitasking\__init__.py", line 102, in _run_via_pool
    return callee(*args, **kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 470, in _download_one_threaded
    period, interval, prepost, proxy)
  File "C:\Use


[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[******************

Exception in thread Thread-1271:
Traceback (most recent call last):
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 313, in history
    quotes = self._parse_quotes(data["chart"]["result"][0])
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 162, in _parse_quotes
    timestamps = data["timestamp"]
KeyError: 'timestamp'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\multitasking\__init__.py", line 102, in _run_via_pool
    return callee(*args, **kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 470, in _download_one_threaded
    period, interval, prepost, proxy)
  File "C:\Use


[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[******************

Exception in thread Thread-1343:
Traceback (most recent call last):
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 313, in history
    quotes = self._parse_quotes(data["chart"]["result"][0])
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 162, in _parse_quotes
    timestamps = data["timestamp"]
KeyError: 'timestamp'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\multitasking\__init__.py", line 102, in _run_via_pool
    return callee(*args, **kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 470, in _download_one_threaded
    period, interval, prepost, proxy)
  File "C:\Use

[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*******************

Exception in thread Thread-1370:
Traceback (most recent call last):
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 313, in history
    quotes = self._parse_quotes(data["chart"]["result"][0])
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 162, in _parse_quotes
    timestamps = data["timestamp"]
KeyError: 'timestamp'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\multitasking\__init__.py", line 102, in _run_via_pool
    return callee(*args, **kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 470, in _download_one_threaded
    period, interval, prepost, proxy)
  File "C:\Use

[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*******************



[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*******************

Exception in thread Thread-1555:
Traceback (most recent call last):
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 313, in history
    quotes = self._parse_quotes(data["chart"]["result"][0])
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 162, in _parse_quotes
    timestamps = data["timestamp"]
KeyError: 'timestamp'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "C:\Users\anyez\Anaconda3\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\multitasking\__init__.py", line 102, in _run_via_pool
    return callee(*args, **kwargs)
  File "C:\Users\anyez\Anaconda3\lib\site-packages\yfinance\__init__.py", line 470, in _download_one_threaded
    period, interval, prepost, proxy)
  File "C:\Use


[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded
[*********************100%***********************]  1 of 1 downloaded


In [61]:
# calculate the spy (market) price change over last 20 days before the gap day
market_change = []
for day in result.gap_date:
    spy_row = list(spy.index == day).index(True)
    spy_change20 = spy["Adj Close"].iloc[spy_row] / spy["Adj Close"].iloc[spy_row - 19] - 1
    market_change.append(spy_change20)
result["market_change"] = market_change

In [62]:
# attach sector information
result = pd.merge(result, stock_list[["Symbol", "Sector"]], left_on = "stock", right_on = "Symbol", how = "left")
del result["Symbol"]

In [63]:
result.head()

Unnamed: 0,stock,gap_date,gap_loss,revert_time,u_curve_effect,u_curve_profit,lowest_day,market_change,Sector
0,AOS,2001-10-01,-0.147274,1,True,0.07848,0.0,-0.102177,Industrials
1,AOS,2008-12-01,-0.030463,2,True,0.058008,1.0,-0.154459,Industrials
2,AYI,2008-07-02,-0.094141,26,True,0.156382,6.0,-0.099274,Industrials
3,AYI,2018-01-09,-0.116989,8,True,0.055007,1.0,0.036177,Industrials
4,ADBE,2002-06-14,-0.09838,191,False,,,-0.08558,Information Technology


In [71]:
result.to_csv("./data/raw_data.csv")