In [None]:
import numpy as np
import pandas as pd

In [None]:
# !pip install yfinance --upgrade --no-cache-dir
import yfinance as yf

In [None]:
# Empty accumulator for the per-stock findings; one row is added per recorded
# gap event further down.  Columns are created in a fixed order and the ones
# with a known type are cast up front.
result_columns = [
    "stock",
    "gap_date",
    "revert_time",
    "u_curve_effect",
    "u_curve_profit",
    "lowest_day",
]
result = pd.DataFrame({column: [] for column in result_columns})
result = result.astype({
    "stock": object,
    "revert_time": int,
    "u_curve_effect": bool,
})
result["gap_date"] = pd.to_datetime(result["gap_date"])

# For every symbol: find days on which the price gapped down through its
# moving average with a large close-to-close loss, measure how many trading
# days the adjusted close needed to climb back above the gap day's
# max(open, close), and add one row per recovered event to `result`.
#
# Thresholds:
#   MA_WINDOW        - look-back, in rows, of the rolling mean of Adj Close.
#   MIN_DAILY_DROP   - a day qualifies when its close-to-close return is at
#                      or below this value (-0.1 == a loss of 10% or more).
#   U_CURVE_MAX_DAYS - a recovery within this many rows counts as a "U curve".
MA_WINDOW = 250
MIN_DAILY_DROP = -0.1
U_CURVE_MAX_DAYS = 30

for stock in stock_list["Symbol"]:
    # auto_adjust=False is requested explicitly so that both the raw OHLC
    # columns and "Adj Close" are returned; the adjustment ratio needs both.
    # NOTE(review): newer yfinance releases are believed to default to
    # auto-adjusted prices without "Adj Close" - confirm against the
    # installed version.
    data = yf.download(
        stock, start="2000-01-01", end="2019-07-21", auto_adjust=False
    )

    # NOTE(review): some yfinance releases appear to return (field, ticker)
    # MultiIndex columns even for a single symbol; keep only the field level
    # so the plain column names used below resolve to Series.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    # Nothing to analyse for this symbol.
    if data.empty:
        continue

    # Scale open/high/low by the same factor that maps Close onto Adj Close.
    data["adjusted_ratio"] = data["Adj Close"] / data["Close"]
    data["Adj Open"] = data["adjusted_ratio"] * data["Open"]
    data["Adj High"] = data["adjusted_ratio"] * data["High"]
    data["Adj Low"] = data["adjusted_ratio"] * data["Low"]

    # Rolling mean of the adjusted close (NaN until MA_WINDOW rows exist).
    data["MA250"] = data["Adj Close"].rolling(window=MA_WINDOW).mean()

    # Daily adjusted average price: (open + close) / 2.
    data["Adj Average"] = (data["Adj Open"] + data["Adj Close"]) * 0.5

    adj_open = data["Adj Open"]
    adj_close = data["Adj Close"]
    adj_average = data["Adj Average"]
    ma250 = data["MA250"]
    n_rows = data.shape[0]

    # Pick the trading days which satisfy all of:
    # 1. the open is below the previous close (a gap down),
    # 2. the close-to-close change is MIN_DAILY_DROP or worse,
    # 3. the gap crosses the moving average: the previous close is above it
    #    while both the open and the close of the day are below it.
    gap_days = []
    # Start at row 1 so that "previous close" never wraps around to the last
    # row of the frame.
    for row in range(1, n_rows):
        curr_ma250 = ma250.iloc[row]
        if np.isnan(curr_ma250):
            continue
        prev_close = adj_close.iloc[row - 1]
        curr_open = adj_open.iloc[row]
        curr_close = adj_close.iloc[row]

        gap = prev_close - curr_open
        daily_change = curr_close / prev_close - 1
        across_250 = (
            prev_close > curr_ma250 > curr_open and curr_ma250 > curr_close
        )

        if gap > 0 and daily_change <= MIN_DAILY_DROP and across_250:
            gap_days.append(row)

    gap_dates = []
    revert_time = []
    u_curve_exist = []
    lowest_days = []
    u_curve_profit = []

    for first_day in gap_days:
        # The gap counts as reverted on the first later day whose adjusted
        # close is strictly above the gap day's max(open, close).
        recovery_level = max(adj_open.iloc[first_day], adj_close.iloc[first_day])

        revert_day = first_day + 1
        while revert_day < n_rows and adj_close.iloc[revert_day] <= recovery_level:
            revert_day += 1

        # Ran off the end of the data without recovering: event is dropped.
        if revert_day >= n_rows:
            continue

        days = revert_day - first_day
        # Record the calendar date of the gap (the frame's index label),
        # not its row position, so "gap_date" holds real dates.
        gap_dates.append(data.index[first_day])
        revert_time.append(days)

        if days <= U_CURVE_MAX_DAYS:
            u_curve_exist.append(True)
            # Window runs from the gap day through the revert day inclusive.
            window = adj_average.iloc[first_day:revert_day + 1]
            # Offset (in rows from the gap day) of the first lowest average.
            min_day = int(window.to_numpy().argmin())
            lowest_price = window.iloc[min_day]
            # Suppose we enter at the average price of the lowest day and
            # exit at the close of the revert day.
            revert_close = adj_close.iloc[revert_day]
            u_curve_profit.append((revert_close - lowest_price) / lowest_price)
            lowest_days.append(min_day)
        else:
            u_curve_exist.append(False)
            lowest_days.append(np.nan)
            u_curve_profit.append(np.nan)

    # No recovered gap events for this symbol: nothing to add.
    if not gap_dates:
        continue

    temp = pd.DataFrame({
        "stock": [stock] * len(gap_dates),
        "gap_date": gap_dates,
        "revert_time": revert_time,
        "u_curve_effect": u_curve_exist,
        "u_curve_profit": u_curve_profit,
        "lowest_day": lowest_days,
    })
    temp["stock"] = temp["stock"].astype(object)
    temp["gap_date"] = pd.to_datetime(temp["gap_date"])
    temp["revert_time"] = temp["revert_time"].astype(int)
    temp["u_curve_effect"] = temp["u_curve_effect"].astype(bool)
    temp["u_curve_profit"] = temp["u_curve_profit"].astype(float)
    temp["lowest_day"] = temp["lowest_day"].astype(float)

    # DataFrame.append is gone in pandas 2.x; pd.concat is the replacement.
    # Empty frames are left out so they cannot influence the resulting dtypes.
    result = pd.concat(
        [frame for frame in (result, temp) if not frame.empty],
        ignore_index=True,
    )

In [None]:
# Preview the first 20 rows of the accumulated results.
result.head(n=20)

In [None]:
# How many distinct symbols appear in the results.
distinct_stocks = result["stock"].unique()
len(distinct_stocks)

In [None]:
# Share of recorded rows whose "u_curve_effect" flag is set.
u_curve_count = result["u_curve_effect"].sum()
u_curve_count / len(result)

In [None]:
# For the rows flagged as U curves, count the events and average the profit
# for each value of "lowest_day".
u_curve_rows = result[result["u_curve_effect"] == True]
u_curve_rows.groupby(["lowest_day"]).agg({"u_curve_profit": ["count", "mean"]})