In [2]:
import pandas as pd
import numpy as np
from datetime import date
import warnings
from tqdm.auto import tqdm

In [3]:
def build_weekly_group(df):
    """Build a per-row (ISO year, ISO week) grouping key from the index of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index supports ``isocalendar()`` (e.g. a DatetimeIndex).

    Returns
    -------
    pandas.Series
        One ``(year, week)`` tuple per row, carrying the same index as ``df``.
    """
    iso = df.index.isocalendar()
    year_week_pairs = list(zip(iso["year"], iso["week"]))
    return pd.Series(year_week_pairs, index=df.index)

In [4]:
def calc_weekly_return(x):
    """Compute one week's open-to-close return, labelled with that week's Friday.

    The return is ``(last close - first open) / first open`` using the first
    and last rows of ``x``.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows belonging to a single week, indexed by a DatetimeIndex and
        containing the columns "EndOfDayQuote Open" and
        "EndOfDayQuote ExchangeOfficialClose".
        Assumes rows are in chronological order -- TODO confirm the caller
        guarantees sorted input.

    Returns
    -------
    pandas.DataFrame or None
        A one-row frame with the column "weekly_return", indexed by the first
        Friday found in ``x``; ``None`` when ``x`` has no Friday row.
    """
    CLOSE = "EndOfDayQuote ExchangeOfficialClose"
    OPEN = "EndOfDayQuote Open"

    # Read the Friday label straight from the index so the lookup does not
    # depend on the index being named "datetime", and so an unrelated KeyError
    # cannot be mistaken for "this week has no Friday".
    fridays = x.index[x.index.dayofweek == 4]  # Monday=0, so 4 is Friday
    if len(fridays) == 0:
        return None

    first_open = x[OPEN].iloc[0]
    # A zero open yields inf/NaN here rather than raising.
    wr = (x[CLOSE].iloc[-1] - first_open) / first_open

    return pd.DataFrame({"weekly_return": [wr]}, index=[fridays[0]])

In [14]:
def extract_fwd_return(df):
    """Return the forward (next-row) weekly return for each weekly result.

    Rows of ``df`` are grouped by the key from ``build_weekly_group`` and each
    group is passed to ``calc_weekly_return``. The combined result is shifted
    up by one row so that every entry carries the return of the row that
    follows it, and the resulting missing values are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Price rows accepted by ``build_weekly_group`` and
        ``calc_weekly_return``.

    Returns
    -------
    pandas.Series
        The "weekly_return" column after index level 0 has been moved out of
        the index.
    """
    per_week = df.groupby(build_weekly_group(df)).apply(calc_weekly_return)

    # shift(-1) pulls the following row's return onto the current row; the
    # final row has no successor and is removed by dropna().
    next_week = per_week.shift(-1).dropna()

    flattened = next_week.reset_index(0)
    return flattened["weekly_return"]

In [20]:
def build_objective(df, code):
    """Build the Friday-sampled frame for one stock with its forward weekly return.

    Parameters
    ----------
    df : pandas.DataFrame
        Price table indexed by a DatetimeIndex, containing a "Local Code"
        column plus the columns required by ``extract_fwd_return``.
    code
        Value of "Local Code" identifying the stock to process.

    Returns
    -------
    pandas.DataFrame
        The stock's rows resampled to business days (forward-filled) and
        restricted to Fridays, with two added columns: "objective" (the
        forward weekly return aligned on the Friday date) and "code".
        Rows containing any missing value are dropped and +/-inf values in
        any column are replaced with 0.
    """
    ob_df = df[df["Local Code"] == code]
    ob_df = ob_df.fillna(0)

    # The forward return must come from this stock's rows only. Feeding the
    # whole multi-stock table would mix opens/closes of different stocks and
    # repeat the same full-table computation for every code.
    weekly_fwd_return = extract_fwd_return(ob_df)

    ob_df = ob_df.resample("B").ffill()
    # Keep Fridays only (Monday=0, so 4 is Friday); copy so the column
    # assignments below operate on an independent frame, not a slice.
    ob_df = ob_df.loc[ob_df.index.dayofweek == 4].copy()

    # Assignment aligns on the index, so Fridays without a forward return
    # receive NaN and are removed by dropna().
    ob_df["objective"] = weekly_fwd_return
    ob_df = ob_df.dropna()
    ob_df = ob_df.replace([np.inf, -np.inf], 0)

    ob_df["code"] = code
    return ob_df

In [16]:
# Load the raw inputs (gzip-compressed CSV files, per their .csv.gz names):
# the stock list and the stock price table.
list_df = pd.read_csv("../data/stock_list.csv.gz")
price_df = pd.read_csv("../data/stock_price.csv.gz")

In [17]:
# "Local Code" values of the stocks whose "universe_comp2" flag is True.
in_universe = list_df["universe_comp2"] == True
codes = list_df.loc[in_universe, "Local Code"].values

In [18]:
# stock.py Stock.preprocessで行っている置換
price_df.loc[:, "datetime"] = pd.to_datetime(price_df.loc[:, "EndOfDayQuote Date"])
price_df.set_index("datetime", inplace=True)

In [21]:
# Build the objective frame for every selected code, then concatenate the
# per-code frames and write them to a single CSV file.
whole_code_objective_df_list = []

# Suppress RuntimeWarning only for the duration of the loop. catch_warnings()
# restores the previous filter state on exit, including when the loop is
# interrupted or raises, and leaves filters set elsewhere untouched.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", RuntimeWarning)
    for code in tqdm(codes):
        whole_code_objective_df_list.append(build_objective(price_df, code))

objective_df = pd.concat(whole_code_objective_df_list)
objective_df.to_csv("stock_weekly_fwd_return.csv")

HBox(children=(IntProgress(value=0, max=1779), HTML(value='')))

KeyboardInterrupt: 