# Alpha 전략 feat.우혁님

기존 데이터에 우혁님의 Prophet 코드를 합쳐서 돌려 보기 (run Woohyuk's Prophet code on our existing data).

## Basic settings

### Import libraries

In [1]:
from prophet import Prophet

from tqdm import tqdm

In [2]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from pathlib import Path

In [3]:
## custom library

import eda_util as eutil
import submission_config as subconfig
import submission_util as subutil

In [4]:
# Show floats with thousands separators using general ('g') formatting.
pd.set_option("display.float_format", "{:,g}".format)

In [5]:
# Directory locations taken from the submission config, bound to short names.
BASE_PATH, DATA_PATH, OUTPUT_PATH = (
    subconfig.BASE_PATH,
    subconfig.DATA_PATH,
    subconfig.OUTPUT_PATH,
)

### Import data & preprocessing

In [6]:
# Load the KRX table from the CSV path configured in submission_config.
krx_df = pd.read_csv(filepath_or_buffer=subconfig.krx_df_PATH)

In [7]:
# Replace the CSV headers with short English names (positional, 8 columns).
krx_df.columns = [
    'date',
    'code',
    'name',
    'volume',
    'open',
    'high',
    'low',
    'close',
]

In [8]:
# Parse the YYYYMMDD date column into pandas datetimes.
krx_df['date'] = krx_df['date'].pipe(pd.to_datetime, format='%Y%m%d')

In [9]:
# Return and adjusted-close frames, unpickled from the configured paths.
return_df, close_df = (
    pd.read_pickle(pickle_path)
    for pickle_path in (subconfig.return_df_PATH, subconfig.adjclose_df_PATH)
)

In [10]:
# Adjusted open / high / low frames, unpickled from the configured paths.
open_df, high_df, low_df = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        subconfig.adjopen_df_PATH,
        subconfig.adjhigh_df_PATH,
        subconfig.adjlow_df_PATH,
    )
)

In [11]:
## date list

# A date on which every column of return_df is NaN counts as a holiday;
# a date with at least one non-NaN return counts as a trading day.
holidays = return_df.index[return_df.isnull().all(axis=1)]
tradingdays = return_df.index[return_df.notnull().any(axis=1)]

In [12]:
# Parse the configured window bounds (YYYY-MM-DD) into pandas datetimes.
TRAIN_START, SIMOS_END = (
    pd.to_datetime(date_str, format='%Y-%m-%d')
    for date_str in (subconfig.TRAIN_START, subconfig.SIMOS_END)
)

In [13]:
# Keep only trading days inside [TRAIN_START, SIMOS_END], both ends inclusive.
tradingdays = tradingdays[
    (tradingdays <= SIMOS_END) & (tradingdays >= TRAIN_START)
]

In [14]:
# Unique stock codes in order of first appearance, with the first character
# of each code dropped (presumably a letter prefix -- confirm against the CSV).
dacon_sid_list = [sid[1:] for sid in pd.unique(krx_df['code'])]

In [15]:
# Restrict each frame to the trading-day window, drop columns that are
# entirely NaN in that window, then keep the competition stocks in list order.
return_df = return_df.loc[tradingdays].dropna(axis='columns', how='all')[dacon_sid_list]
close_df = close_df.loc[tradingdays].dropna(axis='columns', how='all')[dacon_sid_list]

In [16]:
# Same restriction for the open / high / low frames: trading-day rows,
# no all-NaN columns, competition stocks only (in list order).
open_df = open_df.loc[tradingdays].dropna(axis='columns', how='all')[dacon_sid_list]
high_df = high_df.loc[tradingdays].dropna(axis='columns', how='all')[dacon_sid_list]
low_df = low_df.loc[tradingdays].dropna(axis='columns', how='all')[dacon_sid_list]

In [17]:
# Start of the SIMOS window, kept exactly as stored in the config (it is not
# passed through pd.to_datetime here).
# NOTE(review): presumably SIMOS = simulated out-of-sample -- confirm in submission_config.
SIMOS_START = subconfig.SIMOS_START
# Left disabled in the original notebook:
# simOS_END = subconfig.SIMOS_END

### Import additional data

In [18]:
# Volume, dollar-volume and market-cap frames from the configured pickle paths.
volume_df, dollarvolume_df, marketcap_df = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        subconfig.volume_df_PATH,
        subconfig.dollarvolume_df_PATH,
        subconfig.marketcap_df_PATH,
    )
)
# The market-category table is read from a fixed file name under DATA_PATH.
market_cat_df = pd.read_pickle(DATA_PATH / 'market_cat_df_20140101_20230705.pickle')

In [19]:
# Restrict the additional frames to trading-day rows, drop all-NaN columns,
# and keep the competition stocks in list order.
volume_df = volume_df.loc[tradingdays].dropna(axis='columns', how='all')[dacon_sid_list]
dollarvolume_df = dollarvolume_df.loc[tradingdays].dropna(axis='columns', how='all')[dacon_sid_list]
marketcap_df = marketcap_df.loc[tradingdays].dropna(axis='columns', how='all')[dacon_sid_list]

In [20]:
# Shift each frame down by one row, so every date carries the previous row's
# value and the first row becomes NaN.
# NOTE(review): presumably done to avoid look-ahead -- confirm intent.
volume_df, dollarvolume_df, marketcap_df = (
    frame.shift(1)
    for frame in (volume_df, dollarvolume_df, marketcap_df)
)

In [21]:
# Market-category rows whose 'trdDd' value is one of the selected trading days.
market_cat_inrange = market_cat_df.loc[market_cat_df['trdDd'].isin(tradingdays)]

In [22]:
# Unique short codes flagged for each market at least once inside the window.
KOSPI_sid_list = market_cat_inrange.loc[market_cat_inrange['is_KOSPI'].eq(True), 'ISU_SRT_CD'].unique()
KOSDAQ_sid_list = market_cat_inrange.loc[market_cat_inrange['is_KOSDAQ'].eq(True), 'ISU_SRT_CD'].unique()
KONEX_sid_list = market_cat_inrange.loc[market_cat_inrange['is_KONEX'].eq(True), 'ISU_SRT_CD'].unique()

### Parameters

In [23]:
# Portfolio date and the 'rdvadv' window length, both read from the config
# (the original note records the window as 20).
PORTFOLIO_DATE, RDVADV_WINDOW = (
    subconfig.PORTFOLIO_DATE,
    subconfig.WINDOWS['rdvadv'],
)

## Alphas

### Integrating my data with Woohyuk's code

In [24]:
## Only train_close needs to be shaped correctly for the downstream code.

## Below is the code that previously fed train_close into Insu's version.

# Output column name -> wide frame (dates x stock codes) supplying that column.
# Note: All prices are adjusted
# TODO: Add normalized rdvadv signal to the columns
_train_sources = {
    'open': open_df,
    'high': high_df,
    'low': low_df,
    'close': close_df,
    'dollarvolume': dollarvolume_df,
    'marketcap': marketcap_df,
}

# Build one per-stock training frame per code.
# NOTE(review): train_close is overwritten on every iteration, so only the
# last stock's frame survives the loop.
for code in tqdm(dacon_sid_list):
    train_close = pd.DataFrame(
        data={
            column: frame.loc[TRAIN_START:SIMOS_START, code]
            for column, frame in _train_sources.items()
        }
    )
    # Drop the first row, which contains NaN.
    train_close = train_close.iloc[1:]



In [None]:
# Number of calendar days Prophet forecasts past the last training date.
FORECAST_HORIZON = 15
# Extra regressors passed to Prophet alongside the close price ('y').
REGRESSOR_COLS = ['거래량', '시가', '고가', '저가']

train = pd.read_csv('./train.csv')

# DataFrame that stores the per-stock result (filled after the loop).
results_df = pd.DataFrame(columns=['종목코드', 'final_return'])

# Stock codes to forecast.
# NOTE(review): dacon_sid_list entries have their first character stripped;
# confirm train['종목코드'] uses the same format, otherwise no rows will match
# and every return below will be NaN.
unique_codes = dacon_sid_list

# One forecast return per entry of unique_codes, in the same order.
forecast_returns = []

# Iterate over each unique stock
for code in tqdm(unique_codes):

    # Filter by stock code and rename the columns for Prophet ('ds' / 'y').
    train_close = train[train['종목코드'] == code][['일자', '거래량', '시가', '고가', '저가', '종가']].copy()
    train_close.columns = ['ds', '거래량', '시가', '고가', '저가', 'y']
    train_close['ds'] = pd.to_datetime(train_close['ds'], format='%Y%m%d')

    # Prophet refuses to fit on fewer than two non-NaN targets; record NaN so
    # the results stay aligned with unique_codes instead of aborting the run.
    if train_close['y'].notna().sum() < 2:
        forecast_returns.append(np.nan)
        continue

    # Initialize Prophet model with the additional regressors
    prophet_model = Prophet()
    for regressor in REGRESSOR_COLS:
        prophet_model.add_regressor(regressor)

    # Fit Prophet model
    prophet_model.fit(train_close)

    # Build the prediction frame. Regressors are joined on the date (not on
    # the row label, which does not line up with `future`), and the horizon
    # rows reuse the last observed regressor values because Prophet rejects
    # NaN regressors at predict time.
    future = prophet_model.make_future_dataframe(periods=FORECAST_HORIZON)
    future = future.merge(train_close[['ds'] + REGRESSOR_COLS], on='ds', how='left')
    future[REGRESSOR_COLS] = future[REGRESSOR_COLS].ffill()
    forecast_prophet = prophet_model.predict(future)

    # Return from the first forecast day to the last forecast day.
    yhat = forecast_prophet['yhat'].values
    first_pred, last_pred = yhat[-FORECAST_HORIZON], yhat[-1]
    forecast_returns.append((last_pred - first_pred) / first_pred)

# Single-column (column 0) frame of returns, one row per stock code.
returns_df_fin_prophet = pd.DataFrame(forecast_returns)

# Fill the result table declared above.
results_df['종목코드'] = list(unique_codes)
results_df['final_return'] = forecast_returns