In [None]:
!pip install --upgrade pandas

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime


In [None]:
!pip install finance-datareader
import FinanceDataReader as fdr

In [None]:
# Fetch the 'KRX' stock listing and cache it as a CSV so that later cells can
# look up ticker codes from the file.
krx_symbols_path = './krx_stock_symbols.csv'
df_krx = fdr.StockListing('KRX')
df_krx.to_csv(krx_symbols_path)

In [51]:
# Company to download and the date range of price history to request.
target = 'SK하이닉스'
start = '20100101'
end = '20231231'

# Force 'Code' to str: without an explicit dtype pandas infers the column type,
# and a purely numeric code column would be parsed as integers, dropping any
# leading zeros from the ticker codes.
symbols = pd.read_csv('./krx_stock_symbols.csv', dtype={'Code': str})
matches = symbols.loc[symbols['Name'] == target, 'Code']
if matches.empty:
    # Fail with a readable message instead of an opaque IndexError from .iloc[0].
    raise ValueError(f"'{target}' was not found in ./krx_stock_symbols.csv")
code = matches.iloc[0]

# Download the price history for the resolved ticker code.
stock_data = fdr.DataReader(code, start, end)

# Persist the prices so the analysis cells can reload them from disk.
stock_data.to_csv('../stock_price.csv')

In [None]:

def buynotsell(df, col, start, end):
    """Compute the buy-and-hold cumulative return of one price column.

    Rows containing NaN or +/-inf in any column are dropped, the frame is
    indexed by date, and the daily percentage changes of ``col`` are
    compounded over the ``start``..``end`` window.

    Args:
        df: Price data. Dates are taken from a 'Date' column (parsed with the
            '%Y-%m-%d' format) when one exists, otherwise from the index.
            The caller's frame is not modified.
        col: Name of the price column to evaluate.
        start: First day of the holding period, formatted 'YYYY-MM-DD'.
        end: Last day of the holding period, formatted 'YYYY-MM-DD'.

    Returns:
        A tuple ``(frame, result)``. ``frame`` is indexed by date and holds
        ``col``, 'daily_rtn' (day-over-day percentage change) and 'rtn'
        (cumulative product of ``1 + daily_rtn`` inside the window).
        ``result`` is the last value of 'rtn'.

    Raises:
        ValueError: If ``start`` or ``end`` does not match '%Y-%m-%d'.
        IndexError: If no rows fall inside the requested window.
    """
    # Drop every row that has a missing or infinite value in any column.
    bad_rows = df.isin([np.nan, np.inf, -np.inf]).any(axis=1)
    # Work on an explicit copy so the column assignments below neither touch
    # the caller's frame nor trigger chained-assignment warnings.
    prices = df.loc[~bad_rows].copy()

    if 'Date' in prices.columns:
        # Convert the 'Date' column to datetimes and use it as the index.
        prices['Date'] = pd.to_datetime(prices['Date'], format='%Y-%m-%d')
        prices = prices.set_index('Date')
    else:
        prices.index = pd.to_datetime(prices.index)

    # Keep only the requested price column.
    prices = prices[[col]].copy()

    # Normalise the window bounds to ISO strings used for label slicing.
    buy = datetime.strptime(start, '%Y-%m-%d').isoformat()
    sell = datetime.strptime(end, '%Y-%m-%d').isoformat()

    # Daily returns are computed before windowing, so the first in-window row
    # carries the change relative to the preceding row (NaN only when the
    # window starts on the very first row of the data).
    prices['daily_rtn'] = prices[col].pct_change()

    # Restrict to the holding period.
    prices = prices.loc[buy:sell].copy()

    # Cumulative return over the window.
    prices['rtn'] = (1 + prices['daily_rtn']).cumprod()

    # Positional access: an integer key on a datetime-indexed Series is not a
    # label, so use .iloc to fetch the final cumulative return.
    result = prices['rtn'].iloc[-1]

    return prices, result

In [52]:
# Reload the saved price history from disk; no index_col is given, so the
# frame gets a default integer index and 'Date' stays a regular column.
df = pd.read_csv('../stock_price.csv')

In [53]:
# Preview the first five rows of the loaded prices.
df.head(5)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Change
0,2010-01-04,23350,24150,23350,24100,7346425,0.041037
1,2010-01-05,24500,24900,23050,23350,12118281,-0.03112
2,2010-01-06,23700,24550,23600,24550,7731186,0.051392
3,2010-01-07,25000,25200,24350,24400,11084814,-0.00611
4,2010-01-08,24650,24850,23450,24650,9695685,0.010246


In [54]:
# Preview the last five rows of the loaded prices.
df.tail(5)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Change
3448,2023-12-21,139200,140600,138100,140500,2737124,0.0
3449,2023-12-22,142700,143700,140600,140600,3183709,0.000712
3450,2023-12-26,142000,142000,139400,140900,2239789,0.002134
3451,2023-12-27,140800,141600,139200,140400,2336777,-0.003549
3452,2023-12-28,140900,141600,139500,141500,2459473,0.007835


In [55]:
# Buy-and-hold on the 'Close' column over the given date range.
# NOTE: buynotsell returns a (DataFrame, final cumulative return) tuple, so
# df2 is that tuple rather than a DataFrame.
df2 = buynotsell(df, 'Close', '2010-01-04', '2023-12-28')

In [56]:
# Display the (DataFrame, final cumulative return) tuple returned above.
df2

(             Close  daily_rtn       rtn
 Date                                   
 2010-01-04   24100        NaN       NaN
 2010-01-05   23350  -0.031120  0.968880
 2010-01-06   24550   0.051392  1.018672
 2010-01-07   24400  -0.006110  1.012448
 2010-01-08   24650   0.010246  1.022822
 ...            ...        ...       ...
 2023-12-21  140500   0.000000  5.829876
 2023-12-22  140600   0.000712  5.834025
 2023-12-26  140900   0.002134  5.846473
 2023-12-27  140400  -0.003549  5.825726
 2023-12-28  141500   0.007835  5.871369
 
 [3453 rows x 3 columns],
 5.871369294605826)