In [None]:
import pandas as pd
import pandas_datareader as pdr
from datetime import date, timedelta, datetime

In [None]:
# The KRX download URL takes a different marketType value for each market.
# The markets are kept apart because the stock codes later need a
# market-specific suffix (.KS or .KQ) appended to them.
stock_type = dict(
    kospi='stockMkt',
    kosdaq='kosdaqMkt',
)

# Look up the stock code for a company name.
def get_code(df, name):
    """Return the stock code stored in ``df`` for the company called ``name``.

    Args:
        df: DataFrame with a ``name`` column (company name) and a ``code``
            column (stock code).
        name: Exact company name to look up.

    Returns:
        The matching code as a string with surrounding whitespace removed.
        If several rows share the name, the code of the first one is returned.

    Raises:
        ValueError: If no row of ``df`` has the given name.
    """
    # Compare the column directly instead of formatting ``name`` into a
    # query() expression: a name containing a quote character would otherwise
    # produce an invalid expression. The mask is passed as a plain array so
    # that no index alignment takes place.
    mask = (df['name'] == name).to_numpy()
    matches = df.loc[mask, 'code']
    if matches.empty:
        # Fail loudly rather than returning the text repr of an empty Series.
        raise ValueError("no stock code found for name: {!r}".format(name))
    return str(matches.iloc[0]).strip()

# Assemble the download URL and fetch the listing table.
def get_download_stock(market_type=None):
    """Download the KRX corporate listing table for one market.

    ``market_type`` must be a key of ``stock_type``; the mapped value is sent
    as the ``marketType`` request parameter. The first table that
    ``pd.read_html`` parses from the response is returned, with its first row
    used as the header.
    """
    market_param = stock_type[market_type]
    url = ''.join([
        'http://kind.krx.co.kr/corpgeneral/corpList.do',
        '?method=download',
        '&marketType=',
        market_param,
    ])
    tables = pd.read_html(url, header=0)
    return tables[0]

# Download the list of KOSPI stock codes.
def get_download_kospi():
    """Return the KOSPI listing with stock codes formatted as ``NNNNNN.KS``.

    The integer stock codes returned by ``get_download_stock('kospi')`` are
    zero-padded to six digits and suffixed with ``.KS``.
    """
    df = get_download_stock('kospi')
    # Bracket indexing is used instead of attribute assignment: assigning to
    # ``df.<column>`` is only safe when the column already exists and is
    # discouraged by pandas for setting column values.
    df['종목코드'] = df['종목코드'].map('{:06d}.KS'.format)
    return df

# Download the list of KOSDAQ stock codes.
def get_download_kosdaq():
    """Return the KOSDAQ listing with stock codes formatted as ``NNNNNN.KQ``.

    The integer stock codes returned by ``get_download_stock('kosdaq')`` are
    zero-padded to six digits and suffixed with ``.KQ``.
    """
    df = get_download_stock('kosdaq')
    # Bracket indexing is used instead of attribute assignment: assigning to
    # ``df.<column>`` is only safe when the column already exists and is
    # discouraged by pandas for setting column values.
    df['종목코드'] = df['종목코드'].map('{:06d}.KQ'.format)
    return df

# Download the KOSPI and KOSDAQ stock-code listings separately.
kospi_df = get_download_kospi()
kosdaq_df = get_download_kosdaq()
# Stack both markets into one frame. ignore_index=True renumbers the rows so
# that index labels are unique instead of each market's labels repeating.
code_df = pd.concat([kospi_df, kosdaq_df], ignore_index=True)
# Keep only the company name and stock code columns, renamed to
# 'name' (from '회사명') and 'code' (from '종목코드').
code_df = code_df[['회사명', '종목코드']].rename(
    columns={'회사명': 'name', '종목코드': 'code'}
)

In [None]:
# Price window: the 60 calendar days ending today, kept both as date objects
# and as 'YYYY-MM-DD' strings.
end_date_dt = date.today()
start_date_dt = end_date_dt - timedelta(days=60)
start_date, end_date = (
    day.strftime("%Y-%m-%d") for day in (start_date_dt, end_date_dt)
)

start_date, end_date

In [None]:
# Smoke test: fetch the price history of a single company over the window
# and display it.
code = get_code(code_df, '삼성전자')
df_price = pdr.get_data_yahoo(code, start=start_date, end=end_date)
df_price = df_price.reset_index()
df_price

In [None]:
# Empty result table; one row per stock is added after the yield loop runs.
df_stock_yield = pd.DataFrame(
    columns=[
        'stock_code',
        'start_price',
        'end_price',
        'yield',
    ],
)

In [None]:
# For every listed stock, compute the ratio of the last close to the first
# open within the [start_date, end_date] window and collect it as a record.

# Number of leading rows of code_df that are skipped.
# NOTE(review): 86 is kept unchanged from the original cell; it looks like a
# leftover resume offset from an interrupted run - set to 0 to process every
# listing.
first_row = 86

stock_yields = []
for i, (_, row) in enumerate(code_df.iterrows()):
    if i < first_row:
        continue

    # 'name' is read with brackets because ``row.name`` is the Series label,
    # not the company-name field.
    company = row['name']
    ticker = row['code']

    try:
        df_price = pdr.get_data_yahoo(ticker, start=start_date, end=end_date).reset_index()
    except Exception as exc:
        # Catch Exception rather than using a bare except so that a kernel
        # interrupt (KeyboardInterrupt) still stops this long-running loop.
        print(i, "no price:", company, ticker, repr(exc))
        continue

    if len(df_price) == 0:
        print(i, "no price:", company, ticker)
        continue

    # Check the date of the most recent price row.
    last_bar = df_price.iloc[-1]
    df_price_end_date = pd.Timestamp(last_bar['Date']).date()

    # Drop stocks that have not traded within the last week.
    if (end_date_dt - df_price_end_date).days >= 7:
        print(i, "no recent price:", company, df_price_end_date)
        continue

    start_price = df_price.iloc[0]['Open']
    end_price = last_bar['Close']

    # A zero/NaN opening price or NaN close would yield inf/NaN; skip those.
    # ``not (x > 0)`` is also True for NaN.
    if not (start_price > 0) or pd.isna(end_price):
        print(i, "invalid price:", company, ticker, start_price, end_price)
        continue

    stock_yield = end_price / start_price

    stock_yields.append({
        # Strip the three-character market suffix ('.KS' / '.KQ').
        'stock_code': ticker[:-3],
        'start_price': start_price,
        'end_price': end_price,
        'yield': stock_yield,
    })

    print(i, len(stock_yields), company, ticker, stock_yield)

In [None]:
# Add the collected records to the result table. DataFrame.append was removed
# in pandas 2.0, so the rows are turned into a frame and concatenated instead.
new_rows = pd.DataFrame(stock_yields, columns=df_stock_yield.columns)
# When the table is still empty, use the new rows directly so the empty
# placeholder frame does not influence the resulting column dtypes.
df_stock_yield = new_rows if df_stock_yield.empty else pd.concat([df_stock_yield, new_rows])

In [None]:
# Write the yield table (index included) to predict.csv in the current
# working directory.
df_stock_yield.to_csv(path_or_buf="predict.csv")