In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle

In [392]:
pd.options.display.max_columns = 100 # was 20
pd.options.display.max_rows = 200 # was 60

<h2>KRX에서 KOSPI200 종목들 이름과 코드 가져옴.</h2>
코드 참고: http://excelsior-cjh.tistory.com/109

In [28]:
code_df = pd.read_csv("KOSPI200.csv", encoding="EUC-KR")

In [29]:
col_kor2eng = {"회사명":"company", "종목코드":"id"}
code_df.rename(columns=col_kor2eng, inplace=True)

In [30]:
code_df = code_df[["company", "id"]]

In [32]:
code_df["id"] = code_df["id"].astype(str).apply(lambda x: x.zfill(6))

In [271]:
code_df.head()

Unnamed: 0,company,id
0,효성중공업,298040
1,HDC현대산업개발,294870
2,셀트리온,68270
3,SK케미칼,285130
4,BGF리테일,282330


<h2>네이버 금융에서 KOSPI200 종목 3년치 데이터 크롤링 함.</h2>

In [313]:
companies_names = []

for name in code_df['company']:
    companies_names.append(name)

In [314]:
companies_ids = []

for i in code_df['id']:
    companies_ids.append(i)

In [315]:
len(companies_names) # KOSPI200 회사 수

202

In [316]:
df = pd.DataFrame()
df = pd.DataFrame(pd.date_range('2013.01.01', '2018.09.30'), columns=['date'])
df.date = pd.to_datetime(df.date)

In [317]:
# companies_names = companies_names[:2]
# companies_ids = companies_ids[:2]

In [318]:
for idx in range(0,len(companies_ids)):
    
    item_name = companies_names[idx]
    url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=companies_ids[idx])
    print(item_name)
    
    current_company = pd.DataFrame()
    for page in range(1,93): # 2015.01.01 부터~ -> 93  
        pg_url = '{url}&page={page}'.format(url=url, page=page) 
        
        last_page = list(pd.read_html(pg_url, header=0)[1].columns)
        last_page = [x for x in last_page if x.isdigit()]
        last_page = max(list(map(int, last_page)))
        
        if page > last_page:
            break
        
        current_company = current_company.append(pd.read_html(pg_url, header=0)[0][["날짜", "종가"]], ignore_index=True, sort=True)
        current_company["날짜"] = pd.to_datetime(current_company["날짜"])
        current_company.drop_duplicates(subset=["날짜"], inplace=True)
    
    current_company.rename(columns={"종가":item_name, "날짜":"date"}, inplace=True)
    df = pd.merge(df, current_company, how='left', on=['date'])
        

효성중공업
HDC현대산업개발
셀트리온
SK케미칼
BGF리테일
카카오
오리온
넷마블
현대중공업지주
두산밥캣
삼성바이오로직스
동서
LIG넥스원
이노션
삼성물산
우리은행
삼성에스디에스
만도
쿠쿠홀딩스
BGF
코스맥스
종근당
현대로템
동아에스티
한국콜마
한국타이어
GS리테일
삼양사
한국항공우주
롯데하이마트
이마트
BNK금융지주
일진머티리얼즈
현대위아
현대홈쇼핑
한미약품
무학
한라홀딩스
삼성생명
한화생명
코오롱인더
락앤락
한전기술
GKL
SK
하이트진로
영원무역
LG하우시스
한세실업
NAVER
KB금융
풍산
LG이노텍
LG유플러스
한전KPS
CJ제일제당
팬오션
한진중공업
JW홀딩스
SK이노베이션
삼성카드
후성
LF
아모레퍼시픽
롯데쇼핑
현대글로비스
하나금융지주
현대리바트
금호타이어
CJ CGV
유니드
GS
LG디스플레이
대교
기업은행
강원랜드
한국금융지주
SBS
엔씨소프트
현대백화점
대웅제약
휴켐스
한샘
LG전자
S&T모티브
신한지주
코웨이
LG생활건강
LG화학
대우건설
포스코대우
대우조선해양
두산인프라코어
동원F&B
두산중공업
한국가스공사
팜스코
케이티앤지
현대중공업
AK홀딩스
대한유화
케이티
제일기획
SKC
SK하이닉스
삼성엔지니어링
HDC
한국단자공업
한온시스템
한섬
현대엘리베이터
에스원
남해화학
오뚜기
LS산전
동원시스템즈
삼성중공업
메리츠종금증권
롯데케미칼
세아베스틸
호텔신라
고려아연
한올바이오파마
SK텔레콤
현대모비스
현대해상
현대그린푸드
한국전력공사
녹십자
한솔케미칼
대덕전자
에스엘
보령제약
한국쉘석유
부광약품
한미사이언스
포스코
동국제강
삼성증권
금호석유화학
세방전지
S-Oil
한화에어로스페이스
현대제철
아이에스동서
신세계
OCI
현대건설
현대미포조선
GS건설
삼성SDI
삼성전기
빙그레
녹십자홀딩스
남양유업
LS
SK네트웍스
농심
SK디스커버리
동양
한화
JW중외제약
영풍
고려제강
넥센타이어
롯데정밀화학
대림산업
LG상사
태광산업
DB하이텍
NH투자증권
미래에셋대우
삼성화재해상보험
오리온홀딩스
삼성전자
쌍용자동차
쌍용양회공업
SPC삼립
일양약품
현대자동차

In [319]:
current_company

Unnamed: 0,date,CJ대한통운
0,NaT,
1,2018-10-05,150500.0
2,2018-10-04,151000.0
3,2018-10-02,161000.0
4,2018-10-01,158000.0
5,2018-09-28,158000.0
6,2018-09-27,156000.0
7,2018-09-21,158000.0
8,2018-09-20,158000.0
9,2018-09-19,162000.0


In [320]:
df.dropna()

Unnamed: 0,date,효성중공업,HDC현대산업개발,셀트리온,SK케미칼,BGF리테일,카카오,오리온,넷마블,현대중공업지주,...,대상,LG,동아쏘시오홀딩스,한일홀딩스,세아제강지주,삼양홀딩스,한국타이어월드와이드,대한항공,유한양행,CJ대한통운
2019,2018-07-13,55600.0,56600.0,288000.0,95100.0,179500.0,118000.0,147000.0,159000.0,347500.0,...,26600.0,72700.0,99900.0,143500.0,69500.0,111500.0,18000.0,29100.0,226000.0,170000.0
2022,2018-07-16,55500.0,54000.0,293500.0,94400.0,165500.0,116500.0,143500.0,150000.0,353500.0,...,25650.0,72200.0,99500.0,143500.0,68700.0,110500.0,18200.0,28900.0,227000.0,168000.0
2023,2018-07-17,60900.0,54000.0,295500.0,94400.0,170000.0,114000.0,136500.0,149500.0,342000.0,...,25450.0,74200.0,100500.0,143500.0,68600.0,108000.0,18100.0,29100.0,226000.0,167500.0
2024,2018-07-18,61600.0,51900.0,290000.0,94200.0,163000.0,113000.0,134000.0,148500.0,341500.0,...,24700.0,73300.0,98600.0,143500.0,66800.0,105500.0,17800.0,28600.0,220000.0,167000.0
2025,2018-07-19,61700.0,49800.0,288500.0,93200.0,159000.0,114000.0,135500.0,146000.0,336000.0,...,24100.0,73600.0,97600.0,143500.0,67900.0,103500.0,17650.0,28050.0,215000.0,155000.0
2026,2018-07-20,59900.0,53100.0,285500.0,92400.0,149500.0,115000.0,129500.0,146000.0,337500.0,...,24750.0,75300.0,98400.0,143500.0,64900.0,102500.0,18000.0,27950.0,219500.0,161500.0
2029,2018-07-23,55300.0,52900.0,267500.0,89900.0,150500.0,115500.0,131500.0,148500.0,347500.0,...,25100.0,75500.0,97800.0,143500.0,64700.0,105500.0,18200.0,27850.0,216500.0,161000.0
2030,2018-07-24,57700.0,56500.0,265000.0,90400.0,157500.0,117000.0,137500.0,147000.0,348500.0,...,25000.0,75600.0,99100.0,143500.0,68500.0,111000.0,18400.0,27500.0,220000.0,155500.0
2031,2018-07-25,56100.0,60300.0,251000.0,87600.0,161000.0,118000.0,136000.0,149000.0,348500.0,...,25250.0,75400.0,96800.0,143500.0,68900.0,110000.0,18400.0,28000.0,216500.0,156500.0
2032,2018-07-26,56100.0,59200.0,254500.0,88400.0,165000.0,117500.0,135500.0,150000.0,330500.0,...,26150.0,76300.0,97800.0,143500.0,71200.0,112500.0,18450.0,28400.0,224500.0,157500.0


In [347]:
df = df.fillna(method='ffill')
df = df.fillna(0)

In [393]:
df

Unnamed: 0,date,효성중공업,HDC현대산업개발,셀트리온,SK케미칼,BGF리테일,카카오,오리온,넷마블,현대중공업지주,두산밥캣,삼성바이오로직스,동서,LIG넥스원,이노션,삼성물산,우리은행,삼성에스디에스,만도,쿠쿠홀딩스,BGF,코스맥스,종근당,현대로템,동아에스티,한국콜마,한국타이어,GS리테일,삼양사,한국항공우주,롯데하이마트,이마트,BNK금융지주,일진머티리얼즈,현대위아,현대홈쇼핑,한미약품,무학,한라홀딩스,삼성생명,한화생명,코오롱인더,락앤락,한전기술,GKL,SK,하이트진로,영원무역,LG하우시스,한세실업,...,빙그레,녹십자홀딩스,남양유업,LS,SK네트웍스,농심,SK디스커버리,동양,한화,JW중외제약,영풍,고려제강,넥센타이어,롯데정밀화학,대림산업,LG상사,태광산업,DB하이텍,NH투자증권,미래에셋대우,삼성화재해상보험,오리온홀딩스,삼성전자,쌍용자동차,쌍용양회공업,SPC삼립,일양약품,현대자동차,한화케미칼,롯데지주,기아자동차,효성,CJ,두산,DB손해보험,영진약품,케이씨씨,롯데칠성음료,롯데푸드,아모레퍼시픽그룹,대상,LG,동아쏘시오홀딩스,한일홀딩스,세아제강지주,삼양홀딩스,한국타이어월드와이드,대한항공,유한양행,CJ대한통운
0,2013-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2013-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2013-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2013-01-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2013-01-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,2013-01-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,2013-01-07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,2013-01-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,2013-01-09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,2013-01-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [349]:
df.to_pickle('KOSPI200_2013-2018.p')

<h2>momentum strategy 세팅</h2>

In [388]:
class MovingWindow:
    def __init__(self):
        self.basket = []
        self.data = pd.DataFrame()
        self.window = pd.Timedelta('365 days')
        self.current_date = pd.Timestamp('2014-12-31 00:00:00')
    
#     def get_basket(self):
        
    
#     def yearly_return(self):
#         self.basket

In [389]:
t = pd.Timedelta('60 days')

In [390]:
s1 = MovingWindow()

In [394]:
df.loc[df['date']==s1.current_date]

Unnamed: 0,date,효성중공업,HDC현대산업개발,셀트리온,SK케미칼,BGF리테일,카카오,오리온,넷마블,현대중공업지주,두산밥캣,삼성바이오로직스,동서,LIG넥스원,이노션,삼성물산,우리은행,삼성에스디에스,만도,쿠쿠홀딩스,BGF,코스맥스,종근당,현대로템,동아에스티,한국콜마,한국타이어,GS리테일,삼양사,한국항공우주,롯데하이마트,이마트,BNK금융지주,일진머티리얼즈,현대위아,현대홈쇼핑,한미약품,무학,한라홀딩스,삼성생명,한화생명,코오롱인더,락앤락,한전기술,GKL,SK,하이트진로,영원무역,LG하우시스,한세실업,...,빙그레,녹십자홀딩스,남양유업,LS,SK네트웍스,농심,SK디스커버리,동양,한화,JW중외제약,영풍,고려제강,넥센타이어,롯데정밀화학,대림산업,LG상사,태광산업,DB하이텍,NH투자증권,미래에셋대우,삼성화재해상보험,오리온홀딩스,삼성전자,쌍용자동차,쌍용양회공업,SPC삼립,일양약품,현대자동차,한화케미칼,롯데지주,기아자동차,효성,CJ,두산,DB손해보험,영진약품,케이씨씨,롯데칠성음료,롯데푸드,아모레퍼시픽그룹,대상,LG,동아쏘시오홀딩스,한일홀딩스,세아제강지주,삼양홀딩스,한국타이어월드와이드,대한항공,유한양행,CJ대한통운
729,2014-12-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [386]:
# df.loc[~(df==0).all(axis=1)]

In [405]:
# ax = test.plot(x='date', y='삼성물산', kind='bar')

# test.plot(x='date', y='삼성에스디에스', kind='bar')
# test.plot(x='date', y='GS리테일', kind='bar')

Unnamed: 0,date,삼성물산,삼성에스디에스,GS리테일
0,2013-01-01,0.0,0.0,0.0
1,2013-01-02,0.0,0.0,0.0
2,2013-01-03,0.0,0.0,0.0
3,2013-01-04,0.0,0.0,0.0
4,2013-01-05,0.0,0.0,0.0
5,2013-01-06,0.0,0.0,0.0
6,2013-01-07,0.0,0.0,0.0
7,2013-01-08,0.0,0.0,0.0
8,2013-01-09,0.0,0.0,0.0
9,2013-01-10,0.0,0.0,0.0
