# 환경 세팅

## 라이브러리 세팅

In [221]:
import pandas as pd
import numpy as np
import pandas_datareader as pdr
from datetime import datetime, timedelta
import radar
import random
import statsmodels.formula.api as sm

In [226]:
# Smoke test for the statsmodels formula API: regress column A on column B of
# a tiny hand-made frame and print the fitted parameters.
df = pd.DataFrame(
    {
        "A": [10, 20, 30, 40, 50],
        "B": [20, 30, 10, 40, 50],
        "C": [32, 234, 23, 23, 42523],
    }
)
result = sm.ols(formula="A ~ B", data=df).fit()
print(result.params)

Intercept    9.0
B            0.7
dtype: float64


In [85]:
(datetime(2021, 1, 1) + timedelta(days=3)).strftime("%Y-%m-%d")

'2021-01-04'

In [88]:
radar.random_datetime(
    start = datetime(year=2000, month=5, day=24),
    stop = datetime(year=2013, month=5, day=24)
).strftime("%Y-%m-%d")

'2013-05-09'

# 데이터 테스트

## 상장법인목록

In [3]:
krx_list = pd.read_csv("상장법인목록.csv", encoding='cp949')

In [45]:
krx_list.head()

Unnamed: 0,회사명,종목코드,업종,주요제품,상장일,결산월,대표자명,홈페이지,지역
0,미래에셋글로벌리츠,396690,부동산 임대 및 공급업,부동산 임대업,2021-12-03,03월,신현준,,서울특별시
1,대신밸런스제11호스팩,397500,금융 지원 서비스업,금융 지원 서비스업,2021-12-02,12월,정재중,,서울특별시
2,켈스,402420,의료용 기기 제조업,체외진단 의료기기,2021-11-30,12월,이동호,http://www.thecalth.com/,경기도
3,SK스퀘어,402340,기타 금융업,지주회사,2021-11-29,12월,박정호,http://www.sksquare.com,서울특별시
4,마인즈랩,377480,소프트웨어 개발 및 공급업,종합 인공지능 엔진 및 플랫폼,2021-11-23,12월,유태준,maum.ai,대전광역시


In [13]:
kospi_list = pd.read_csv("코스피200.csv", encoding='cp949')

In [127]:
kospi_list['종목코드'] = kospi_list['종목코드'].map('{:06d}'.format)

In [131]:
kospi_list.head()

Unnamed: 0,종목코드,종목명,종가,대비,등락률,상장시가총액
0,5930,삼성전자,75600,-200,-0.26,451315561.0
1,660,SK하이닉스,118000,-2000,-1.67,85904279.0
2,35420,NAVER,402000,3500,0.88,66033885.0
3,207940,삼성바이오로직스,900000,8000,0.9,59548500.0
4,35720,카카오,123500,1000,0.82,55043733.0


## pandas_datareader 테스트

In [69]:
# Window used by the data-reader experiments below: `dayInterval` calendar
# days before and after `baseDate`, both edges kept as "YYYY-MM-DD" strings.
dayInterval = 30
baseDate = datetime.strptime("2021-09-10", "%Y-%m-%d")
startDate, endDate = (
    (baseDate + timedelta(days=sign * dayInterval)).strftime("%Y-%m-%d")
    for sign in (-1, +1)
)

In [111]:
stock_test2 = pdr.get_data_yahoo('000660.ks', f'2020-02-01', f'2020-11-30')

In [110]:
stock_test = pdr.get_data_yahoo('000660.ks', f'{startDate}', f'{endDate}')

In [125]:
random.sample(stock_test2.index.tolist(),3)

[Timestamp('2020-02-10 00:00:00'),
 Timestamp('2020-05-11 00:00:00'),
 Timestamp('2020-02-12 00:00:00')]

In [117]:
stock_test2.index.tolist()[0].strftime("%Y-%m-%d")

'2020-02-03'

In [70]:
stock_test = pdr.get_data_yahoo('000660.ks', f'{startDate}', f'{endDate}')

In [71]:
stock_test.columns

Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], dtype='object')

# 분석함수 만들기

* 구성요소
    1. 날짜 추출 함수
    2. 이전과 이후 한달 간의 주가 확인
    3. 추출 당일의 주가가 이전 한달에 비해 어느 정도 수준인지 확인
    4. 백분율로 환산
    5. 위치가 어느정도 되는지 판단
    6. 기간 _ 증가/감소 확인
    7. aaa

## 작동테스트

In [208]:
# Number of days between the reference date `rdate` and the target date
# `mdate`; both strings are parsed to `date` objects before subtracting.
mdate = "2010-10-05"
rdate = "1899-12-31"
mdate1, rdate1 = (
    datetime.strptime(text, "%Y-%m-%d").date() for text in (mdate, rdate)
)
delta = (mdate1 - rdate1).days
print(delta)

40455


In [74]:
#이전기간에서 상대적인 위치

In [202]:
stock_test_before = stock_test.loc[(stock_test.index <= baseDate.strftime("%Y-%m-%d")),].copy()

In [217]:
# Percentile rank of each close within the look-back frame, scaled to 0-10
# and rounded to a whole number. With ascending=False the highest close gets
# the smallest value.
stock_test_before['rank_pct'] = (
    stock_test_before['Close'].rank(pct=True, ascending=False) * 100 / 10
).round(0)

In [204]:
stock_test_before.index[0].year

2021

In [228]:
# Convert the date index into a 1-based day offset from the earliest row so it
# can be used as a numeric regressor.
min_date = min(stock_test_before.index.tolist()).date()
stock_test_before['dateInt'] = [
    (stamp.date() - min_date).days + 1 for stamp in stock_test_before.index
]

In [229]:
stock_test_before

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,rank_pct,dateInt
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-08-11,110000,105000,109000,105500,13715267,105500,3.0,1
2021-08-12,104000,99800,102000,100500,19150647,100500,10.0,2
2021-08-13,101500,98900,99600,101500,11622957,101500,9.0,3
2021-08-17,103500,100500,101000,101500,7851717,101500,9.0,7
2021-08-18,105500,101500,103000,104000,8047599,104000,5.0,8
2021-08-19,105500,101500,103000,102500,6161038,102500,8.0,9
2021-08-20,104500,102000,104000,102500,4719139,102500,8.0,10
2021-08-23,105000,103000,104500,103000,4515699,103000,8.0,13
2021-08-24,105500,104000,104500,105000,4454743,105000,4.0,14
2021-08-25,106000,102500,106000,103500,3194555,103500,6.0,15


In [179]:
stock_test_before[stock_test_before.index == "2021-09-10"]["rank_pct"][0]

4.0

In [180]:
def randomStocks(cntStocks: int):
    """Pick stock codes at random.

    Draws ``cntStocks`` entries, without replacement, from the '종목코드'
    (stock code) column of the module-level ``kospi_list`` frame.

    Args:
        cntStocks: How many codes to draw.

    Returns:
        A list with ``cntStocks`` stock codes.

    Raises:
        ValueError: Raised by ``random.sample`` when ``cntStocks`` exceeds the
            number of rows in ``kospi_list`` (or is negative).
    """
    allCodes = kospi_list['종목코드'].tolist()
    return random.sample(allCodes, cntStocks)

In [181]:
def getDateList(startDate : str, endDate : str, code: str, cntDays: int):
    """Draw random dates for which Yahoo actually returned a price row.

    The Yahoo data has many missing days, so instead of sampling arbitrary
    calendar dates this downloads the price history of ``{code}.ks`` between
    ``startDate`` and ``endDate`` and samples from the index of the rows that
    came back.

    Args:
        startDate: First day of the download window.
        endDate: Last day of the download window.
        code: Stock code without suffix; ``.ks`` is appended.
        cntDays: How many index entries to draw (without replacement).

    Returns:
        A list of ``cntDays`` entries taken from the downloaded frame's index.

    Raises:
        ValueError: Raised by ``random.sample`` when ``cntDays`` exceeds the
            number of downloaded rows (or is negative).
    """
    ticker = f'{code}.ks'
    stockDf = pdr.get_data_yahoo(ticker, f'{startDate}', f'{endDate}')
    availableDates = stockDf.index.tolist()
    return random.sample(availableDates, cntDays)

In [184]:
randomDate("2020-01-01", "2020-12-30")

'2020-10-11'

In [182]:
def getDateVal(valDate: datetime, dayInterval: int):
    """Build the date window around ``valDate`` as "YYYY-MM-DD" strings.

    Args:
        valDate: Centre of the window (a ``datetime``, not a string).
        dayInterval: Number of calendar days to extend on each side.

    Returns:
        A tuple ``(baseDate_str, startDate, endDate)`` where ``startDate`` is
        ``dayInterval`` days before ``valDate`` and ``endDate`` is
        ``dayInterval`` days after it.
    """
    dateFormat = "%Y-%m-%d"
    halfWindow = timedelta(days=dayInterval)
    return (
        valDate.strftime(dateFormat),
        (valDate - halfWindow).strftime(dateFormat),
        (valDate + halfWindow).strftime(dateFormat),
    )

In [231]:
def getStockValue(baseDate: datetime, code: str, dayInterval: int):
    """Summarize how a stock traded before and after ``baseDate``.

    Downloads daily prices for ``{code}.ks`` through ``pandas_datareader``
    for ``dayInterval`` calendar days on either side of ``baseDate`` and
    derives three figures from the ``Close`` column.

    Args:
        baseDate: Reference date. A row for this date must exist in the
            downloaded data.
        code: Stock code without suffix; ``.ks`` is appended.
        dayInterval: Number of calendar days to look back and look ahead.

    Returns:
        A tuple ``(stockDf_baseDate_rank, probUpperCnt, regSlope)``:

        * ``stockDf_baseDate_rank`` - percentile rank of the base-date close
          among all closes on or before ``baseDate`` in the window, scaled
          to 0-10 and rounded (a higher value means a higher close).
        * ``probUpperCnt`` - truncated integer percentage of the rows after
          ``baseDate`` whose close is strictly above the base-date close.
        * ``regSlope`` - slope of an OLS fit of ``Close`` on the 1-based day
          offset over the rows after ``baseDate``.

    Raises:
        ValueError: If the downloaded data has no row for ``baseDate`` or no
            rows after it.
    """
    dateFormat = "%Y-%m-%d"
    halfWindow = timedelta(days=dayInterval)
    baseDate_str = baseDate.strftime(dateFormat)
    startDate = (baseDate - halfWindow).strftime(dateFormat)
    endDate = (baseDate + halfWindow).strftime(dateFormat)

    stockDf = pdr.get_data_yahoo(f'{code}.ks', startDate, endDate)

    # 1. Where the base-date close sits within the look-back window.
    stockDf_before = stockDf.loc[stockDf.index <= baseDate_str].copy()
    stockDf_before['rank_pct'] = (
        stockDf_before['Close'].rank(pct=True, ascending=True) * 100 / 10
    ).round(0)

    baseRow = stockDf_before.loc[stockDf_before.index == baseDate_str]
    if baseRow.empty:
        # Without this guard the positional lookup below fails with an
        # opaque IndexError.
        raise ValueError(
            f"no price row for {baseDate_str} in the data downloaded for {code}.ks"
        )

    # 1. Output (.iloc: positional access, independent of the date labels)
    stockDf_baseDate_rank = baseRow['rank_pct'].iloc[0]
    stockDf_baseDate_Close = baseRow['Close'].iloc[0]

    # 2. Share of later days that closed above the base-date close.
    stockDf_after = stockDf.loc[stockDf.index > baseDate_str].copy()
    totalCnt = len(stockDf_after)
    if totalCnt == 0:
        # Avoids a ZeroDivisionError below and a regression on no data.
        raise ValueError(
            f"no price rows after {baseDate_str} in the data downloaded for {code}.ks"
        )
    upperCnt = int((stockDf_after['Close'] > stockDf_baseDate_Close).sum())

    # 2. Output (int() truncates, e.g. 33.5 -> 33)
    probUpperCnt = int((upperCnt / totalCnt) * 100)

    # 3. Slope of the close price after baseDate, regressed on a 1-based
    #    day offset from the first later row.
    min_date = min(stockDf_after.index.tolist()).date()
    stockDf_after['dateInt'] = stockDf_after.index.map(
        lambda x: (x.date() - min_date).days + 1
    )
    regResult = sm.ols(formula="Close ~ dateInt", data=stockDf_after).fit()

    # 3. Output (select the coefficient by name rather than by position)
    regSlope = regResult.params["dateInt"]

    return stockDf_baseDate_rank, probUpperCnt, regSlope

In [187]:
len([1, 2, 3, 0, 0])

5

In [194]:
int(0.335)

34

In [196]:
int(0.335*100)

33

In [None]:
def getStockDict():
    
    #하락하락하락하락 횡보횡보 상승상승상승상승
    
    dictProbUpperCnt = dict()
    dictRegSlope = dict()
    
    for stock_val in randomStocks():
        
        for date_val in getDateList():
            
            
            