In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
import numpy as np
from pykrx import stock
import math
from random import *
import random
def labellingD0(d0) -> str:
    '''
    D0 시점의 각 봉에 대한 라벨링 25가지
    '''
    openP = d0['open']
    highP = d0['high']
    lowP = d0['low']
    closeP = d0['close']
    
    # 장대 양봉
    if closeP >= 1.1*openP:
        if closeP < highP:
            if openP > lowP:
                return "P15"
            if highP - 2*closeP + openP >= 0:
                return "P14"
            return "P13"
        if openP > lowP:
            return "P11"
        return "P10"

    # 짧은 양봉
    elif closeP >= 1.005*openP:
        if closeP < highP:
            if openP > lowP:
                return "P05"
            if highP -3*closeP + 2*openP >= 0:
                return "P04"
            return "P03"
        if openP > lowP:
            if 2*highP - 3*openP + lowP >= 0:
                return "P01"
            return "P02"
        return "P00"

    # 보합
    elif closeP >= openP:
        end_min = closeP - lowP
        Max_end = highP - closeP
        if end_min > Max_end*3:
            return "K01"
        elif end_min*3 < Max_end:
            return "K02"
        return "K00"

    # 짧은 음봉
    elif closeP >= 0.9*openP:
        if openP < highP :
            if closeP > lowP :
                return "M05"
            if highP -3*openP +2*closeP < 0:
                return "M03"
            return "M04"
        if closeP > lowP :
            if 3*closeP - lowP - 2*openP < 0:
                return "M01"
            return "M02"
        return "M00"

    # 장대 음봉
    else:
        if closeP > lowP :
            if openP < highP :
                return "M15"
            return "M11"
        if openP < highP :
            if highP -2*openP + closeP >= 0:
                return "M14"
            return "M13"
        return "M10"


def labellingD1(d10) -> str:
    '''
    D1 5가지 x D0 25가지
    '''
    temp = d10.iloc[0]
    openP = temp['open']
    highP = temp['high']
    lowP = temp['low']
    closeP = temp['close']
    
    # 장대 양봉
    if closeP >= 1.1*openP:
        res = "P10"

    # 짧은 양봉
    elif closeP >= 1.005*openP:
        res = "P00"

    # 보합
    elif closeP >= openP:
        res = "K00"

    # 짧은 음봉
    elif closeP >= 0.9*openP:
        res = "M00"

    # 장대 음봉
    else:
        res = "M10"

    return res + labellingD0(d10.iloc[1])

def labellingD2(d210):
    d2_openP = d210.iloc[0]['open']
    d2_closeP = d210.iloc[0]['close']
    d1_openP = d210.iloc[1]['open']
    d1_closeP = d210.iloc[1]['close']
    
    d21_max = max(d2_openP, d2_closeP, d1_openP, d1_closeP)
    d21_avg = (d21_max + min(d2_openP, d2_closeP, d1_openP, d1_closeP))/2
    if d21_max/d21_avg <= 1.005:
        res = "S04"
    elif d2_openP <= d2_closeP: # D2 양봉
        if d1_openP <= d1_closeP:
            res = "P10"
        elif d2_openP >= d1_closeP:
            res = "S07"
        elif d2_closeP > d1_openP:
            res = "S06"
        elif d2_closeP >= d1_closeP:
            res = "S03"
        else:
            res = "S05"
    elif d2_openP >= d2_closeP: # D2 음봉
        if d1_openP >= d1_closeP:
            res = "M10"
        elif d2_closeP < d1_openP:
            res = "S01"
        elif d2_openP <= d1_closeP:
            res = "S02"
        elif d2_closeP >= d1_closeP:
            res = "S08"
        else:
            res = "S00" 
    else:
        res = "S09"
        
    return res + labellingD0(d210.iloc[2])
stock_list = pd.read_csv('../stockcode.csv')

In [None]:
stockData = stock.get_market_ohlcv_by_date("20120101", "20201022", "001470")    # TODO: 종목코드를 csv에서 불러오기
stockData.columns = pd.Index(["open", "high","low","close","volume"],name=stockData.columns.name)
if stockData["volume"][-1] == 0:
    pass
else : 
    stockData['pattern1'] = None
    for i in range(len(stockData)):
        stockData['pattern1'].values[i]  = labellingD0(stockData.iloc[i])

    stockData['pattern2'] = None
    for i in range(1,len(stockData)):
        stockData['pattern2'].values[i] = labellingD1(stockData.iloc[i-1:i+1])

    stockData['pattern3'] = None
    for i in range(2,len(stockData)):
        stockData['pattern3'].values[i] = labellingD2(stockData.iloc[i-2:i+1])
##############################################################################################################################    
    #globals()['A' + stock_code + '_1bong'] = stockData["pattern1"]


    bong_list = []
    for i in range(0, len(stockData)-3):
        pattern_list = []
        pattern_list.append("01" + stockData["pattern1"][i])
        pattern_list.append("02" + stockData["pattern1"][i+1])
        pattern_list.append("03" + stockData["pattern1"][i+2])
        bong_list.append(pattern_list)
    d0 = "02" + stockData["pattern1"][-2]
    d1 = "01" + stockData["pattern1"][-3]
    A = {'P_score' : 0, 'real' : stockData['pattern1'][-1], 'stock_name' : stockData.columns.name, 
         'P0' : 0, 'P1' : 0, 'M0' : 0, 'M1' : 0, 'K0' : 0}
    for bong_order in range(len(bong_list)) :

        if d0 in bong_list[bong_order]:
            if d1 in bong_list[bong_order] :
                date_weight = math.floor(bong_order/10)
                if "P0" in bong_list[bong_order][2] :
                    A["P0"] += 1+(0.01*date_weight)
                elif "P1" in bong_list[bong_order][2] :
                    A["P1"] += 1+(0.01*date_weight)
                elif "M0" in bong_list[bong_order][2] :
                    A["M0"] += 1+(0.01*date_weight)
                elif "M1" in bong_list[bong_order][2] :
                    A["M1"] += 1+(0.01*date_weight)
                elif "K0" in bong_list[bong_order][2] :
                    A["K0"] += 1+(0.01*date_weight)
    if (A['P0']+A['P1']+A['M0']+A['M1']+A['K0']) == 0:
        pass
    else :
        A['P_score'] = round(((A['P0']+A['P1']) / (A['P0']+A['P1']+A['M0']+A['M1']+A['K0']))*100, 2)
        print(A)

In [2]:
stockData = stock.get_market_ohlcv_by_date("20120101", "20201022", "001470")
stockData.columns = pd.Index(["open", "high","low","close","volume"],name=stockData.columns.name)

In [3]:
stockData

삼부토건,open,high,low,close,volume
날짜,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-01-02,30000,30499,29500,29950,5445
2012-01-03,29500,30250,29400,29500,10621
2012-01-04,29500,29950,28850,28900,16801
2012-01-05,28900,29849,28700,29050,10982
2012-01-06,29250,29250,28450,28900,21890
...,...,...,...,...,...
2020-10-16,2465,2600,2460,2525,6294302
2020-10-19,2550,2630,2270,2290,9080650
2020-10-20,2380,2570,2270,2550,7157660
2020-10-21,2550,3030,2550,2950,22374229


In [70]:
stockData.index.year

Int64Index([2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012,
            ...
            2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020],
           dtype='int64', name='날짜', length=2167)

In [14]:
import datetime
from datetime import timedelta

In [48]:
stock_date = list(stockData.index)

In [55]:
stock_date

[Timestamp('2012-01-02 00:00:00'),
 Timestamp('2012-01-03 00:00:00'),
 Timestamp('2012-01-04 00:00:00'),
 Timestamp('2012-01-05 00:00:00'),
 Timestamp('2012-01-06 00:00:00'),
 Timestamp('2012-01-09 00:00:00'),
 Timestamp('2012-01-10 00:00:00'),
 Timestamp('2012-01-11 00:00:00'),
 Timestamp('2012-01-12 00:00:00'),
 Timestamp('2012-01-13 00:00:00'),
 Timestamp('2012-01-16 00:00:00'),
 Timestamp('2012-01-17 00:00:00'),
 Timestamp('2012-01-18 00:00:00'),
 Timestamp('2012-01-19 00:00:00'),
 Timestamp('2012-01-20 00:00:00'),
 Timestamp('2012-01-25 00:00:00'),
 Timestamp('2012-01-26 00:00:00'),
 Timestamp('2012-01-27 00:00:00'),
 Timestamp('2012-01-30 00:00:00'),
 Timestamp('2012-01-31 00:00:00'),
 Timestamp('2012-02-01 00:00:00'),
 Timestamp('2012-02-02 00:00:00'),
 Timestamp('2012-02-03 00:00:00'),
 Timestamp('2012-02-06 00:00:00'),
 Timestamp('2012-02-07 00:00:00'),
 Timestamp('2012-02-08 00:00:00'),
 Timestamp('2012-02-09 00:00:00'),
 Timestamp('2012-02-10 00:00:00'),
 Timestamp('2012-02-

In [53]:
series_date = pd.Series(stockData.index, dtype = "datetime")

TypeError: data type "datetime" not understood

In [66]:
df = pd.DataFrame({"date" : stock_date})

In [67]:
df

Unnamed: 0,date
0,2012-01-02
1,2012-01-03
2,2012-01-04
3,2012-01-05
4,2012-01-06
...,...
2162,2020-10-16
2163,2020-10-19
2164,2020-10-20
2165,2020-10-21


In [68]:
df.date

0      2012-01-02
1      2012-01-03
2      2012-01-04
3      2012-01-05
4      2012-01-06
          ...    
2162   2020-10-16
2163   2020-10-19
2164   2020-10-20
2165   2020-10-21
2166   2020-10-22
Name: date, Length: 2167, dtype: datetime64[ns]

In [51]:
series_date

0      2012-01-02
1      2012-01-03
2      2012-01-04
3      2012-01-05
4      2012-01-06
          ...    
2162   2020-10-16
2163   2020-10-19
2164   2020-10-20
2165   2020-10-21
2166   2020-10-22
Name: 날짜, Length: 2167, dtype: datetime64[ns]

In [17]:
import calendar as cal

In [71]:
year, month = stockData.index.year, stockData.index.month

In [43]:
year.dtype

dtype('int64')

In [79]:
box = cal.Calendar().monthdatescalendar(year.any(), month.any())
box = np.array(box, dtype = str)
box

array([['0001-01-01', '0001-01-02', '0001-01-03', '0001-01-04',
        '0001-01-05', '0001-01-06', '0001-01-07'],
       ['0001-01-08', '0001-01-09', '0001-01-10', '0001-01-11',
        '0001-01-12', '0001-01-13', '0001-01-14'],
       ['0001-01-15', '0001-01-16', '0001-01-17', '0001-01-18',
        '0001-01-19', '0001-01-20', '0001-01-21'],
       ['0001-01-22', '0001-01-23', '0001-01-24', '0001-01-25',
        '0001-01-26', '0001-01-27', '0001-01-28'],
       ['0001-01-29', '0001-01-30', '0001-01-31', '0001-02-01',
        '0001-02-02', '0001-02-03', '0001-02-04']], dtype='<U10')