In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
import numpy as np
from pykrx import stock

In [2]:
def labellingD0(d0) -> str:
    '''
    Classify a single candle (the D0 bar) into one of 25 shape labels.

    The candle is first bucketed by its close relative to its open:
      * P1x - long bullish  (close >= 1.1   * open)
      * P0x - short bullish (close >= 1.005 * open)
      * K0x - flat          (close >= open)
      * M0x - short bearish (close >= 0.9   * open)
      * M1x - long bearish  (everything else)
    The last digit then encodes which wicks are present and how they compare
    with the body.

    d0 must support ``d0[key]`` lookups for 'open', 'high', 'low' and 'close'.
    Returns the three-character label, e.g. "P05".
    '''
    op, hi, lo, cl = d0['open'], d0['high'], d0['low'], d0['close']

    # Long bullish candle
    if cl >= 1.1*op:
        upper_wick = cl < hi
        lower_wick = op > lo
        if upper_wick and lower_wick:
            return "P15"
        if upper_wick:
            # hi - 2*cl + op >= 0  <=>  upper wick (hi - cl) >= body (cl - op)
            return "P14" if hi - 2*cl + op >= 0 else "P13"
        return "P11" if lower_wick else "P10"

    # Short bullish candle
    if cl >= 1.005*op:
        upper_wick = cl < hi
        lower_wick = op > lo
        if upper_wick:
            if lower_wick:
                return "P05"
            # hi - 3*cl + 2*op >= 0  <=>  upper wick >= twice the body
            return "P04" if hi - 3*cl + 2*op >= 0 else "P03"
        if lower_wick:
            # 2*hi - 3*op + lo >= 0  <=>  2*(hi - op) >= lower wick (op - lo)
            return "P01" if 2*hi - 3*op + lo >= 0 else "P02"
        return "P00"

    # Flat candle (close barely above or equal to open)
    if cl >= op:
        close_to_low = cl - lo
        high_to_close = hi - cl
        if close_to_low > high_to_close*3:
            return "K01"
        if close_to_low*3 < high_to_close:
            return "K02"
        return "K00"

    # Short bearish candle
    if cl >= 0.9*op:
        upper_wick = op < hi
        lower_wick = cl > lo
        if upper_wick:
            if lower_wick:
                return "M05"
            # hi - 3*op + 2*cl < 0  <=>  upper wick < twice the body (op - cl)
            return "M03" if hi - 3*op + 2*cl < 0 else "M04"
        if lower_wick:
            # 3*cl - lo - 2*op < 0  <=>  lower wick (cl - lo) < twice the body
            return "M01" if 3*cl - lo - 2*op < 0 else "M02"
        return "M00"

    # Long bearish candle
    upper_wick = op < hi
    lower_wick = cl > lo
    if lower_wick:
        return "M15" if upper_wick else "M11"
    if upper_wick:
        # hi - 2*op + cl >= 0  <=>  upper wick (hi - op) >= body (op - cl)
        return "M14" if hi - 2*op + cl >= 0 else "M13"
    return "M10"


def labellingD1(d10) -> str:
    '''
    Label a two-candle window: 5 coarse D1 classes x 25 D0 labels.

    d10 is indexed positionally with ``.iloc``: row 0 is the earlier candle
    (D1) and row 1 the later one (D0). The result is the coarse class of D1
    ("P10", "P00", "K00", "M00" or "M10", chosen from close vs. open only)
    followed by the full labellingD0 label of D0, e.g. "P00M05".
    '''
    def _coarse_class(open_price, close_price) -> str:
        # Same close/open thresholds as labellingD0, without the wick detail.
        if close_price >= 1.1*open_price:      # long bullish
            return "P10"
        if close_price >= 1.005*open_price:    # short bullish
            return "P00"
        if close_price >= open_price:          # flat
            return "K00"
        if close_price >= 0.9*open_price:      # short bearish
            return "M00"
        return "M10"                           # long bearish

    earlier = d10.iloc[0]
    prefix = _coarse_class(earlier['open'], earlier['close'])
    return prefix + labellingD0(d10.iloc[1])

def labellingD2(d210):
    '''
    Label a three-candle window: relation of the two earlier bodies + D0 label.

    d210 is indexed positionally with ``.iloc``: row 0 is D2, row 1 is D1 and
    row 2 is D0. Only the open/close prices of D2 and D1 are used to pick the
    three-character prefix; the labellingD0 label of row 2 is appended to it.
    '''
    older, newer = d210.iloc[0], d210.iloc[1]
    d2_open, d2_close = older['open'], older['close']
    d1_open, d1_close = newer['open'], newer['close']

    body_prices = (d2_open, d2_close, d1_open, d1_close)
    top = max(body_prices)
    midpoint = (top + min(body_prices))/2

    def _prefix() -> str:
        # All four prices sit within a narrow band around their midpoint.
        if top/midpoint <= 1.005:
            return "S04"
        # D2 bullish (close at or above open)
        if d2_open <= d2_close:
            if d1_open <= d1_close:
                return "P10"
            if d2_open >= d1_close:
                return "S07"
            if d2_close > d1_open:
                return "S06"
            return "S03" if d2_close >= d1_close else "S05"
        # D2 bearish
        if d2_open >= d2_close:
            if d1_open >= d1_close:
                return "M10"
            if d2_close < d1_open:
                return "S01"
            if d2_open <= d1_close:
                return "S02"
            return "S08" if d2_close >= d1_close else "S00"
        # Only reachable when neither comparison above holds (e.g. NaN prices).
        return "S09"

    return _prefix() + labellingD0(d210.iloc[2])

In [3]:
# Load the stock universe (company name + stock code) from a CSV next to the
# notebook's parent directory. The codes show up without leading zeros in the
# output below (20, 40, ...); the download loops zero-pad them to 6 digits.
stock_list = pd.read_csv('../stockcode.csv')
stock_list

Unnamed: 0,회사명,종목코드
0,동화약품,20
1,KR모터스,40
2,경방,50
3,메리츠화재,60
4,삼양홀딩스,70
...,...,...
2375,코오롱티슈진,950160
2376,JTC,950170
2377,SNK,950180
2378,미투젠,950190


In [None]:
# Score every stock in stock_list: look up past occurrences of the latest
# two-candle pattern (D1, D0) and tally which kind of candle followed it.

# strftime zero-pads month and day. Concatenating str(year)+str(month)+str(day)
# turns 2020-01-05 into "202015", which is not a valid YYYYMMDD date.
today = pd.Timestamp.now().strftime("%Y%m%d")

for stock_index in range(len(stock_list)):
    # Restore the leading zeros missing from the code column (20 -> "000020").
    stock_code = str(stock_list['종목코드'].iloc[stock_index]).zfill(6)
    stockData = stock.get_market_ohlcv_by_date("20120101", today, stock_code)
    if len(stockData) < 2:
        # The (D1, D0) lookup below needs at least two candles, and an empty
        # download cannot take the five-column rename either.
        print("skip {}: fewer than two candles returned".format(stock_code))
        continue
    stockData.columns = pd.Index(["open", "high","low","close","volume"],name=stockData.columns.name)

    # Build each label column as a complete list and assign it once. Writing
    # element-wise through stockData[col].values[i] only works while .values is
    # a writable view of the frame, which pandas does not guarantee (it is
    # read-only under Copy-on-Write).
    n_rows = len(stockData)
    pattern1 = [labellingD0(stockData.iloc[i]) for i in range(n_rows)]
    stockData['pattern1'] = pattern1
    stockData['pattern2'] = [None] + [
        labellingD1(stockData.iloc[i-1:i+1]) for i in range(1, n_rows)
    ]
    stockData['pattern3'] = [None, None] + [
        labellingD2(stockData.iloc[i-2:i+1]) for i in range(2, n_rows)
    ]

    # Keep the per-stock label series reachable as A<code>_1bong; a later cell
    # inspects it under that name (A033180_1bong).
    globals()['A' + stock_code + '_1bong'] = stockData["pattern1"]

    # One transaction per run of three consecutive candles:
    # ["01" + earliest, "02" + middle, "03" + latest].
    # The individual transactions are no longer also stored as separate
    # A<code>_1bong<i> globals; they are all available through the list.
    transactions = [
        ["01" + first, "02" + second, "03" + third]
        for first, second, third in zip(pattern1, pattern1[1:], pattern1[2:])
    ]
    globals()['A' + stock_code + '_bong_list'] = transactions

    # Query pattern: the two most recent candles, tagged like the first two
    # slots of a transaction. A plain list is indexed so that -1 / -2 are
    # unambiguously positional (integer keys on a date-indexed Series are not).
    d0 = "02" + pattern1[-1]
    d1 = "01" + pattern1[-2]

    families = ('P0', 'P1', 'M0', 'M1', 'K0')
    A = {'P_score' : 0, 'stock_name' : stockData.columns.name, 'stock_code' : stock_code,
         'P0' : 0, 'P1' : 0, 'M0' : 0, 'M1' : 0, 'K0' : 0}
    for s in transactions:
        # s[0]/s[1] carry the "01"/"02" tags, so this is the same match as
        # testing `d1 in s and d0 in s`.
        if s[0] == d1 and s[1] == d0:
            family = s[2][2:4]      # "03P05" -> "P0"
            if family in families:
                A[family] += 1

    # Share of bullish follow-ups; stays 0 when the pattern never occurred.
    total = sum(A[family] for family in families)
    if total:
        A['P_score'] = (A['P0'] + A['P1']) / total
    print(A)

{'P_score': 0.350210970464135, 'stock_name': '동화약품', 'stock_code': '000020', 'P0': 83, 'P1': 0, 'M0': 120, 'M1': 0, 'K0': 34}
{'P_score': 0.3076923076923077, 'stock_name': 'KR모터스', 'stock_code': '000040', 'P0': 8, 'P1': 0, 'M0': 15, 'M1': 0, 'K0': 3}
{'P_score': 0.75, 'stock_name': '경방', 'stock_code': '000050', 'P0': 3, 'P1': 0, 'M0': 1, 'M1': 0, 'K0': 0}
{'P_score': 0.31666666666666665, 'stock_name': '메리츠화재', 'stock_code': '000060', 'P0': 19, 'P1': 0, 'M0': 35, 'M1': 0, 'K0': 6}
{'P_score': 0.75, 'stock_name': '삼양홀딩스', 'stock_code': '000070', 'P0': 3, 'P1': 0, 'M0': 1, 'M1': 0, 'K0': 0}
{'P_score': 0.29365079365079366, 'stock_name': '하이트진로', 'stock_code': '000080', 'P0': 37, 'P1': 0, 'M0': 70, 'M1': 0, 'K0': 19}
{'P_score': 0.4166666666666667, 'stock_name': '유한양행', 'stock_code': '000100', 'P0': 99, 'P1': 1, 'M0': 108, 'M1': 0, 'K0': 32}
{'P_score': 0.3722627737226277, 'stock_name': 'CJ대한통운', 'stock_code': '000120', 'P0': 51, 'P1': 0, 'M0': 72, 'M1': 0, 'K0': 14}
{'P_score': 0.38571428

In [94]:
# Loop variable left in the notebook namespace by the scoring loop: the code of
# the stock it processed last.
stock_code

'033180'

In [96]:
# pattern1 label series of stock 033180, stored by the scoring loop under a
# dynamically built global name ('A' + stock_code + '_1bong').
A033180_1bong

날짜
2012-01-02    P05
2012-01-03    P05
2012-01-04    P03
2012-01-05    M02
2012-01-06    P05
             ... 
2020-10-15    M05
2020-10-16    M05
2020-10-19    M05
2020-10-20    P05
2020-10-21    K00
Name: pattern1, Length: 2166, dtype: object

In [31]:
# NOTE(review): bong_list is not defined in any cell shown here; presumably one
# of the per-stock *_bong_list transaction lists from an earlier session - confirm.
bong_list[2161]

['01M05', '02M05', '03P05']

In [28]:
# One-hot encode the three-candle transactions: one boolean column per distinct
# tagged label ("01K00", ..., "03P10"), one row per transaction.
# NOTE(review): relies on bong_list, which no visible cell defines - confirm its source.
te = TransactionEncoder()
te_ary = te.fit(bong_list).transform(bong_list)
df = pd.DataFrame(te_ary, columns=te.columns_)

In [29]:
# Display the encoded transaction matrix.
df

Unnamed: 0,01K00,01K01,01K02,01M00,01M01,01M02,01M03,01M04,01M05,01P00,...,03M03,03M04,03M05,03P00,03P01,03P02,03P03,03P04,03P05,03P10
0,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,True,False,...,False,False,True,False,False,False,False,False,False,False
3,False,False,False,False,False,False,True,False,False,False,...,False,False,True,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2157,False,False,False,False,False,False,False,False,True,False,...,False,False,True,False,False,False,False,False,False,False
2158,False,False,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
2159,False,False,False,False,False,False,False,False,True,False,...,False,False,True,False,False,False,False,False,False,False
2160,False,False,False,False,False,False,False,False,True,False,...,False,False,True,False,False,False,False,False,False,False


In [35]:
# Mine itemsets whose support is at least 0.001 (0.1% of the transactions);
# use_colnames=True reports items by column label instead of column index.
frequent_itemsets = apriori(df, min_support=0.001, use_colnames=True)
frequent_itemsets 

Unnamed: 0,support,itemsets
0,0.109621,(01K00)
1,0.057817,(01K01)
2,0.018039,(01K02)
3,0.015264,(01M00)
4,0.069843,(01M01)
...,...,...
553,0.001388,"(01P05, 03M03, 02P05)"
554,0.012951,"(03M05, 01P05, 02P05)"
555,0.002313,"(01P05, 02P05, 03P01)"
556,0.002775,"(01P05, 02P05, 03P03)"


In [36]:
# Derive association rules with confidence >= 0.3 from the frequent itemsets.
# NOTE(review): this import would normally sit with the other imports in the
# first cell so a fresh-kernel run shows every dependency up front.
from mlxtend.frequent_patterns import association_rules
association_rules(frequent_itemsets, metric="confidence", min_threshold=0.3) 

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(01K02),(02M05),0.018039,0.316374,0.007863,0.435897,1.377793,0.002156,1.211883
1,(01K02),(03M05),0.018039,0.316374,0.006938,0.384615,1.215700,0.001231,1.110893
2,(01M01),(02M05),0.069843,0.316374,0.023589,0.337748,1.067561,0.001493,1.032276
3,(01M02),(02M05),0.011101,0.316374,0.004625,0.416667,1.317008,0.001113,1.171931
4,(01M02),(03M05),0.011101,0.316374,0.005088,0.458333,1.448709,0.001576,1.262079
...,...,...,...,...,...,...,...,...,...
198,"(01P05, 02P01)",(03M05),0.012026,0.316374,0.006938,0.576923,1.823549,0.003133,1.615844
199,"(03K01, 02P03)",(01P05),0.003700,0.192414,0.001388,0.375000,1.948918,0.000676,1.292137
200,"(03P01, 02P03)",(01P05),0.002313,0.192414,0.001388,0.600000,3.118269,0.000943,2.018964
201,"(03P05, 02P03)",(01P05),0.011101,0.192414,0.003700,0.333333,1.732372,0.001564,1.211378


In [38]:
# Last transaction in bong_list (same value as bong_list[2161] shown earlier).
bong_list[-1]

['01M05', '02M05', '03P05']

In [42]:
# Histogram of the third-slot labels over all transactions that contain both
# the latest candle (tagged "02") and the one before it (tagged "01").
# NOTE(review): samsung_1bong and bong_list are not defined in any visible cell.
d0 = "02" + samsung_1bong[-1]
d1 = "01" + samsung_1bong[-2]
A = {}
for s in bong_list:
    if d0 in s and d1 in s:
        key = "{}".format(s[2])
        # First sighting inserts the key, so keys keep first-occurrence order.
        A[key] = A.get(key, 0) + 1
print(A)

{'03M05': 40, '03K00': 12, '03K01': 12, '03P01': 9, '03P05': 29, '03M01': 10, '03M03': 14, '03M04': 2, '03M00': 3, '03P04': 1, '03P03': 8, '03K02': 2, '03P02': 1}


In [43]:
# Same lookup as the label histogram, but grouped into the five candle
# families (short/long bullish, short/long bearish, flat).
# NOTE(review): samsung_1bong and bong_list are not defined in any visible cell.
d0 = "02" + samsung_1bong[-1]
d1 = "01" + samsung_1bong[-2]
A = {'P0' : 0, 'P1' : 0, 'M0' : 0, 'M1' : 0, 'K0' : 0}
for s in bong_list:
    if d0 not in s or d1 not in s:
        continue
    # Families are tried in the order P0, P1, M0, M1, K0; the first one found
    # in the third-slot label gets the count.
    for family in ('P0', 'P1', 'M0', 'M1', 'K0'):
        if family in s[2]:
            A[family] += 1
            break
print(A)

{'P0': 48, 'P1': 0, 'M0': 69, 'M1': 0, 'K0': 26}


In [None]:
# Same scoring procedure as the loop over the whole stock_list, limited to
# rows 100-199: look up past occurrences of the latest two-candle pattern
# (D1, D0) and tally which kind of candle followed it.
# NOTE(review): this cell duplicates the full-universe loop apart from the row
# range; consider factoring both into one function taking the range.

# strftime zero-pads month and day. Concatenating str(year)+str(month)+str(day)
# turns 2020-01-05 into "202015", which is not a valid YYYYMMDD date.
today = pd.Timestamp.now().strftime("%Y%m%d")

for stock_index in range(100,200):
    # Restore the leading zeros missing from the code column (20 -> "000020").
    stock_code = str(stock_list['종목코드'].iloc[stock_index]).zfill(6)
    stockData = stock.get_market_ohlcv_by_date("20120101", today, stock_code)
    if len(stockData) < 2:
        # The (D1, D0) lookup below needs at least two candles, and an empty
        # download cannot take the five-column rename either.
        print("skip {}: fewer than two candles returned".format(stock_code))
        continue
    stockData.columns = pd.Index(["open", "high","low","close","volume"],name=stockData.columns.name)

    # Build each label column as a complete list and assign it once. Writing
    # element-wise through stockData[col].values[i] only works while .values is
    # a writable view of the frame, which pandas does not guarantee (it is
    # read-only under Copy-on-Write).
    n_rows = len(stockData)
    pattern1 = [labellingD0(stockData.iloc[i]) for i in range(n_rows)]
    stockData['pattern1'] = pattern1
    stockData['pattern2'] = [None] + [
        labellingD1(stockData.iloc[i-1:i+1]) for i in range(1, n_rows)
    ]
    stockData['pattern3'] = [None, None] + [
        labellingD2(stockData.iloc[i-2:i+1]) for i in range(2, n_rows)
    ]

    # Keep the per-stock label series reachable as A<code>_1bong, as before.
    globals()['A' + stock_code + '_1bong'] = stockData["pattern1"]

    # One transaction per run of three consecutive candles:
    # ["01" + earliest, "02" + middle, "03" + latest].
    # The individual transactions are no longer also stored as separate
    # A<code>_1bong<i> globals; they are all available through the list.
    transactions = [
        ["01" + first, "02" + second, "03" + third]
        for first, second, third in zip(pattern1, pattern1[1:], pattern1[2:])
    ]
    globals()['A' + stock_code + '_bong_list'] = transactions

    # Query pattern: the two most recent candles, tagged like the first two
    # slots of a transaction. A plain list is indexed so that -1 / -2 are
    # unambiguously positional (integer keys on a date-indexed Series are not).
    d0 = "02" + pattern1[-1]
    d1 = "01" + pattern1[-2]

    families = ('P0', 'P1', 'M0', 'M1', 'K0')
    A = {'P_score' : 0, 'stock_name' : stockData.columns.name, 'stock_code' : stock_code,
         'P0' : 0, 'P1' : 0, 'M0' : 0, 'M1' : 0, 'K0' : 0}
    for s in transactions:
        # s[0]/s[1] carry the "01"/"02" tags, so this is the same match as
        # testing `d1 in s and d0 in s`.
        if s[0] == d1 and s[1] == d0:
            family = s[2][2:4]      # "03P05" -> "P0"
            if family in families:
                A[family] += 1

    # Share of bullish follow-ups; stays 0 when the pattern never occurred.
    total = sum(A[family] for family in families)
    if total:
        A['P_score'] = (A['P0'] + A['P1']) / total
    print(A)