### 머신러닝 MultinomialNB test

In [1]:
from sklearn.naive_bayes import MultinomialNB #다항분포 나이브 베이즈 모델
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from konlpy.tag import Mecab, Okt, Kkma
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Flatten, Embedding, Dropout
from gensim.models import Word2Vec, FastText
from collections import Counter
from tensorflow.keras.initializers import Constant

try:
    from koeda import AEDA, EDA, RD, RI, SR, RS
except ImportError:
    !pip install koeda
    from koeda import AEDA, EDA, RD, RI, SR, RS

In [2]:
def preprocess(data):
    def _aug_setup():
        global config

        if config['aug']['mode'] == 'e':
            augmenter = EDA(morpheme_analyzer=config['morp'], alpha_sr=config['aug']['sr']['a'], alpha_ri=config['aug']['ri']['a'], alpha_rs=config['aug']['rs']['a'], prob_rd=config['aug']['rd']['a'])
            p = (config['aug']['sr']['p'], config['aug']['ri']['p'], config['aug']['rs']['p'], config['aug']['rd']['p'])
        elif config['aug']['mode'] == 'a':
            augmenter = AEDA(morpheme_analyzer=config['morp'], punc_ratio=0.3)
            p = max(config['aug']['sr']['p'], config['aug']['ri']['p'], config['aug']['rs']['p'], config['aug']['rd']['p'])
        else:
            augmenter = []
            if config['aug']['rd']['p'] != 0.0:
                augmenter.append(
                    (RD(morpheme_analyzer=config['morp']), config['aug']['rd']['p'])
                )

            if config['aug']['ri']['p'] != 0.0:
                augmenter.append(
                    (RI(morpheme_analyzer=config['morp'], stopword=config['aug']['stopword']), config['aug']['ri']['p'])
                )

            if config['aug']['sr']['p'] != 0.0:
                augmenter.append(
                    (SR(morpheme_analyzer=config['morp'], stopword=config['aug']['stopword']), config['aug']['sr']['p'])
                )

            if config['aug']['rs']['p'] != 0.0:
                augmenter.append(
                    (RS(morpheme_analyzer=config['morp']), config['aug']['rs']['p'])
                )
            p = None

        return augmenter, p

    def _aug(text):
        global config
        nonlocal augmenter
        nonlocal p

        if isinstance(augmenter, list):
            result = text

            for aug, p in augmenter:
                result = aug(result, p, config['aug']['repetition'])
        else:
            result = augmenter(text, p, config['aug']['repetition'])

        return result
    def _tokenize(text):
        result=[]
        tokenlist = config['morp'].pos(text)#,flatten=False 
        for word in tokenlist:
            result.append(word[0]+'__'+word[1].lower()) 
        return result
    
    
    global config

    if config['is_cut']:
        # 'conversation' 열의 각 항목에 대한 문자 수를 계산합니다.
        data['conversation_length'] = data['conversation'].apply(len)

        # 문자 수가 400 미만인 행만 선택합니다.
        data = data[data['conversation_length'] < config['cut_point']]

    if config['is_aug']:
        # 중복 augmenter 생성 방지를 위해서 처음 한번에 생성
        augmenter, p = _aug_setup()
        
        # 랜덤하게 행 선택 (예: 전체 행의 20%를 선택)
        random_indices = np.random.choice(data.index, size=int(len(data) * config['aug']['ratio']), replace=False)

        # 선택된 행에 대해 Random swap 함수 적용
        augmented_rows = data.loc[random_indices, 'conversation'].apply(_aug)

        # 증강된 데이터를 복사하고, 'text' 열에 증강된 텍스트를 삽입
        new_rows = data.loc[random_indices].copy()
        new_rows['conversation'] = augmented_rows
#         print(new_rows['conversation'])
        # 형태소 분석 실행
#         data['conversation'] = data['conversation'].apply(config['morp'].morphs)

        # 원본 데이터프레임에 증강된 데이터 추가
        data = pd.concat([data, new_rows])
        data['conversation'] = data['conversation'].apply(_tokenize)
    else:
        
        data['conversation'] = data['conversation'].apply(_tokenize)

    if config['is_stopword']:
        documents_by_class = df.groupby('class')['conversation'].apply(list)
        cls_nm = documents_by_class.index.tolist()
        cls_cnt = []
        for class_name, documents in documents_by_class.items():
            result = []
            for content in documents :
                result.extend(content)
            cls_cnt.append(result)
        total_counts = Counter('')
        temp = []
        for i in range(len(cls_nm)):
            word_counts = Counter(cls_cnt[i])
            total_counts = total_counts + word_counts
            temp.append(word_counts)
        temp2 =[]
        for i in range(len(cls_nm)):
            word_ratios = {word: count / total_counts[word] if word in total_counts else 1 for word, count in temp[i].items() if count > 10}
            sorted_word_ratios = sorted(word_ratios.items(), key=lambda x: x[1], reverse=True)
        #     sorted_word_ratios = sorted(word_ratios.items(), key=lambda x: x[1])
            temp2.append(sorted_word_ratios)
        word_counts_sorted = dict(sorted(total_counts.items(), key=lambda x: x[1], reverse=True))
        stopword = []
        for word, count in word_counts_sorted.items():
            if count < 50 : 
                break
            flag = True
            for tt in range(len(temp2)) :
                for i in range(len(temp2[tt])) :
                    if temp2[tt][i][1]<=0.4 :
                        break
                    #threshHold보다 큰 비율을 가지고 있는 단어는 불용어에서 제외한다
                    if word==temp2[tt][i][0] and temp2[tt][i][1]>0.4 :
                        flag = False
    #                     print(cls_nm[tt])
    #                     print(word)#불용어 제외 단어
                        break
                if not flag :
                    break
            if flag :
                stopword.append(word)
        df['conversation'] = df['conversation'].apply(lambda x: [word for word in x if word not in stopword])
    return data

In [3]:
from sklearn.model_selection import StratifiedKFold

def nfold_cross_validation(X, y, n_splits, model):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    scores = []
    
    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        
        model.fit(X_train, y_train)
        score = model.score(X_test, y_test)
        scores.append(score)
        
    return scores

In [4]:
config = {
    'is_preprocess': True, # 전처리 여부
    'is_dnn': True, # 딥러닝 사용 여부
    'morp': Mecab(), # 형태소 분석기 변경,Kkma()
    'is_aug': True, # 데이터 augment 사용 여부
    # 데이터 augmentation 파라미터의 경우 아래의 글들을 참고하면 좋음
    # https://yeon22.tistory.com/203
    # https://catsirup.github.io/ai/2020/04/21/nlp_data_argumentation.html
    'aug': {
        'ratio': 0.3, # 적용할 데이터의 비율
        # 사용하기 싫은 것은 p의 값을 0.0으로 지정 한다.
        'rd': {
            # 여기서 a는 사실 확률이다.
            # API 통일을 위해 a로 표기했지만, prob_rd가 본명칭
            'a': 0.3,
            'p': 0.4,
        }, # RandomDeletion
        'ri': {
            'a': 0.3, # alpha 값이고 데이터 증강 기법의 강도를 의미한다.
            'p': 0.4, # p는 증강 기법이 "얼마나 자주" 적용될지
        }, # RandomInsertion
        'sr': {
            'a': 0.3,
            'p': 0.4,
        }, # SynonymReplacement
        'rs': {
            'a': 0.3,
            'p': 0.4,
        }, # RandomSwap
        'mode': 'e', # EDA: e, AEDA: a, other: o
        'stopword': True,
        'repetition': 1 # 반복 여부인 것 같아요.
    },
    'is_cut': True, # 단어길이 자를건지
    'cut_point': 400, # 자르는 기준
    'is_word2vec': False, # word2vec 사용 여부
    'is_fasttext': True, # fasttext 사용 여부
    'is_stopword': True # syh님
}

In [5]:
print('start')
# 데이터는 그대로고 모델만 수정해서 확인할 경우
# 중복으로 읽고 전처리하는 대신 FIXED 변수를 통해 제어하세요.
# 'df' in globals() 은 변수의 존재 여부를 판단합니다.
FIXED = False
if not FIXED and 'df' not in globals():
    print('get data')
    df = pd.read_csv('./data/train.csv')

    if config['is_preprocess']:
        print('preprocess')
        df = preprocess(df)
else:
    print('data was fixed')

print('start train')
if config['is_dnn']:
    print('select dnn')
    # 가정: 입력 크기는 1000, 출력 클래스는 2
    max_words = 10000
    output_dim = 4

    print('tokenize')
    # Tokenizer를 생성하고 텍스트 데이터에 적합시킵니다.
    tokenizer = Tokenizer(num_words=max_words)
    tokenizer.fit_on_texts(df['conversation'])

    # 텍스트를 정수 인덱스 시퀀스로 변환합니다.
    sequences = tokenizer.texts_to_sequences(df['conversation'])

    # 시퀀스의 길이를 맞추기 위해 패딩을 추가합니다.
    data_pad = pad_sequences(sequences, padding='pre')

    # class 열을 숫자로 변환
    encoder = LabelEncoder()
    df['class'] = encoder.fit_transform(df['class'])

    print('split dataset')
    x_train, x_test, y_train, y_test = train_test_split(data_pad, df['class'], test_size=0.2)
    print(len(data_pad[0]))
    # word2vec
    assert not (config['is_word2vec'] and config['is_fasttext']), 'you have to choose only one among word2vec and fasttext'
    if config['is_word2vec'] or config['is_fasttext']:
        word_index = tokenizer.word_index
        index_word = {idx: word for word, idx in word_index.items()}
        recovered = [[index_word[idx] for idx in seq if idx != 0] for seq in sequences]
        print(recovered[0])
        if config['is_word2vec']:
            print('select word2vec')
            # Word2Vec 모델 학습
            word2vec_model = Word2Vec(sentences=recovered, 
                                    vector_size=100, 
                                    window=5, 
                                    min_count=1, 
#                                     workers=4,
                                    sg=0)

            # 단어 인덱스와 임베딩 매트릭스 생성
#             vocab_size = len(word2vec_model.wv.vocab) + 1  # +1 for padding
            vocab_size = len(word2vec_model.wv.key_to_index) + 1  
            print(vocab_size)
            embedding_dim = word2vec_model.wv.vector_size
            print(embedding_dim)
            embedding_matrix = np.zeros((vocab_size, embedding_dim))

#             for i, word in enumerate(word2vec_model.wv.vocab):
#                 embedding_matrix[i] = word2vec_model.wv[word]
            for i in range(4,vocab_size):
                if index_word[i] in word2vec_model.wv:
                    embedding_matrix[i] = word2vec_model.wv[index_word[i]]
            # Embedding layer with pre-trained Word2Vec weights
            embedding_layer = Embedding(vocab_size, 
                                        embedding_dim, 
                                        weights=[embedding_matrix], 
                                        input_length=len(data_pad[0]),  # 뭐 차원이 다르다 이러면 요거 건드려 보세요.
                                        trainable=True)  # Keep embeddings fixed
        elif config['is_fasttext']:
            print('select fasttext')
            fasttext_model = FastText(
                sentences=recovered,
                window=5,
                min_count=5,
                workers=4,
                sg=1
            )

            embedding_dim = fasttext_model.vector_size
#             vocab_size = len(fasttext_model.wv.vocab) + 1
            vocab_size = len(fasttext_model.wv.key_to_index) + 1  
            embedding_matrix = np.zeros((vocab_size, embedding_dim))

#             for i, word in enumerate(fasttext_model.wv.vocab):
#                 embedding_matrix[i] = fasttext_model.wv[word]
            for i in range(4,vocab_size):
                if index_word[i] in fasttext_model.wv:
                    embedding_matrix[i] = fasttext_model.wv[index_word[i]]
            embedding_layer = Embedding(vocab_size,
                                        embedding_dim,
                                        weights=[embedding_matrix],
                                        input_length=len(data_pad[0]),
                                        trainable=True)

    else:
        print('select normal')
        embedding_layer = Embedding(len(tokenizer.word_index)+1, 128, input_length=data_pad.shape[1])

    print('create model')
    # 모델을 생성합니다.
    model = Sequential()
    model.add(embedding_layer)
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))  # 드롭아웃 레이어 추가 (0.5는 드롭아웃 비율)
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))  # 드롭아웃 레이어 추가 (0.5는 드롭아웃 비율)
    model.add(Dense(len(df['class'].unique()), activation='softmax'))

    # 모델을 컴파일합니다.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    print('model fit')
    # 모델을 학습합니다.
    model.fit(x_train, y_train, epochs=30,validation_data=(x_test, y_test))

    print('model evaluate')
    temp = model.evaluate(x=x_test, y=y_test)
    print(temp)
#     model.evaluate(x=x_test, y=y_test)
else:
    print('select ml')
    df['conversation'] = df['conversation'].apply(lambda x: ' '.join(x))

    vectorizer = CountVectorizer()

    x_train = vectorizer.fit_transform(df['conversation'])
    y_train = df['class']
#     print(x_train[4777])
#     print(len(y_train))
#     assert len(x_train) == len(y_train)
    x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.2)


    model = MultinomialNB()
#     score = nfold_cross_validation(x_train,y_train,10,model)
    model.fit(x_train, y_train)

    score = model.score(x_test, y_test)
    print('Accuracy:', score)

start
get data
preprocess
start train
select dnn
tokenize
split dataset
283
['지금__mag', '너__np', '스스로__nng', '를__jko', '죽여__vv+ec', '달__vx', '라고__ec', '하__xsv', '는__etm', '것__nnb', '인가__vcp+ef', '?__sf', '아닙니다__vcn+ef', '.__sf', '죄송__xr', '합니다__xsa+ef', '.__sf', '죽__vv', '을__etm', '거__nnb', '면__vcp+ec', '혼자__mag', '죽__vv', '지__ec', '우리__np', '까지__jx', '사건__nng', '에__jkb', '게__ec', '해__vx+ef', '?__sf', '진짜__mag', '죽여__vv+ec', '버리__vx', '고__ec', '싶__vx', '게__ec', '.__sf', '정말__mag', '잘못__mag', '했__vv+ep', '습니다__ef', '.__sf', '너__np', '가__jks', '선택__nng', '해__xsv+ef', '.__sf', '너__np', '가__jks', '죽__vv', '을래__ec', '네__mm', '가족__nng', '을__jko', '죽여__vv+ec', '줄까__vx+ef', '.__sf', '죄송__xr', '합니다__xsa+ef', '.__sf', '정말__mag', '잘못__mag', '했__vv+ep', '습니다__ef', '.__sf', '너__np', '에게__jkb', '는__jx', '선택__nng', '권__xsn', '이__jks', '없__va', '어__ef', '.__sf', '선택__nng', '못__mag', '한다면__vv+ec', '너__np', '와__jkb', '네__mm', '가족__nng', '까지__jx', '모조리__mag', '죽여__vv+ec', '버릴__vx+etm', '거__nnb', '야__vcp+

In [6]:
def iterrableFit():
    print('start train')
    if config['is_dnn']:
        print('select dnn')
        # 가정: 입력 크기는 1000, 출력 클래스는 2
        max_words = 10000
        output_dim = 4

        print('tokenize')
        # Tokenizer를 생성하고 텍스트 데이터에 적합시킵니다.
        tokenizer = Tokenizer(num_words=max_words)
        tokenizer.fit_on_texts(df['conversation'])

        # 텍스트를 정수 인덱스 시퀀스로 변환합니다.
        sequences = tokenizer.texts_to_sequences(df['conversation'])

        # 시퀀스의 길이를 맞추기 위해 패딩을 추가합니다.
        data_pad = pad_sequences(sequences, padding='pre')

        # class 열을 숫자로 변환
        encoder = LabelEncoder()
        df['class'] = encoder.fit_transform(df['class'])

        print('split dataset')
        x_train, x_test, y_train, y_test = train_test_split(data_pad, df['class'], test_size=0.2)
        print(len(data_pad[0]))
        # word2vec
        assert not (config['is_word2vec'] and config['is_fasttext']), 'you have to choose only one among word2vec and fasttext'
        if config['is_word2vec'] or config['is_fasttext']:
            word_index = tokenizer.word_index
            index_word = {idx: word for word, idx in word_index.items()}
            recovered = [[index_word[idx] for idx in seq if idx != 0] for seq in sequences]
            print(recovered[0])
            if config['is_word2vec']:
                print('select word2vec')
                # Word2Vec 모델 학습
                word2vec_model = Word2Vec(sentences=recovered, 
                                        vector_size=100, 
                                        window=5, 
                                        min_count=1, 
    #                                     workers=4,
                                        sg=0)

                # 단어 인덱스와 임베딩 매트릭스 생성
    #             vocab_size = len(word2vec_model.wv.vocab) + 1  # +1 for padding
                vocab_size = len(word2vec_model.wv.key_to_index) + 1  
                print(vocab_size)
                embedding_dim = word2vec_model.wv.vector_size
                print(embedding_dim)
                embedding_matrix = np.zeros((vocab_size, embedding_dim))

    #             for i, word in enumerate(word2vec_model.wv.vocab):
    #                 embedding_matrix[i] = word2vec_model.wv[word]
                for i in range(4,vocab_size):
                    if index_word[i] in word2vec_model.wv:
                        embedding_matrix[i] = word2vec_model.wv[index_word[i]]
                # Embedding layer with pre-trained Word2Vec weights
                embedding_layer = Embedding(vocab_size, 
                                            embedding_dim, 
                                            weights=[embedding_matrix], 
                                            input_length=len(data_pad[0]),  # 뭐 차원이 다르다 이러면 요거 건드려 보세요.
                                            trainable=True)  # Keep embeddings fixed
            elif config['is_fasttext']:
                print('select fasttext')
                fasttext_model = FastText(
                    sentences=recovered,
                    window=5,
                    min_count=5,
                    workers=4,
                    sg=1
                )

                embedding_dim = fasttext_model.vector_size
    #             vocab_size = len(fasttext_model.wv.vocab) + 1
                vocab_size = len(fasttext_model.wv.key_to_index) + 1  
                embedding_matrix = np.zeros((vocab_size, embedding_dim))

    #             for i, word in enumerate(fasttext_model.wv.vocab):
    #                 embedding_matrix[i] = fasttext_model.wv[word]
                for i in range(4,vocab_size):
                    if index_word[i] in fasttext_model.wv:
                        embedding_matrix[i] = fasttext_model.wv[index_word[i]]
                embedding_layer = Embedding(vocab_size,
                                            embedding_dim,
                                            weights=[embedding_matrix],
                                            input_length=len(data_pad[0]),
                                            trainable=True)

        else:
            print('select normal')
            embedding_layer = Embedding(len(tokenizer.word_index)+1, 128, input_length=data_pad.shape[1])

        print('create model')
        # 모델을 생성합니다.
        model = Sequential()
        model.add(embedding_layer)
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.5))  # 드롭아웃 레이어 추가 (0.5는 드롭아웃 비율)
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.5))  # 드롭아웃 레이어 추가 (0.5는 드롭아웃 비율)
        model.add(Dense(len(df['class'].unique()), activation='softmax'))

        # 모델을 컴파일합니다.
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

        print('model fit')
        # 모델을 학습합니다.
        model.fit(x_train, y_train, epochs=10,validation_data=(x_test, y_test))

        print('model evaluate')
        temp = model.evaluate(x=x_test, y=y_test)
#         print(temp)
        return temp[1]
    else:
        print('select ml')
        x_train = df['conversation'].apply(lambda x: ' '.join(x))

        vectorizer = CountVectorizer()

        x_train = vectorizer.fit_transform(x_train)
        y_train = df['class']
    #     print(x_train[4777])
    #     print(len(y_train))
    #     assert len(x_train) == len(y_train)
        x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.2)


        model = MultinomialNB()
    #     score = nfold_cross_validation(x_train,y_train,10,model)
        model.fit(x_train, y_train)

        score = model.score(x_test, y_test)
        print('Accuracy:', score)
        return score

In [None]:
# iterArray[True,False]
# config['is_aug'] = True
# config['is_stopword']
# config['is_cut']
import itertools

variables = [True, False]

combinations = list(itertools.product(variables, repeat=3))
resultScoreArr = []
config['morp'] = Kkma()
# config['morp'] = Mecab()
for combination in combinations:
    config['is_aug'],config['is_stopword'],config['is_cut'] = combination[0],combination[1],combination[2]
    config['is_dnn'] = False
    df = pd.read_csv('./data/train.csv')
    df = preprocess(df)
    score = iterrableFit()
    temparr = [config['is_aug'],config['is_stopword'],config['is_cut'],config['is_dnn'],config['is_word2vec'],config['is_fasttext'],score]
    resultScoreArr.append(temparr)
    config['is_dnn'] = True
    config['is_word2vec'] = False
    config['is_fasttext'] = False
    score = iterrableFit()
    temparr = [config['is_aug'],config['is_stopword'],config['is_cut'],config['is_dnn'],config['is_word2vec'],config['is_fasttext'],score]
    resultScoreArr.append(temparr)
    config['is_word2vec'] = True
    score = iterrableFit()
    temparr = [config['is_aug'],config['is_stopword'],config['is_cut'],config['is_dnn'],config['is_word2vec'],config['is_fasttext'],score]
    resultScoreArr.append(temparr)
    config['is_word2vec'] = False
    config['is_fasttext'] = True
    score = iterrableFit()
    temparr = [config['is_aug'],config['is_stopword'],config['is_cut'],config['is_dnn'],config['is_word2vec'],config['is_fasttext'],score]
    resultScoreArr.append(temparr)

start train
select ml
Accuracy: 0.8869294605809128
start train
select dnn
tokenize
split dataset
323
select normal
create model
model fit
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
model evaluate
start train
select dnn
tokenize
split dataset
323
['지금__mag', '너__np', '스스로__nng', '를__jko', '죽이__vv', '어__ecs', '달라__vv', '고__ece', '애원__nng', '하__xsv', '는__etd', '것__nnb', '이__vcp', 'ㄴ가__efq', '?__sf', '아니__vcn', 'ㅂ니다__efn', '.__sf', '죄송__xr', '하__xsa', 'ㅂ니다__efn', '.__sf', '죽__vv', '을__etd', '거__nnb', '이__vcp', '면__ece', '혼자__nng', '죽__vv', '지__ecd', '우리__np', '까지__jx', '사건__nng', '에__jkm', '휘말리__vv', '게__ecd', '하__vv', '어__ecs', '?__sf', '진짜__mag', '죽이__vv', '어__ecs', '버리__vxv', '고__ece', '싶__vxa', '게__ecd', '.__sf', '정말__mag', '잘못하__vv', '었__ept', '습니다__efn', '.__sf', '너__np', '가__jks', '선택__nng', '하__xsv', '어__ecs', '.__sf', '너__np', '가__jks', '죽__vv', '을래__efn', '네__mdn', '가족__nng', '을__jko', '죽여주__vv', 'ㄹ까__efq', '.__s

In [8]:
#[config['is_aug'],config['is_stopword'],config['is_cut'],config['is_dnn'],config['is_word2vec'],config['is_fasttext'],score]
resultScoreArr

[[True, True, True, False, False, True, 0.8973029045643154],
 [True, True, True, True, False, False, 0.8734439611434937],
 [True, True, True, True, True, False, 0.7323651313781738],
 [True, True, True, True, False, True, 0.819502055644989],
 [True, True, False, False, False, True, 0.9006815968841285],
 [True, True, False, True, False, False, 0.8763388395309448],
 [True, True, False, True, True, False, 0.7332035303115845],
 [True, True, False, True, False, True, 0.8276533484458923],
 [True, False, True, False, False, True, 0.8869294605809128],
 [True, False, True, True, False, False, 0.8672199249267578],
 [True, False, True, True, True, False, 0.7105808854103088],
 [True, False, True, True, False, True, 0.8298755288124084],
 [True, False, False, False, False, True, 0.8958130477117819],
 [True, False, False, True, False, False, 0.8802337050437927],
 [True, False, False, True, True, False, 0.7185978293418884],
 [True, False, False, True, False, True, 0.82473224401474],
 [False, True, True

필요시 참고 링크
KoEDA
https://github.com/toriving/KoEDA