# Import

In [163]:
from sklearn.metrics import mean_squared_error, accuracy_score, confusion_matrix, cohen_kappa_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, ExtraTreesClassifier, VotingClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from subprocess import check_output
from xgboost import XGBClassifier, plot_importance
from lightgbm import LGBMClassifier
from numpy import newaxis
from collections import Counter
from tqdm import tqdm_notebook
from glob import glob
from functools import reduce

import xgboost as xg
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import time
import re
import warnings
warnings.filterwarnings("ignore")

# Function

In [203]:
def del_per(x) :
    try :
        if " %" in x :
            output = re.sub(" %","",x)
        else :
            output = re.sub("%","",x)
        return(output)
    except :
        pass

In [3]:
def cleansing(x) :
    output = re.sub("[년월]","-",x)
    output = re.sub("[일 ]","",output)
    return(output)

In [199]:
def text2num(x) :
    if type(x) == str :
        output = float(re.sub(",","",x))
    else :
        output = x
    return output

In [79]:
def model_performance(acc, truth, predict) :
    print("정확도 : ",np.mean(acc))
    print("정답 분포 :","Up -", Counter(truth)['Up']/np.sum([x for x in Counter(truth).values()]),"Down -", Counter(truth)['Down']/ np.sum([x for x in Counter(truth).values()]))
    print("Kappa :",cohen_kappa_score(truth,predict))

In [1]:
def model_performance_reg(acc) :
    print("정확도 : ",np.mean(acc))

In [5]:
def percent2num(x) :
    if "," in x :
        output = re.sub("%","",x)
        output = float(text2num(output))
    else :
        output = float(re.sub("%","",x))
    return output

In [330]:
def volum_num(x) :
    if x == "-" :
        output = 0
    else :
        text = re.findall('[KMB]',x)[0]
        if text == "K" :
            num = re.sub('[KBM]',"",x)
            num = text2num(num)
            output = num * 1000
        elif text == "M" :
            num = re.sub('[KBM]',"",x)
            num = text2num(num)
            output = num * 1000000
        elif text == "B" :
            num = re.sub('[KBM]',"",x)
            num = text2num(num)
            output = num * 1000000000
    return output

In [350]:
def continual_learning(model, data, del_var, target_var, train_volum, train_start_idx) :
    train_end_idx = train_start_idx + train_volum
    test_start_idx = train_end_idx + 1
    
    whole_predict = []
    whole_truth = []
    whole_acc = []
    
    select_list = [x for x in data.columns if x not in del_var]
    
    print("Start Fitting..")
    for _ in tqdm_notebook(range(data.shape[0] - (train_volum + 1))) :
        
        tmp_train_x = data.loc[train_start_idx:train_end_idx,select_list]
        tmp_train_y = data.loc[train_start_idx:train_end_idx,target_var]
        tmp_test_x = data.loc[test_start_idx:test_start_idx,select_list]
        tmp_test_y = data.loc[test_start_idx:test_start_idx,target_var]

        # fitting
        model.fit(tmp_train_x,tmp_train_y)
        # predict
        predict_value = model.predict(tmp_test_x)

        # save acc
        if target_var == "clf_target" :
            whole_acc.append(accuracy_score(predict_value,tmp_test_y))
        elif target_var == "reg_target" :
            whole_acc.append(mean_squared_error(predict_value,tmp_test_y))
        whole_predict.append(predict_value[0])
        whole_truth.append(tmp_test_y.iloc[0])

        train_start_idx += 1
        train_end_idx = train_start_idx + train_volum
        test_start_idx = train_end_idx + 1
    
    print("Finish")
    return whole_predict, whole_truth, whole_acc

In [351]:
def continual_learning_with_sampling(model, data, del_var, target_var, train_volum, train_start_idx, sample_days = 7, sample_cnt = 1) :
    train_end_idx = train_start_idx + train_volum
    test_start_idx = train_end_idx + 1
    
    whole_predict = []
    whole_truth = []
    whole_acc = []
    
    select_list = [x for x in data.columns if x not in del_var]

    print("Start Fitting..")
    for _ in tqdm_notebook(range(data.shape[0] - (train_volum + 1))) :
        tmp_train_x = data.loc[train_start_idx:train_end_idx,select_list]
        tmp_train_y = data.loc[train_start_idx:train_end_idx,target_var]
        tmp_test_x = data.loc[test_start_idx:test_start_idx,select_list]
        tmp_test_y = data.loc[test_start_idx:test_start_idx,target_var]

        # append sample
        for _ in range(sample_cnt) :
            tmp_train_x = tmp_train_x.append(tmp_train_x.loc[(train_end_idx - sample_days):])
            tmp_train_y = tmp_train_y.append(tmp_train_y.loc[(train_end_idx - sample_days):])

        # fitting
        model.fit(tmp_train_x,tmp_train_y)
        # predict
        predict_value = model.predict(tmp_test_x)

        # save acc
        if target_var == "clf_target" :
            whole_acc.append(accuracy_score(predict_value,tmp_test_y))
        elif target_var == "reg_target" :
            whole_acc.append(mean_squared_error(predict_value,tmp_test_y))
        whole_predict.append(predict_value[0])
        whole_truth.append(tmp_test_y.iloc[0])

        train_start_idx += 1
        train_end_idx = train_start_idx + train_volum
        test_start_idx = train_end_idx + 1
    
    print("Finish")
    return whole_predict, whole_truth, whole_acc

In [374]:
def make_target(data) :
    # 1. 분류 데이터
    clf_target = []
    for idx in range(1, data.shape[0]) :
        if data.kopsi_현재가[idx] >= data.kopsi_현재가[idx - 1] :
            clf_target.append("Up")
        else :
            clf_target.append("Down")
    # 2. 수치 데이터
    reg_target = []
    for idx in range(1,data.shape[0]) :
        reg_target.append(data.kopsi_현재가[idx])
    # 3. 가장 최근 데이터 날림
    data = data.iloc[:(data.shape[0] - 1),:]
    # 4. 저장
    data['clf_target'] = clf_target
    data['reg_target'] = reg_target
    return data

# Load data

In [7]:
save_file_path = "/home/ubunt/4-2/stock price/data/price/"

In [364]:
kospi = pd.read_csv(save_file_path + "KOSPI.csv")

In [365]:
kospi.head()

Unnamed: 0,date,kopsi_현재가,kopsi_오픈,kopsi_고가,kopsi_저가,kopsi_거래량,kopsi_변동
0,2010-01-04,1696.14,1681.71,1696.14,1681.71,296550000.0,0.79
1,2010-01-05,1690.62,1701.62,1702.39,1686.45,408850000.0,-0.33
2,2010-01-06,1705.32,1697.88,1706.89,1696.1,426040000.0,0.87
3,2010-01-07,1683.45,1702.92,1707.9,1683.45,462400000.0,-1.28
4,2010-01-08,1695.26,1694.06,1695.26,1668.84,379950000.0,0.7


In [360]:
kospi.to_csv(save_file_path + "KOSPI.csv", index=False)

#### 타겟 변수 생성

1. 분류 데이터

In [12]:
clf_target = []
for idx in range(1, kospi.shape[0]) :
    if kospi.kopsi_현재가[idx] >= kospi.kopsi_현재가[idx - 1] :
        clf_target.append("Up")
    else :
        clf_target.append("Down")

2. 수치 데이터

In [14]:
reg_target = []
for idx in range(1,kospi.shape[0]) :
    reg_target.append(kospi.kopsi_현재가[idx])

3. 가장 최근 데이터 날림

In [15]:
kospi = kospi.iloc[:(kospi.shape[0] - 1),:]

4. 저장

In [16]:
kospi['clf_target'] = clf_target
kospi['reg_target'] = reg_target

# Train on base feature

#### continual learning LGBM

In [240]:
var_set = ['date','clf_target',"reg_target"]
LightGBM_base = LGBMClassifier(random_state=0)

In [241]:
model_predict, ground_truth, model_acc = continual_learning(model = LightGBM_base, data = kospi, del_var= var_set,
                                                           target_var = "clf_target", train_volum = 100, 
                                                            train_start_idx = 0)

Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish


#### 모델 성능

In [242]:
model_performance(model_acc,ground_truth,model_predict)

정확도 :  0.5006446067898582
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.0004302922808374632


#### Tuned LGBM

In [97]:
LightGBM_tuned =LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
        importance_type='split', learning_rate=0.06528650438687811,
        max_depth=24, min_child_samples=20, min_child_weight=0.001,
        min_split_gain=0.0, n_estimators=400, n_jobs=-1, num_leaves=31,
        objective=None, random_state=0, reg_alpha=0.0, reg_lambda=0.0,
        silent=True, subsample=0.1817270377811787,
        subsample_for_bin=200000, subsample_freq=0)

In [100]:
model_predict, ground_truth, model_acc = continual_learning(model = LightGBM_tuned, data = kospi, del_var= var_set,
                                                           target_var = "clf_target", train_volum = 100, 
                                                            train_start_idx = 0)

Start Fitting..


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  # Remove the CWD from sys.path while we load stuff.


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish


In [101]:
model_performance(model_acc,ground_truth,model_predict)

정확도 :  0.4933390631714654
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : -0.014156245829847869


#### 기본 모델 test

In [118]:
# Classifier set 
random_state = 0
classifier_set = {}
classifier_set['SVM'] = SVC(random_state=random_state,verbose=False)
classifier_set['DT'] = DecisionTreeClassifier(random_state=random_state)
classifier_set['AdaBoost'] = AdaBoostClassifier(DecisionTreeClassifier(random_state=random_state),random_state=random_state,learning_rate=0.1)
classifier_set['RF'] = RandomForestClassifier(random_state=random_state)
classifier_set['ExtraTree'] = ExtraTreesClassifier(random_state=random_state)
classifier_set['GBM'] = GradientBoostingClassifier(random_state=random_state)
classifier_set['MLP'] = MLPClassifier(random_state=random_state)
classifier_set['KNN'] = KNeighborsClassifier()
classifier_set['LogisticLinear'] = LogisticRegression(random_state = random_state)
classifier_set['LDA'] = LinearDiscriminantAnalysis()

In [119]:
results_set = {}

In [120]:
for idx,base_model in enumerate(tqdm_notebook(classifier_set.values())) :
    model_predict, ground_truth, model_acc = continual_learning(model = base_model, data = kospi, del_var= var_set,
                                                               target_var = "clf_target", train_volum = 100, 
                                                                train_start_idx = 0)
    results_set[[x for x in classifier_set.keys()][idx]] = (model_predict, ground_truth, model_acc)
    print([x for x in classifier_set.keys()][idx])
    model_performance(model_acc,ground_truth,model_predict)

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
SVM
정확도 :  0.4997851310700473
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : -0.004968021376962062
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
DT
정확도 :  0.4941985388912763
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : -0.012361782855622172
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
AdaBoost
정확도 :  0.49935539321014183
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : -0.002700790375096185
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
RF
정확도 :  0.5100988397077783
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.024637931605013708
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
ExtraTree
정확도 :  0.5161151697464547
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.03478911768107906
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
GBM
정확도 :  0.49806617963042543
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : -0.0045839379582734985
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
MLP
정확도 :  0.49935539321014183
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : -0.013087771629558764
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
KNN
정확도 :  0.4954877524709927
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : -0.009596700579837636
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
LogisticLinear
정확도 :  0.4911903738719381
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : -0.028582777758281885
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
LDA
정확도 :  0.5100988397077783
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.018852320774982334



# continual learning with sampling

In [155]:
model_predict, ground_truth, model_acc = continual_learning_with_sampling(model = LightGBM_base, data = kospi, del_var= var_set,
                                                           target_var = "clf_target", train_volum = 100, 
                                                            train_start_idx = 0)

Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish


In [156]:
model_performance(model_acc,ground_truth,model_predict)

정확도 :  0.5109583154275892
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.022434936690169383


#### 기본 모델 test

In [157]:
results_sampling_set = {}

In [158]:
for idx,base_model in enumerate(tqdm_notebook(classifier_set.values())) :
    model_predict, ground_truth, model_acc = continual_learning_with_sampling(model = base_model, data = kospi, del_var= var_set,
                                                               target_var = "clf_target", train_volum = 100, 
                                                                train_start_idx = 0)
    results_sampling_set[[x for x in classifier_set.keys()][idx]] = (model_predict, ground_truth, model_acc)
    print([x for x in classifier_set.keys()][idx])
    model_performance(model_acc,ground_truth,model_predict)

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
SVM
정확도 :  0.4860335195530726
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : -0.040931219274282116
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
DT
정확도 :  0.5100988397077783
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.02057371892380455
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
AdaBoost
정확도 :  0.5161151697464547
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.032955230755262854
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
RF
정확도 :  0.5143962183068328
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.035090358793102516
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
ExtraTree
정확도 :  0.5019338203695746
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.010291236412569904
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
GBM
정확도 :  0.5096691018478728
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.019535385591406884
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
MLP
정확도 :  0.4941985388912763
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : -0.01369717503167256
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
KNN
정확도 :  0.5092393639879673
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.016266524911748625
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
LogisticLinear
정확도 :  0.49290932531155995
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : -0.02488056136160055
Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish
LDA
정확도 :  0.5088096261280619
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.01522466187884619



# 환율

# continual learning with sampling

In [352]:
var_set = ['date','clf_target',"reg_target"]

In [361]:
exchange_df = pd.read_csv(save_file_path + "KOSPI_환율.csv")

#### 타겟 변수 생성

In [376]:
exchange_df = make_target(exchange_df)

In [363]:
exchange_df.head()

Unnamed: 0,date,kopsi_현재가,kopsi_오픈,kopsi_고가,kopsi_저가,kopsi_거래량,kopsi_변동,GBP_USD내역_현재가,GBP_USD내역_오픈,GBP_USD내역_고가,...,미국달러지수선물내역_오픈,미국달러지수선물내역_고가,미국달러지수선물내역_저가,미국달러지수선물내역_거래량,미국달러지수선물내역_변동,USD_CNY_현재가,USD_CNY_오픈,USD_CNY_고가,USD_CNY_저가,USD_CNY_변동
0,2010-01-04,1696.14,1681.71,1696.14,1681.71,296550000.0,0.79,1.6097,1.6127,1.6242,...,78.355,78.59,77.575,18390.0,-0.59,6.8285,6.8297,6.8297,6.827,0.02
1,2010-01-05,1690.62,1701.62,1702.39,1686.45,408850000.0,-0.33,1.5998,1.6092,1.6156,...,77.765,77.95,77.39,19300.0,0.03,6.8268,6.8283,6.8288,6.8257,-0.02
2,2010-01-06,1705.32,1697.88,1706.89,1696.1,426040000.0,0.87,1.6025,1.5991,1.6065,...,77.94,78.215,77.58,16010.0,-0.25,6.8278,6.8277,6.8291,6.8267,0.01
3,2010-01-07,1683.45,1702.92,1707.9,1683.45,462400000.0,-1.28,1.5935,1.6011,1.606,...,77.615,78.27,77.6,15510.0,0.58,6.8281,6.8269,6.8282,6.8258,0.0
4,2010-01-08,1695.26,1694.06,1695.26,1668.84,379950000.0,0.7,1.6028,1.5934,1.6111,...,78.2,78.435,77.545,23910.0,-0.58,6.8276,6.8278,6.8285,6.826,-0.01


In [354]:
model_predict, ground_truth, model_acc = continual_learning_with_sampling(model = LightGBM_base, data = exchange_df, del_var= var_set,
                                                           target_var = "clf_target", train_volum = 100, 
                                                            train_start_idx = 0)

Start Fitting..


HBox(children=(IntProgress(value=0, max=2319), HTML(value='')))


Finish


In [355]:
model_performance(model_acc,ground_truth,model_predict)

정확도 :  0.5416127641224666
정답 분포 : Up - 0.5213454075032341 Down - 0.47865459249676584
Kappa : 0.08266824746664525


# 채권

In [373]:
bond_df = pd.read_csv(save_file_path + "KOSPI_채권.csv")

#### 타겟 변수 생성

In [376]:
bond_df = make_target(bond_df)

In [None]:
bond_df.head()

In [378]:
model_predict, ground_truth, model_acc = continual_learning_with_sampling(model = LightGBM_base, data = bond_df, del_var= var_set,
                                                           target_var = "clf_target", train_volum = 100, 
                                                            train_start_idx = 0)

Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish


In [379]:
model_performance(model_acc,ground_truth,model_predict)

정확도 :  0.5165449076063601
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.03030640891390257


# 주식

In [380]:
stock_df = pd.read_csv(save_file_path + "KOSPI_주식.csv")

#### 타겟 변수 생성

In [381]:
stock_df = make_target(stock_df)

In [382]:
model_predict, ground_truth, model_acc = continual_learning_with_sampling(model = LightGBM_base, data = stock_df, del_var= var_set,
                                                           target_var = "clf_target", train_volum = 100, 
                                                            train_start_idx = 0)

Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish


In [383]:
model_performance(model_acc,ground_truth,model_predict)

정확도 :  0.5509239363987968
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.09939450144050799


# 지수

In [384]:
index_df = pd.read_csv(save_file_path + "KOSPI_지수.csv")

#### 타겟 변수 생성

In [385]:
index_df = make_target(index_df)

In [386]:
model_predict, ground_truth, model_acc = continual_learning_with_sampling(model = LightGBM_base, data = index_df, del_var= var_set,
                                                           target_var = "clf_target", train_volum = 100, 
                                                            train_start_idx = 0)

Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish


In [387]:
model_performance(model_acc,ground_truth,model_predict)

정확도 :  0.5745595186935969
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.14770103886110042


# 원자재

In [407]:
material_df = pd.read_csv(save_file_path + "KOSPI_원자재.csv")

#### 타겟 변수 생성

In [408]:
material_df = make_target(material_df)

In [390]:
model_predict, ground_truth, model_acc = continual_learning_with_sampling(model = LightGBM_base, data = material_df, del_var= var_set,
                                                           target_var = "clf_target", train_volum = 100, 
                                                            train_start_idx = 0)

Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))


Finish


In [391]:
model_performance(model_acc,ground_truth,model_predict)

정확도 :  0.5238504512247529
정답 분포 : Up - 0.5212720240653201 Down - 0.47872797593467986
Kappa : 0.044924533096458164


In [403]:
from ngboost.ngboost import NGBoost
from ngboost.learners import default_tree_learner
from ngboost.scores import MLE
from ngboost.distns import Normal

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [404]:
X, Y = load_boston(True)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

ngb = NGBoost(Base=default_tree_learner, Dist=Normal, Score=MLE(), natural_gradient=True,
              verbose=False)
ngb.fit(X_train, Y_train)
Y_preds = ngb.predict(X_test)
Y_dists = ngb.pred_dist(X_test)

# test Mean Squared Error
test_MSE = mean_squared_error(Y_preds, Y_test)
print('Test MSE', test_MSE)

#test Negative Log Likelihood
test_NLL = -Y_dists.logpdf(Y_test.flatten()).mean()
print('Test NLL', test_NLL)

Test MSE 11.191782986778595
Test NLL 2.6686430306101885


In [409]:
model_predict, ground_truth, model_acc = continual_learning_with_sampling(model = ngb, data = material_df, del_var= var_set,
                                                           target_var = "clf_target", train_volum = 100, 
                                                            train_start_idx = 0)

Start Fitting..


HBox(children=(IntProgress(value=0, max=2327), HTML(value='')))

TypeError: unsupported operand type(s) for /: 'str' and 'int'

# 참고사항

#### multiple dataframe join
https://stackoverflow.com/questions/23668427/pandas-three-way-joining-multiple-dataframes-on-columns