### 01. Days

In [1]:
# Walk-forward schedule: one row per simulation step, laid out as
# (train start, train end, test start, test end).
# Each training window spans twelve months and is followed by a one-month test window.
_walk_forward_windows = [
    ('2018-02-01', '2019-01-31', '2019-02-01', '2019-02-28'),
    ('2018-03-01', '2019-02-28', '2019-03-01', '2019-03-31'),
    ('2018-04-01', '2019-03-31', '2019-04-01', '2019-04-30'),
    ('2018-05-01', '2019-04-30', '2019-05-01', '2019-05-31'),
    ('2018-06-01', '2019-05-31', '2019-06-01', '2019-06-30'),
    ('2018-07-01', '2019-06-30', '2019-07-01', '2019-07-31'),
    ('2018-08-01', '2019-07-31', '2019-08-01', '2019-08-31'),
    ('2018-09-01', '2019-08-31', '2019-09-01', '2019-09-30'),
    ('2018-10-01', '2019-09-30', '2019-10-01', '2019-10-31'),
    ('2018-11-01', '2019-10-31', '2019-11-01', '2019-11-30'),
    ('2018-12-01', '2019-11-30', '2019-12-01', '2019-12-31'),
    ('2019-01-01', '2019-12-31', '2020-01-01', '2020-01-31'),
]

# Transpose the table into the four parallel lists indexed by step.
train_start_days, train_end_days, test_start_days, test_end_days = (
    list(column) for column in zip(*_walk_forward_windows)
)

### 02. Simulation

In [2]:
import warnings

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

# Suppress library warnings so the per-month report below stays readable.
warnings.filterwarnings(action='ignore')

# Columns fed to the model; 'USD' is used only to derive the UP/DOWN label.
FEATURE_COLUMNS = ['CAD', 'CHF', 'EUR', 'GBP', 'JPY(100)', 'BITCOIN', 'TETHER']
PCA_COLUMNS = ['principal component 1', 'principal component 2', 'principal component 3']


def _label_next_day_direction(frame):
    """Return a copy of ``frame`` with next-day USD and its direction label added.

    Adds two columns:
      * ``USD_1``  - the 'USD' value of the following row.
      * ``Target`` - 'UP' when ``USD_1 - USD`` is positive, otherwise 'DOWN'.

    Rows containing any NaN are dropped afterwards. The last row of the window
    always falls in this category because it has no following row.
    The input frame is not modified.
    """
    next_usd = frame['USD'].shift(-1)
    # A NaN difference compares as False, so it maps to 'DOWN' before dropna removes the row.
    direction = (next_usd - frame['USD']).gt(0).map({True: 'UP', False: 'DOWN'})
    return frame.assign(USD_1=next_usd, Target=direction).dropna()


# Load the data once; every step slices the same frame by its 'DATE' label.
dataset = pd.read_csv('../data/er_data.csv')
dataset.columns = ['DATE', 'CAD', 'CHF', 'EUR', 'GBP', 'JPY(100)', 'BITCOIN', 'TETHER', 'USD']
dataset = dataset.set_index('DATE')

acc = []       # per-step accuracy (in %) weighted by that step's number of test rows
test_obs = 0   # total number of test rows over all steps

for train_start, train_end, test_start, test_end in zip(
        train_start_days, train_end_days, test_start_days, test_end_days):

    # Train / test split plus label construction (each on its own window).
    train = _label_next_day_direction(dataset.loc[train_start:train_end])
    test = _label_next_day_direction(dataset.loc[test_start:test_end])

    # Target / input split
    train_target = train['Target']
    test_target = test['Target']

    # Standardise with training statistics only.
    # The std is taken from the centred training frame, as in the original computation.
    mean = train[FEATURE_COLUMNS].mean(axis=0)
    train_centered = train[FEATURE_COLUMNS] - mean
    std = train_centered.std(axis=0)
    train_scaled = train_centered / std
    test_scaled = (test[FEATURE_COLUMNS] - mean) / std

    # PCA: fitted on the training window, then applied to the test window.
    pca = PCA(n_components=len(PCA_COLUMNS))
    train_input = pd.DataFrame(data=pca.fit_transform(train_scaled), columns=PCA_COLUMNS)
    test_input = pd.DataFrame(data=pca.transform(test_scaled), columns=PCA_COLUMNS)

    # XGBoost
    # NOTE(review): `silent` and `nthread` appear to be legacy parameter names in newer
    # xgboost releases (`verbosity` / `n_jobs`) - confirm against the installed version.
    model = XGBClassifier(booster='gbtree',
                          colsample_bylevel=0.9,
                          colsample_bytree=0.8,
                          gamma=0,
                          max_depth=8,
                          min_child_weight=3,
                          n_estimators=50,
                          nthread=4,
                          objective='binary:logistic',
                          random_state=2,
                          silent=True)

    # NOTE(review): eval_set is the training data itself, so early stopping monitors
    # training loss rather than held-out loss - consider a separate validation split.
    model.fit(train_input, train_target, eval_set=[(train_input, train_target)],
              early_stopping_rounds=50, verbose=0)

    y_pred = model.predict(test_input)
    y_true = test_target
    step_accuracy = accuracy_score(y_true, y_pred) * 100

    test_obs += len(test_input)
    acc.append(step_accuracy * len(test_input))

    print('예측 구간 : {} ~ {}     Accuracy : {:.2f}%'.format(test_start, test_end, step_accuracy))

# Overall accuracy for the year: observation-weighted mean of the monthly accuracies.
print('\n1년 평균 정확도 : {:.2f}%'.format(np.sum(acc) / test_obs))

예측 구간 : 2019-02-01 ~ 2019-02-28     Accuracy : 50.00%
예측 구간 : 2019-03-01 ~ 2019-03-31     Accuracy : 31.58%
예측 구간 : 2019-04-01 ~ 2019-04-30     Accuracy : 52.38%
예측 구간 : 2019-05-01 ~ 2019-05-31     Accuracy : 21.05%
예측 구간 : 2019-06-01 ~ 2019-06-30     Accuracy : 55.56%
예측 구간 : 2019-07-01 ~ 2019-07-31     Accuracy : 40.91%
예측 구간 : 2019-08-01 ~ 2019-08-31     Accuracy : 55.00%
예측 구간 : 2019-09-01 ~ 2019-09-30     Accuracy : 44.44%
예측 구간 : 2019-10-01 ~ 2019-10-31     Accuracy : 45.00%
예측 구간 : 2019-11-01 ~ 2019-11-30     Accuracy : 60.00%
예측 구간 : 2019-12-01 ~ 2019-12-31     Accuracy : 35.00%
예측 구간 : 2020-01-01 ~ 2020-01-31     Accuracy : 52.63%

1년 평균 정확도 : 45.26%
