# VAR + fbProphet 코인 트레이더 모델 Ver_1.0

# 0. Colab Mount

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install statsmodels==0.11.1
!pip install fbprophet

Collecting statsmodels==0.11.1
[?25l  Downloading https://files.pythonhosted.org/packages/7b/6a/0bf4184c3fb6f9f43df997b88de5784b4cb2f6bd19a5dc213463971076cf/statsmodels-0.11.1-cp37-cp37m-manylinux1_x86_64.whl (8.7MB)
[K     |████████████████████████████████| 8.7MB 4.1MB/s 
Installing collected packages: statsmodels
  Found existing installation: statsmodels 0.10.2
    Uninstalling statsmodels-0.10.2:
      Successfully uninstalled statsmodels-0.10.2
Successfully installed statsmodels-0.11.1


# 1. Library Import

In [3]:
import numpy as np
import pandas as pd
from google.colab import files
import gc
import math
import os.path
import time
import matplotlib.pyplot as plt
from datetime import timedelta, datetime
from dateutil import parser
from tqdm import tqdm
import copy
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.api import VAR
from fbprophet import Prophet
from fbprophet.plot import add_changepoints_to_plot
import warnings
import datetime
warnings.filterwarnings("ignore")

# 2. 데이터 불러오기

## 1) read_csv
만일 coin csv가 존재하지 않는다면 coin_index만큼 1로 초기화하여 새로 coin.csv를 생성하고 저장

In [4]:
data_path = '/content/drive/MyDrive/dataset'
train_x_df = pd.read_csv(data_path  + "/train_x_df.csv")
train_y_df = pd.read_csv(data_path  + "/train_y_df.csv")

file = data_path + "/coin.csv"
if os.path.isfile(file):
  print("yes")  
  coin_df = pd.read_csv(data_path + "/coin.csv")
else:
  print("no")
  MAX = max(train_x_df["coin_index"]) + 1
  coin_df = pd.DataFrame(np.zeros([MAX, 2], np.int64), columns = ['coin_index', 'buy_quantity'])

  coin_df.loc[:, 'coin_index'] = coin_df.index
  coin_df.loc[:,'buy_quantity'] = 1

  coin_df.to_csv(data_path + "/coin.csv", mode = 'w')

yes


# New Section

## 2) numpy arrary로 변환하기

In [5]:
def df2d_to_array3d(df_2d):
    # 입력 받은 2차원 데이터 프레임을 3차원 numpy array로 변경하는 함수
    feature_size = df_2d.iloc[:,2:].shape[1]
    time_size = len(df_2d.time.value_counts())
    sample_size = len(df_2d.sample_id.value_counts())
    sample_index = df_2d.sample_id.value_counts().index
    array_3d = df_2d.iloc[:,2:].values.reshape([sample_size, time_size, feature_size])
    return array_3d

In [6]:
train_x_array = df2d_to_array3d(train_x_df)
train_y_array = df2d_to_array3d(train_y_df)

In [7]:
print(f'''
train_x_array {train_x_array.shape}
train_y_array {train_y_array.shape}
''')


train_x_array (7661, 1380, 10)
train_y_array (7661, 120, 10)



## 3) Plot 함수

In [8]:
def plot_series(x_series, y_series):
    #입력 series와 출력 series를 연속적으로 연결하여 시각적으로 보여주는 코드 입니다.
    plt.plot(x_series, label = 'input_series')
    plt.plot(np.arange(len(x_series), len(x_series)+len(y_series)),
             y_series, label = 'output_series')
    plt.axhline(1, c = 'red')
    plt.legend()

# 3. Coin_df 트레이닝

## 1) 트레이닝 필요 함수 생성

### <1> 루프 내부에서 사용할 submission
특정 샘플로 prediction이 만들어지자마자 submission을 생성하기 때문에 row는 한줄이 된다

submission의 buy_quantity는 coin_df에서 해당 샘플의 coin_index를 받은 값을 사용

sell_time은 119로 고정 (이유: var과 fbProphet은 추세 예측에 강점을 가지고 중간에 어떻게 변할지 예측은 매우 어렵기 때문)

prophet과 var 모두 상승을 예측했을 때만 매수 진행 (하나라도 하락 예측하면 buy_quantity를 0으로 고정)

In [9]:
def array_to_submission_immediate(prophet_pred_array, var_pred_array, idx, coin_idx):
    
    submission = pd.DataFrame(np.zeros([1,2], np.int64),
                columns = ['buy_quantity', 'sell_time'])
    submission = submission.reset_index()
    submission['sell_time'] = 119

    if prophet_pred_array[119] > 1.0 and var_pred_array[119] > 1.0:
      submission['buy_quan40tity'] = coin_df.loc[coin_idx, 'buy_quantity']
      
    submission.columns = ['sample_id','buy_quantity', 'sell_time']
    submission['sample_id'] = idx
    return submission

### <2> y_df에서 array로 변환 함수

In [10]:
def df2d_to_answer(df_2d):
    # valid_y_df로부터
    # open 가격 정보가 포함된
    # [샘플 수, 120분] 크기의 
    # 2차원 array를 반환하는 함수
    feature_size = df_2d.iloc[:,2:].shape[1]
    time_size = len(df_2d.time.value_counts())
    sample_size = len(df_2d.sample_id.value_counts())
    sample_index = df_2d.sample_id.value_counts().index
    array_2d = df_2d.open.values.reshape([sample_size, time_size])
    sample_index = list(sample_index)
    return array_2d, sample_index

### <3> 루프 내부에서 사용할 결과 계산 및 피드백 함수
특정 샘플에 대한 submission을 받고 현재 잔액을 입력 받은 후,
submission에 입력된 buy_quantity와 sell_time으로 거래 진행.

이후 잔액을 확인하고 이득을 봤으면 coin_df에서 매수량을 올리고 (최대 1.0)
손해를 봤을 경우 coin_df에서 매수량을 줄이는 것으로 피드백 (최소 0.0)

매수량을 올리고 줄이는 폭은 손실율, 수익률을 기반으로 함

In [11]:
def COIN_immediate(y_df, submission, current_money, idx, coin_idx, df2d_to_answer = df2d_to_answer):
    # 2차원 데이터프레임에서 open 시점 데이터만 추출하여 array로 복원
    # sample_id정보를 index에 저장
    y_array, index = df2d_to_answer(y_df)

    submission_id = int(submission['sample_id'])
    
    sell_time  = int(submission['sell_time'])
    buy_price  = y_array[idx, 0] ##매수했을 때의 실제 가격(처음 0분의 실제 가격)
    sell_price = y_array[idx, sell_time] ##팔 때의 실제 가격(마지막 120분의 실제 가격)
    buy_quantity = float(submission['buy_quantity']) * current_money ##buy_quantity랑 현재 가지고 있는 토탈 머니 곱함
    residual = current_money - buy_quantity ##이전의 토탈머니와 buy_quantity값 곱해준 것과의 차이
    ratio = sell_price / buy_price ##살 때와 팔 때의 가격 차이
    pre_money = current_money
    current_money = buy_quantity * ratio * 0.9995 * 0.9995 + residual

    if pre_money > current_money:
      coin_df.loc[coin_idx, 'buy_quantity'] = max(coin_df.loc[coin_idx, 'buy_quantity'] - (pre_money - current_money) / pre_money, 0.0)
    elif pre_money < current_money and coin_df.loc[coin_idx, 'buy_quantity'] < 1:
      coin_df.loc[coin_idx, 'buy_quantity'] = min(coin_df.loc[coin_idx, 'buy_quantity'] + (current_money - pre_money) / pre_money, 1.0)            
    coin_df.to_csv(data_path + "/coin.csv", mode = 'w')    
    return current_money

## 1) 샘플 Set 가져오기

### <1> 공용 상수

In [12]:
# 시작 sample_id
SAMPLE_SET_INDEX_START = 0
# 종료 sample_id
SAMPLE_SET_INDEX_END = 100
# 초기 시작금
TOTAL_MONEY      = 10000 # dollors

pred_start_id = train_x_df.sample_id.min()
pred_fin_id = train_x_df.sample_id.max()


### <2> Prophet용 Sample 데이터

In [13]:
start_time = '2021-01-31 00:00:00'
start_dt = datetime.datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S')

x_prophet_df = train_x_df[SAMPLE_SET_INDEX_START <= train_x_df.sample_id]
x_prophet_df = x_prophet_df[SAMPLE_SET_INDEX_END >= x_prophet_df.sample_id]
y_prophet_df = train_y_df[SAMPLE_SET_INDEX_START <= train_y_df.sample_id]
y_prophet_df = y_prophet_df[SAMPLE_SET_INDEX_END >= y_prophet_df.sample_id]


prophet_pred_array = np.zeros([(SAMPLE_SET_INDEX_END - SAMPLE_SET_INDEX_START) + 1, 120])
x_prophet_array = df2d_to_array3d(x_prophet_df)
y_prophet_array = df2d_to_array3d(y_prophet_df)

### <3> VAR용 Sample 데이터

In [14]:
var_variables = ["open_ema20","volume_ema20","trades_ema20"]
var_variables_1d = ["open_1d","volume_1d","trades_1d"]

x_var_df = train_x_df[SAMPLE_SET_INDEX_START <= train_x_df.sample_id]
x_var_df = x_var_df[x_var_df.sample_id <= SAMPLE_SET_INDEX_END]
y_var_df = train_y_df[SAMPLE_SET_INDEX_START <= train_y_df.sample_id]
y_var_df = y_var_df[y_var_df.sample_id <= SAMPLE_SET_INDEX_END]

x_var_df['open_ema20'] = x_var_df['open'].ewm(20).mean()
x_var_df['volume_ema20'] = x_var_df['volume'].ewm(20).mean()
x_var_df['trades_ema20'] = x_var_df['trades'].ewm(20).mean()
x_var_df['open_ema20'].iloc[-1] = x_var_df['open'].iloc[-1]
x_var_df['volume_ema20'].iloc[-1] = x_var_df['volume'].iloc[-1]
x_var_df['trades_ema20'].iloc[-1] = x_var_df['trades'].iloc[-1]

var_pred_array = np.zeros([(SAMPLE_SET_INDEX_END - SAMPLE_SET_INDEX_START) + 1, 120])
x_var_array = df2d_to_array3d(x_var_df)
y_var_array = df2d_to_array3d(y_var_df)

print(x_var_df)


        sample_id  time  coin_index  ...  open_ema20  volume_ema20  trades_ema20
0               0     0           9  ...    0.983614  1.334052e-03      0.009855
1               0     1           9  ...    0.983425  1.380855e-03      0.013072
2               0     2           9  ...    0.983169  1.437055e-03      0.013455
3               0     3           9  ...    0.983126  1.727950e-03      0.015631
4               0     4           9  ...    0.983370  1.967757e-03      0.016907
...           ...   ...         ...  ...         ...           ...           ...
139375        100  1375           7  ...    1.006019  6.082070e+06   1289.670078
139376        100  1376           7  ...    1.005535  6.213324e+06   1311.453221
139377        100  1377           7  ...    1.005266  6.024284e+06   1277.801687
139378        100  1378           7  ...    1.004989  5.858186e+06   1264.951683
139379        100  1379           7  ...    1.000000  1.902683e+06    750.363953

[139380 rows x 15 columns]


## 2) 샘플 추론

In [15]:
for idx in tqdm(range(0,(SAMPLE_SET_INDEX_END - SAMPLE_SET_INDEX_START) + 1)):
    # Prophet
    try:
        x_series = x_prophet_array[idx,:,1]

        x_df = pd.DataFrame()
        x_df['ds'] = [start_dt + datetime.timedelta(minutes = time_min) for time_min in np.arange(1, x_series.shape[0]+1).tolist()]
        x_df['y'] = x_series.tolist()

        prophet = Prophet(seasonality_mode='multiplicative', 
                  yearly_seasonality=False,
                  weekly_seasonality=False, daily_seasonality=False, changepoint_range = 0.98, changepoint_prior_scale = 0.05)

        
        prophet.add_seasonality(name='seasonality_A', period=1/12, fourier_order=7)
        prophet.add_seasonality(name='seasonality_A', period=1/8, fourier_order=14)
        prophet.fit(x_df)

        # 120분 테스트 데이터를 예측합니다.
        future_data = prophet.make_future_dataframe(periods=120, freq='min')
        forecast_data = prophet.predict(future_data)

        pred_y = forecast_data.yhat.values[-120:]
        pred_y_lower = forecast_data.yhat_lower.values[-120:]
        pred_y_upper = forecast_data.yhat_upper.values[-120:]

        prophet_pred_array[idx,:] = pred_y

        
    except:
        print(idx, " 샘플은 수렴하지 않습니다.")
        pass
    
    #VAR
    sample_df = x_var_df.groupby("sample_id").get_group(SAMPLE_SET_INDEX_START + idx)
    sample_df_set = sample_df[var_variables]
    sample_df_set.index = sample_df["time"]

    sample_df_set = sample_df_set.diff().dropna()

    forecasting_model = VAR(sample_df_set)
    results = forecasting_model.fit(240)
    lag_order = results.k_ar

    forecast_idx = pd.Index(np.arange(120), dtype='int64', name='time', length=120)
    forecast = pd.DataFrame(results.forecast(sample_df_set.values[-lag_order:], steps=120), index=forecast_idx, columns=var_variables_1d)
    forecast["open_forecasted"] = sample_df["open"].iloc[-1] + forecast["open_1d"].cumsum()
    forecast["volume_forecasted"] = sample_df["volume_ema20"].iloc[-1] + forecast["volume_1d"].cumsum()
    forecast["trades_forecasted"] = sample_df["trades_ema20"].iloc[-1] + forecast["trades_1d"].cumsum()

    forecast_list = forecast["open_forecasted"].tolist()
    var_pred_array[idx,:] = forecast_list

    # plot_series(train_x_array[idx,:,1], train_y_array[idx,:,1])
    # plt.plot(np.arange(1380, 1380+120), forecast_list, label = 'prediction_var')
    # plt.plot(np.arange(1380, 1380+120), pred_y, label = 'prediction_prophet ')
    # plt.plot(np.arange(1380, 1380+120), pred_y_lower, label = 'prediction_prophet_lower')
    # plt.plot(np.arange(1380, 1380+120), pred_y_upper, label = 'prediction_prophet_upper')
    # plt.legend()
    # plt.savefig(f"{data_path}/plot_history/plot_{idx + SAMPLE_SET_INDEX_START}.png")
    # plt.clf()

    coin_idx = int(sample_df['coin_index'].values[0])
    print(coin_idx)
    submission = array_to_submission_immediate(pred_y, forecast_list, SAMPLE_SET_INDEX_START+ idx, coin_idx)
    print(submission)
    print(TOTAL_MONEY)

    TOTAL_MONEY = COIN_immediate(y_prophet_df, submission, TOTAL_MONEY, idx, coin_idx)


  1%|          | 1/101 [00:07<13:04,  7.84s/it]

9
   sample_id  buy_quantity  sell_time
0          0      0.715404        119
10000


  2%|▏         | 2/101 [00:15<12:38,  7.66s/it]

9
   sample_id  buy_quantity  sell_time
0          1             0        119
9991.88784333394


  3%|▎         | 3/101 [00:21<12:06,  7.41s/it]

4
   sample_id  buy_quantity  sell_time
0          2             0        119
9991.88784333394


  4%|▍         | 4/101 [00:28<11:40,  7.23s/it]

0
   sample_id  buy_quantity  sell_time
0          3      0.818764        119
9991.88784333394


  5%|▍         | 5/101 [00:34<11:01,  6.90s/it]

7
   sample_id  buy_quantity  sell_time
0          4             0        119
10000.222874676998


  6%|▌         | 6/101 [00:41<10:52,  6.86s/it]

4
   sample_id  buy_quantity  sell_time
0          5             0        119
10000.222874676998


  7%|▋         | 7/101 [00:48<10:43,  6.85s/it]

6
   sample_id  buy_quantity  sell_time
0          6             0        119
10000.222874676998


  8%|▊         | 8/101 [00:54<10:28,  6.76s/it]

0
   sample_id  buy_quantity  sell_time
0          7             0        119
10000.222874676998


  9%|▉         | 9/101 [01:01<10:17,  6.71s/it]

0
   sample_id  buy_quantity  sell_time
0          8      0.819598        119
10000.222874676998


 10%|▉         | 10/101 [01:09<10:37,  7.01s/it]

9
   sample_id  buy_quantity  sell_time
0          9             0        119
10004.386156657909


 11%|█         | 11/101 [01:15<10:22,  6.91s/it]

4
   sample_id  buy_quantity  sell_time
0         10             0        119
10004.386156657909


 12%|█▏        | 12/101 [01:21<09:46,  6.59s/it]

6
   sample_id  buy_quantity  sell_time
0         11             0        119
10004.386156657909


 13%|█▎        | 13/101 [01:28<09:36,  6.55s/it]

4
   sample_id  buy_quantity  sell_time
0         12      0.829056        119
10004.386156657909


 14%|█▍        | 14/101 [01:34<09:14,  6.38s/it]

4
   sample_id  buy_quantity  sell_time
0         13             0        119
9998.952426051945


 15%|█▍        | 15/101 [01:40<09:07,  6.36s/it]

0
   sample_id  buy_quantity  sell_time
0         14             0        119
9998.952426051945


 16%|█▌        | 16/101 [01:46<09:00,  6.36s/it]

7
   sample_id  buy_quantity  sell_time
0         15             0        119
9998.952426051945


 17%|█▋        | 17/101 [01:54<09:17,  6.63s/it]

8
   sample_id  buy_quantity  sell_time
0         16             0        119
9998.952426051945


 18%|█▊        | 18/101 [01:59<08:49,  6.38s/it]

9
   sample_id  buy_quantity  sell_time
0         17             0        119
9998.952426051945


 19%|█▉        | 19/101 [02:06<08:51,  6.49s/it]

9
   sample_id  buy_quantity  sell_time
0         18             0        119
9998.952426051945


 20%|█▉        | 20/101 [02:12<08:33,  6.34s/it]

5
   sample_id  buy_quantity  sell_time
0         19             0        119
9998.952426051945


 21%|██        | 21/101 [02:18<08:09,  6.12s/it]

1
   sample_id  buy_quantity  sell_time
0         20             0        119
9998.952426051945


 22%|██▏       | 22/101 [02:25<08:30,  6.46s/it]

9
   sample_id  buy_quantity  sell_time
0         21             0        119
9998.952426051945


 23%|██▎       | 23/101 [02:32<08:27,  6.50s/it]

6
   sample_id  buy_quantity  sell_time
0         22      0.704925        119
9998.952426051945


 24%|██▍       | 24/101 [02:38<08:18,  6.47s/it]

4
   sample_id  buy_quantity  sell_time
0         23             0        119
10052.318699455072


 25%|██▍       | 25/101 [02:44<08:09,  6.44s/it]

9
   sample_id  buy_quantity  sell_time
0         24             0        119
10052.318699455072


 26%|██▌       | 26/101 [02:51<08:01,  6.42s/it]

5
   sample_id  buy_quantity  sell_time
0         25             0        119
10052.318699455072


 27%|██▋       | 27/101 [02:58<08:07,  6.59s/it]

4
   sample_id  buy_quantity  sell_time
0         26             0        119
10052.318699455072


 28%|██▊       | 28/101 [03:04<07:48,  6.42s/it]

9
   sample_id  buy_quantity  sell_time
0         27             0        119
10052.318699455072


 29%|██▊       | 29/101 [03:11<08:05,  6.75s/it]

9
   sample_id  buy_quantity  sell_time
0         28             0        119
10052.318699455072


 30%|██▉       | 30/101 [03:17<07:46,  6.57s/it]

7
   sample_id  buy_quantity  sell_time
0         29             0        119
10052.318699455072


 31%|███       | 31/101 [03:24<07:40,  6.58s/it]

0
   sample_id  buy_quantity  sell_time
0         30             0        119
10052.318699455072


 32%|███▏      | 32/101 [03:30<07:18,  6.35s/it]

4
   sample_id  buy_quantity  sell_time
0         31      0.828513        119
10052.318699455072


 33%|███▎      | 33/101 [03:36<07:04,  6.24s/it]

4
   sample_id  buy_quantity  sell_time
0         32             0        119
10117.527692313344


 34%|███▎      | 34/101 [03:42<06:56,  6.21s/it]

6
   sample_id  buy_quantity  sell_time
0         33             0        119
10117.527692313344


 35%|███▍      | 35/101 [03:50<07:27,  6.78s/it]

0
   sample_id  buy_quantity  sell_time
0         34             0        119
10117.527692313344


 36%|███▌      | 36/101 [03:57<07:17,  6.73s/it]

7
   sample_id  buy_quantity  sell_time
0         35             0        119
10117.527692313344


 37%|███▋      | 37/101 [04:04<07:21,  6.89s/it]

6
   sample_id  buy_quantity  sell_time
0         36             0        119
10117.527692313344


 38%|███▊      | 38/101 [04:11<07:12,  6.87s/it]

9
   sample_id  buy_quantity  sell_time
0         37             0        119
10117.527692313344


 39%|███▊      | 39/101 [04:17<07:01,  6.79s/it]

0
   sample_id  buy_quantity  sell_time
0         38             0        119
10117.527692313344


 40%|███▉      | 40/101 [04:26<07:17,  7.18s/it]

6
   sample_id  buy_quantity  sell_time
0         39             0        119
10117.527692313344


 41%|████      | 41/101 [04:32<07:01,  7.02s/it]

6
   sample_id  buy_quantity  sell_time
0         40             0        119
10117.527692313344


 42%|████▏     | 42/101 [04:38<06:39,  6.77s/it]

8
   sample_id  buy_quantity  sell_time
0         41             0        119
10117.527692313344


 43%|████▎     | 43/101 [04:45<06:27,  6.68s/it]

7
   sample_id  buy_quantity  sell_time
0         42             0        119
10117.527692313344


 44%|████▎     | 44/101 [04:51<06:18,  6.64s/it]

8
   sample_id  buy_quantity  sell_time
0         43             0        119
10117.527692313344


 45%|████▍     | 45/101 [05:00<06:49,  7.31s/it]

4
   sample_id  buy_quantity  sell_time
0         44             0        119
10117.527692313344


 46%|████▌     | 46/101 [05:06<06:17,  6.86s/it]

5
   sample_id  buy_quantity  sell_time
0         45             0        119
10117.527692313344


 47%|████▋     | 47/101 [05:12<05:58,  6.63s/it]

4
   sample_id  buy_quantity  sell_time
0         46             0        119
10117.527692313344


 48%|████▊     | 48/101 [05:19<05:47,  6.55s/it]

7
   sample_id  buy_quantity  sell_time
0         47      0.987986        119
10117.527692313344


 49%|████▊     | 49/101 [05:25<05:44,  6.62s/it]

8
   sample_id  buy_quantity  sell_time
0         48             0        119
10101.887814962674


 50%|████▉     | 50/101 [05:31<05:26,  6.40s/it]

7
   sample_id  buy_quantity  sell_time
0         49       0.98644        119
10101.887814962674


 50%|█████     | 51/101 [05:38<05:25,  6.50s/it]

6
   sample_id  buy_quantity  sell_time
0         50             0        119
9909.18917664458


 51%|█████▏    | 52/101 [05:44<05:06,  6.26s/it]

4
   sample_id  buy_quantity  sell_time
0         51         0.835        119
9909.18917664458


 52%|█████▏    | 53/101 [05:50<05:04,  6.34s/it]

6
   sample_id  buy_quantity  sell_time
0         52      0.710262        119
9936.162079906882


 53%|█████▎    | 54/101 [05:56<04:55,  6.28s/it]

8
   sample_id  buy_quantity  sell_time
0         53      0.727155        119
9906.849512878896


 54%|█████▍    | 55/101 [06:02<04:43,  6.15s/it]

4
   sample_id  buy_quantity  sell_time
0         54      0.837722        119
9929.383310042322


 55%|█████▌    | 56/101 [06:09<04:40,  6.23s/it]

7
   sample_id  buy_quantity  sell_time
0         55             0        119
9917.42289147595


 56%|█████▋    | 57/101 [06:14<04:27,  6.09s/it]

9
   sample_id  buy_quantity  sell_time
0         56             0        119
9917.42289147595


 57%|█████▋    | 58/101 [06:21<04:31,  6.31s/it]

0
   sample_id  buy_quantity  sell_time
0         57             0        119
9917.42289147595


 58%|█████▊    | 59/101 [06:29<04:38,  6.63s/it]

6
   sample_id  buy_quantity  sell_time
0         58             0        119
9917.42289147595


 59%|█████▉    | 60/101 [06:35<04:32,  6.66s/it]

0
   sample_id  buy_quantity  sell_time
0         59             0        119
9917.42289147595


 60%|██████    | 61/101 [06:41<04:19,  6.50s/it]

3
   sample_id  buy_quantity  sell_time
0         60             0        119
9917.42289147595


 61%|██████▏   | 62/101 [06:47<04:03,  6.25s/it]

4
   sample_id  buy_quantity  sell_time
0         61             0        119
9917.42289147595


 62%|██████▏   | 63/101 [06:53<03:58,  6.28s/it]

0
   sample_id  buy_quantity  sell_time
0         62             0        119
9917.42289147595


 63%|██████▎   | 64/101 [06:59<03:45,  6.09s/it]

8
   sample_id  buy_quantity  sell_time
0         63             0        119
9917.42289147595


 64%|██████▍   | 65/101 [07:05<03:36,  6.02s/it]

0
   sample_id  buy_quantity  sell_time
0         64             0        119
9917.42289147595


 65%|██████▌   | 66/101 [07:12<03:38,  6.24s/it]

6
   sample_id  buy_quantity  sell_time
0         65             0        119
9917.42289147595


 66%|██████▋   | 67/101 [07:19<03:42,  6.54s/it]

7
   sample_id  buy_quantity  sell_time
0         66             0        119
9917.42289147595


 67%|██████▋   | 68/101 [07:25<03:35,  6.53s/it]

1
   sample_id  buy_quantity  sell_time
0         67             0        119
9917.42289147595


 68%|██████▊   | 69/101 [07:31<03:18,  6.22s/it]

4
   sample_id  buy_quantity  sell_time
0         68             0        119
9917.42289147595


 69%|██████▉   | 70/101 [07:37<03:12,  6.23s/it]

7
   sample_id  buy_quantity  sell_time
0         69             0        119
9917.42289147595


 70%|███████   | 71/101 [07:44<03:12,  6.42s/it]

8
   sample_id  buy_quantity  sell_time
0         70             0        119
9917.42289147595


 71%|███████▏  | 72/101 [07:51<03:09,  6.52s/it]

6
   sample_id  buy_quantity  sell_time
0         71             0        119
9917.42289147595


 72%|███████▏  | 73/101 [07:58<03:06,  6.67s/it]

4
   sample_id  buy_quantity  sell_time
0         72             0        119
9917.42289147595


 73%|███████▎  | 74/101 [08:04<02:56,  6.52s/it]

5
   sample_id  buy_quantity  sell_time
0         73             0        119
9917.42289147595


 74%|███████▍  | 75/101 [08:11<02:54,  6.72s/it]

6
   sample_id  buy_quantity  sell_time
0         74      0.707312        119
9917.42289147595


 75%|███████▌  | 76/101 [08:17<02:43,  6.53s/it]

6
   sample_id  buy_quantity  sell_time
0         75             0        119
9938.406116796428


 76%|███████▌  | 77/101 [08:23<02:30,  6.25s/it]

5
   sample_id  buy_quantity  sell_time
0         76             0        119
9938.406116796428


 77%|███████▋  | 78/101 [08:29<02:25,  6.31s/it]

7
   sample_id  buy_quantity  sell_time
0         77             0        119
9938.406116796428


 78%|███████▊  | 79/101 [08:36<02:21,  6.44s/it]

0
   sample_id  buy_quantity  sell_time
0         78             0        119
9938.406116796428


 79%|███████▉  | 80/101 [08:44<02:22,  6.77s/it]

8
   sample_id  buy_quantity  sell_time
0         79             0        119
9938.406116796428


 80%|████████  | 81/101 [08:50<02:13,  6.66s/it]

8
   sample_id  buy_quantity  sell_time
0         80             0        119
9938.406116796428


 81%|████████  | 82/101 [08:56<02:03,  6.52s/it]

9
   sample_id  buy_quantity  sell_time
0         81             0        119
9938.406116796428


 82%|████████▏ | 83/101 [09:03<01:57,  6.53s/it]

9
   sample_id  buy_quantity  sell_time
0         82             0        119
9938.406116796428


 83%|████████▎ | 84/101 [09:09<01:51,  6.57s/it]

6
   sample_id  buy_quantity  sell_time
0         83             0        119
9938.406116796428


 84%|████████▍ | 85/101 [09:15<01:40,  6.28s/it]

1
   sample_id  buy_quantity  sell_time
0         84             0        119
9938.406116796428


 85%|████████▌ | 86/101 [09:22<01:37,  6.48s/it]

5
   sample_id  buy_quantity  sell_time
0         85             0        119
9938.406116796428


 86%|████████▌ | 87/101 [09:29<01:32,  6.62s/it]

7
   sample_id  buy_quantity  sell_time
0         86             0        119
9938.406116796428


 87%|████████▋ | 88/101 [09:36<01:28,  6.81s/it]

6
   sample_id  buy_quantity  sell_time
0         87             0        119
9938.406116796428


 88%|████████▊ | 89/101 [09:42<01:20,  6.68s/it]

9
   sample_id  buy_quantity  sell_time
0         88             0        119
9938.406116796428


 89%|████████▉ | 90/101 [09:49<01:13,  6.64s/it]

0
   sample_id  buy_quantity  sell_time
0         89             0        119
9938.406116796428


 90%|█████████ | 91/101 [09:56<01:06,  6.68s/it]

7
   sample_id  buy_quantity  sell_time
0         90             0        119
9938.406116796428


 91%|█████████ | 92/101 [10:03<01:00,  6.77s/it]

0
   sample_id  buy_quantity  sell_time
0         91      0.820014        119
9938.406116796428


 92%|█████████▏| 93/101 [10:09<00:52,  6.60s/it]

4
   sample_id  buy_quantity  sell_time
0         92             0        119
10076.449968815792


 93%|█████████▎| 94/101 [10:15<00:44,  6.30s/it]

7
   sample_id  buy_quantity  sell_time
0         93             0        119
10076.449968815792


 94%|█████████▍| 95/101 [10:21<00:38,  6.40s/it]

9
   sample_id  buy_quantity  sell_time
0         94      0.714592        119
10076.449968815792


 95%|█████████▌| 96/101 [10:29<00:33,  6.69s/it]

1
   sample_id  buy_quantity  sell_time
0         95             0        119
10062.708807136976


 96%|█████████▌| 97/101 [10:35<00:26,  6.52s/it]

5
   sample_id  buy_quantity  sell_time
0         96             0        119
10062.708807136976


 97%|█████████▋| 98/101 [10:40<00:18,  6.23s/it]

5
   sample_id  buy_quantity  sell_time
0         97             0        119
10062.708807136976


 98%|█████████▊| 99/101 [10:46<00:12,  6.20s/it]

8
   sample_id  buy_quantity  sell_time
0         98             0        119
10062.708807136976


 99%|█████████▉| 100/101 [10:53<00:06,  6.29s/it]

0
   sample_id  buy_quantity  sell_time
0         99      0.833904        119
10062.708807136976


100%|██████████| 101/101 [10:59<00:00,  6.53s/it]

7
   sample_id  buy_quantity  sell_time
0        100             0        119
9993.614152867





In [16]:
print(TOTAL_MONEY)
print(coin_df)

9993.614152867
   Unnamed: 0  Unnamed: 0.1  ...  coin_index  buy_quantity
0           0             0  ...           0      0.827038
1           1             1  ...           1      0.752804
2           2             2  ...           2      0.757625
3           3             3  ...           3      0.983702
4           4             4  ...           4      0.836517
5           5             5  ...           5      0.902802
6           6             6  ...           6      0.709428
7           7             7  ...           7      0.967365
8           8             8  ...           8      0.729429
9           9             9  ...           9      0.713229

[10 rows x 7 columns]


# 4. Coin_df 트레이닝 종료 후 데이터 추론

In [17]:
def array_to_submission(prophet_pred_array, var_pred_array):
    # 입력 x_arrry와 출력 pred_arry를 통해서 
    # buy_quantitiy와 sell_time을 결정
    
    submission = pd.DataFrame(np.zeros([prophet_pred_array.shape[0],2], np.int64),
                columns = ['buy_quantity', 'sell_time'])
    submission = submission.reset_index()
    submission.loc[:, 'sell_time'] = 119


    for idx in range(0, prophet_pred_array.shape[0]):
      coin_idx = train_x_df.loc[SAMPLE_SET_INDEX_START + idx, 'coin_index']
      if prophet_pred_array[idx, 119] > 1.0 and var_pred_array[idx, 119] > 1.0:
        submission.loc[idx, 'buy_quantity'] = coin_df.loc[coin_idx, 'buy_quantity']
      

    # 모델이 예측값 중 최대 값에 해당하는 시간에 매도
    submission.columns = ['sample_id','buy_quantity', 'sell_time']
    submission['sample_id'] = np.arange(SAMPLE_SET_INDEX_START,SAMPLE_SET_INDEX_END + 1)
    return submission

In [18]:
valid_submission = array_to_submission(prophet_pred_array, var_pred_array)

In [19]:
valid_submission

Unnamed: 0,sample_id,buy_quantity,sell_time
0,0,0.713229,119
1,1,0.000000,119
2,2,0.000000,119
3,3,0.713229,119
4,4,0.000000,119
...,...,...,...
96,96,0.000000,119
97,97,0.000000,119
98,98,0.000000,119
99,99,0.713229,119


In [20]:
valid_submission.buy_quantity.value_counts()

0.000000    85
0.713229    16
Name: buy_quantity, dtype: int64

### <4> 투자 후 금액 계산하기

In [21]:
def df2d_to_answer(df_2d):
    # valid_y_df로부터
    # open 가격 정보가 포함된
    # [샘플 수, 120분] 크기의 
    # 2차원 array를 반환하는 함수
    feature_size = df_2d.iloc[:,2:].shape[1]
    time_size = len(df_2d.time.value_counts())
    sample_size = len(df_2d.sample_id.value_counts())
    sample_index = df_2d.sample_id.value_counts().index
    array_2d = df_2d.open.values.reshape([sample_size, time_size])
    sample_index = list(sample_index)
    return array_2d, sample_index


def COIN(y_df, submission, df2d_to_answer = df2d_to_answer):
    # 2차원 데이터프레임에서 open 시점 데이터만 추출하여 array로 복원
    # sample_id정보를 index에 저장
    y_array, index = df2d_to_answer(y_df)
    
    # index 기준으로 submission을 다시 선택
    submission = submission.set_index(submission.columns[0])
    submission = submission.iloc[index, :]    
    
    # 초기 투자 비용은 10000 달러
    total_money      = 10000 # dolors
    total_money_list = []
    
    # 가장 처음 sample_id값
    start_index = submission.index[0]
    for row_idx in submission.index:
        sell_time  = submission.loc[row_idx, 'sell_time']
        buy_price  = y_array[row_idx - start_index, 0] ##매수했을 때의 실제 가격(처음 0분의 실제 가격)
        sell_price = y_array[row_idx - start_index, sell_time] ##팔 때의 실제 가격(마지막 120분의 실제 가격)
        buy_quantity = submission.loc[row_idx, 'buy_quantity'] * total_money ##buy_quantity랑 현재 가지고 있는 토탈 머니 곱함
        residual = total_money - buy_quantity ##이전의 토탈머니와 buy_quantity값 곱해준 것과의 차이
        ratio = sell_price / buy_price ##살 때와 팔 때의 가격 차이
        pre_money = total_money
        total_money = buy_quantity * ratio * 0.9995 * 0.9995 + residual
        total_money_list.append(total_money)
        coin_idx = train_x_df.loc[SAMPLE_SET_INDEX_START + row_idx, 'coin_index']
        if pre_money > total_money:
          coin_df.loc[coin_idx, 'buy_quantity'] = max(coin_df.loc[coin_idx, 'buy_quantity'] - (pre_money - total_money) / pre_money, 0.0)
        elif pre_money < total_money and coin_df.loc[coin_idx, 'buy_quantity'] < 1:
          coin_df.loc[coin_idx, 'buy_quantity'] = min(coin_df.loc[coin_idx, 'buy_quantity'] + (total_money - pre_money) / pre_money, 1.0)     

    print(coin_df)             
    coin_df.to_csv(data_path + "/coin.csv", mode = 'w')    
    return total_money, total_money_list

# New Section

In [22]:
total_money, total_money_list = COIN(y_prophet_df,
                                     valid_submission)

   Unnamed: 0  Unnamed: 0.1  ...  coin_index  buy_quantity
0           0             0  ...           0      0.827038
1           1             1  ...           1      0.752804
2           2             2  ...           2      0.757625
3           3             3  ...           3      0.983702
4           4             4  ...           4      0.836517
5           5             5  ...           5      0.902802
6           6             6  ...           6      0.709428
7           7             7  ...           7      0.967365
8           8             8  ...           8      0.729429
9           9             9  ...           9      0.795206

[10 rows x 7 columns]


In [24]:
# 투자 후 금액
print(total_money)

10833.203995832064


In [None]:
# 투자 히스토리
plt.plot(total_momey_list)
plt.title("history")
plt.show()

NameError: ignored

# 4. test 데이터 추론하기

In [None]:

test_x_df = pd.read_csv(data_path  + "/test_x_df.csv")
SAMPLE_SET_INDEX_END = test_x_df.sample_id.max()
SAMPLE_SET_INDEX_START = test_x_df.sample_id.min()

start_time = '2021-01-31 00:00:00'
start_dt = datetime.datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S')

x_prophet_df = test_x_df[SAMPLE_SET_INDEX_START <= test_x_df.sample_id]
x_prophet_df = x_prophet_df[SAMPLE_SET_INDEX_END >= x_prophet_df.sample_id]


prophet_pred_array = np.zeros([(SAMPLE_SET_INDEX_END - SAMPLE_SET_INDEX_START) + 1, 120])
x_prophet_array = df2d_to_array3d(x_prophet_df)


var_variables = ["open_ema20","volume_ema20","trades_ema20"]
var_variables_1d = ["open_1d","volume_1d","trades_1d"]

x_var_df = test_x_df[SAMPLE_SET_INDEX_START <= test_x_df.sample_id]
x_var_df = x_var_df[x_var_df.sample_id <= SAMPLE_SET_INDEX_END]

x_var_df['open_ema20'] = x_var_df['open'].ewm(20).mean()
x_var_df['volume_ema20'] = x_var_df['volume'].ewm(20).mean()
x_var_df['trades_ema20'] = x_var_df['trades'].ewm(20).mean()
x_var_df['open_ema20'].iloc[-1] = x_var_df['open'].iloc[-1]
x_var_df['volume_ema20'].iloc[-1] = x_var_df['volume'].iloc[-1]
x_var_df['trades_ema20'].iloc[-1] = x_var_df['trades'].iloc[-1]

var_pred_array = np.zeros([(SAMPLE_SET_INDEX_END - SAMPLE_SET_INDEX_START) + 1, 120])
x_var_array = df2d_to_array3d(x_var_df)


In [None]:



for idx in tqdm(range(0,(SAMPLE_SET_INDEX_END - SAMPLE_SET_INDEX_START) + 1)):
    # Prophet
    try:
        x_series = x_prophet_array[idx,:,1]

        x_df = pd.DataFrame()
        x_df['ds'] = [start_dt + datetime.timedelta(minutes = time_min) for time_min in np.arange(1, x_series.shape[0]+1).tolist()]
        x_df['y'] = x_series.tolist()

        prophet = Prophet(seasonality_mode='multiplicative', 
                  yearly_seasonality=False,
                  weekly_seasonality=False, daily_seasonality=False, changepoint_range = 0.98, changepoint_prior_scale = 0.05)

        
        prophet.add_seasonality(name='seasonality_A', period=1/12, fourier_order=7)
        prophet.add_seasonality(name='seasonality_A', period=1/8, fourier_order=14)
        prophet.fit(x_df)

        # 120분 테스트 데이터를 예측합니다.
        future_data = prophet.make_future_dataframe(periods=120, freq='min')
        forecast_data = prophet.predict(future_data)

        pred_y = forecast_data.yhat.values[-120:]
        pred_y_lower = forecast_data.yhat_lower.values[-120:]
        pred_y_upper = forecast_data.yhat_upper.values[-120:]

        prophet_pred_array[idx,:] = pred_y

        
    except:
        print(idx, " 샘플은 수렴하지 않습니다.")
        pass
    
    #VAR
    sample_df = x_var_df.groupby("sample_id").get_group(SAMPLE_SET_INDEX_START + idx)
    sample_df_set = sample_df[var_variables]
    sample_df_set.index = sample_df["time"]

    sample_df_set = sample_df_set.diff().dropna()

    forecasting_model = VAR(sample_df_set)
    results = forecasting_model.fit(240)
    lag_order = results.k_ar

    forecast_idx = pd.Index(np.arange(120), dtype='int64', name='time', length=120)
    forecast = pd.DataFrame(results.forecast(sample_df_set.values[-lag_order:], steps=120), index=forecast_idx, columns=var_variables_1d)
    forecast["open_forecasted"] = sample_df["open"].iloc[-1] + forecast["open_1d"].cumsum()
    forecast["volume_forecasted"] = sample_df["volume_ema20"].iloc[-1] + forecast["volume_1d"].cumsum()
    forecast["trades_forecasted"] = sample_df["trades_ema20"].iloc[-1] + forecast["trades_1d"].cumsum()

    forecast_list = forecast["open_forecasted"].tolist()
    var_pred_array[idx,:] = forecast_list

    # plot_series(train_x_array[idx,:,1], train_y_array[idx,:,1])
    # plt.plot(np.arange(1380, 1380+120), forecast_list, label = 'prediction_var')
    # plt.plot(np.arange(1380, 1380+120), pred_y, label = 'prediction_prophet ')
    # plt.plot(np.arange(1380, 1380+120), pred_y_lower, label = 'prediction_prophet_lower')
    # plt.plot(np.arange(1380, 1380+120), pred_y_upper, label = 'prediction_prophet_upper')
    # plt.legend()
    # plt.savefig(f"{data_path}/plot_history/plot_{idx + SAMPLE_SET_INDEX_START}.png")
    # plt.clf()


100%|██████████| 760/760 [1:26:02<00:00,  6.79s/it]


## 추론한 결과와 coin_df 데이터를 바탕으로 submission df 생성하기

In [None]:
print(prophet_pred_array)
print(var_pred_array)
print(coin_df)

[[0.99827926 0.99784668 0.99732759 ... 0.94255388 0.94206669 0.94150858]
 [0.98580426 0.98508218 0.98433652 ... 0.9223994  0.92118779 0.92001722]
 [1.00523872 1.00548425 1.00564423 ... 1.04723398 1.0474283  1.04766779]
 ...
 [0.99633462 0.99601801 0.99579831 ... 1.00920719 1.00914444 1.00905431]
 [1.00309134 1.00435976 1.00563148 ... 1.08474518 1.0856608  1.08664854]
 [1.01301313 1.01307733 1.01306756 ... 1.04756782 1.0477861  1.0483071 ]]
[[0.99980034 0.99955044 0.99928177 ... 1.00118919 1.00126008 1.00137654]
 [1.00052801 1.00118931 1.00196517 ... 0.97928521 0.97938274 0.9791816 ]
 [1.00011001 1.00035516 1.00061384 ... 0.99658084 0.99669672 0.99687114]
 ...
 [1.00018517 1.00035872 1.00071757 ... 1.00201722 1.00179254 1.00169913]
 [1.00089979 1.00175338 1.00293778 ... 1.0121707  1.01328602 1.01441894]
 [0.99819848 0.99667783 0.99495826 ... 0.97258805 0.97302406 0.97310068]]
   Unnamed: 0  coin_index  buy_quantity
0           0           0      1.000000
1           1           1      0

In [None]:
def array_to_submission(prophet_pred_array, var_pred_array):
    # 입력 x_arrry와 출력 pred_arry를 통해서 
    # buy_quantitiy와 sell_time을 결정
    
    submission = pd.DataFrame(np.zeros([prophet_pred_array.shape[0],2], np.int64),
                columns = ['buy_quantity', 'sell_time'])
    submission = submission.reset_index()
    submission.loc[:, 'sell_time'] = 119
    sample_df_group = x_var_df.groupby("sample_id")

    for idx in range(0, prophet_pred_array.shape[0]):
      sample_df = sample_df_group.get_group(SAMPLE_SET_INDEX_START + idx)
      coin_idx = int(sample_df['coin_index'].values[0])
      print(coin_idx)
      if prophet_pred_array[idx, 119] > 1.0 and var_pred_array[idx, 119] > 1.0:
        submission.loc[idx, 'buy_quantity'] = coin_df.loc[coin_idx, 'buy_quantity']
      

    # 모델이 예측값 중 최대 값에 해당하는 시간에 매도
    submission.columns = ['sample_id','buy_quantity', 'sell_time']
    submission['sample_id'] = np.arange(SAMPLE_SET_INDEX_START,SAMPLE_SET_INDEX_END + 1)
    print(submission)
    return submission


valid_submission = array_to_submission(prophet_pred_array, var_pred_array)

1
5
3
1
7
8
5
4
8
7
3
4
7
3
5
4
8
3
8
5
8
4
7
3
4
1
5
0
0
4
1
3
0
2
8
7
4
8
5
0
7
7
6
9
3
9
6
3
5
1
0
2
0
5
0
2
2
7
6
5
0
1
7
5
1
1
9
4
5
6
2
9
5
4
9
1
1
6
8
4
4
8
0
4
2
6
5
9
9
3
3
8
7
5
7
3
4
7
3
3
4
3
3
8
6
9
5
2
2
9
4
7
9
1
7
2
9
6
0
9
2
8
3
3
3
6
2
3
0
7
8
2
7
6
4
4
9
1
5
3
8
1
1
6
5
1
9
5
5
0
5
6
7
3
9
2
7
2
6
3
0
0
3
5
3
8
6
8
7
1
8
4
1
9
9
0
9
4
2
0
5
1
2
8
4
3
8
6
0
0
9
8
7
6
2
9
8
6
3
0
0
0
2
6
2
3
1
4
9
0
9
4
6
9
9
9
9
0
4
3
5
6
0
5
8
7
8
5
7
0
5
7
8
1
7
9
6
4
0
4
9
3
6
1
5
8
1
0
3
0
1
7
1
1
8
2
0
6
8
0
3
2
1
8
8
6
6
3
1
3
6
4
7
6
1
0
7
7
1
8
3
9
6
2
7
2
1
6
4
6
6
6
0
1
8
1
6
3
4
9
2
8
4
5
3
9
7
5
5
5
5
1
1
6
7
0
8
8
2
6
1
5
4
6
9
7
0
5
2
0
2
1
0
6
5
9
4
9
7
7
4
5
6
2
8
2
6
7
2
0
2
3
7
1
4
5
4
8
7
3
2
5
1
3
9
2
9
2
7
3
0
0
4
5
6
5
0
1
4
2
8
8
2
9
2
3
9
7
5
1
3
4
3
2
6
0
8
5
2
0
2
9
1
7
5
6
1
5
7
4
9
7
6
2
2
1
4
9
4
7
3
9
9
8
9
8
8
7
8
4
6
0
0
1
4
2
4
2
4
8
6
6
2
8
9
8
9
0
6
8
6
8
4
5
7
2
3
8
1
0
1
9
3
1
7
9
6
6
7
9
5
1
4
0
0
2
4
4
9
5
0
0
3
5
2
1
1
8
8
1
0
4
6
3
8
8
7
8
7
6


In [None]:
valid_submission.buy_quantity.value_counts() 

0.000000    518
1.000000     95
0.864439     30
0.912828     30
0.973971     25
0.980072     22
0.899263     21
0.938551     19
Name: buy_quantity, dtype: int64

In [None]:
valid_submission.to_csv(data_path+"/submission5.csv", index = False)