In [None]:
!pip install pyupbit

In [None]:
import time
import pyupbit
import datetime

import numpy as np
import pandas as pd
import torch

In [None]:
# Daily KRW-BTC candles: 852 rows are requested up to (not including) 2022-05-01,
# and the first row is dropped below because its return is NaN, leaving 851 days.
# NOTE(review): the original comment gave the range as 2012-01-01..2022-04-30, but
# 851 days ending 2022-04-30 start on 2020-01-01 -- the "2012" looks like a typo.
raw_candles = pyupbit.get_ohlcv("KRW-BTC", interval="day", count=852, to="20220501")

# Keep open/high/low/close and append the day-over-day close return as a 5th column.
df = (
    raw_candles
    .assign(daily_return=raw_candles['close'].pct_change())
    .iloc[1:]
    .drop(columns=['volume', 'value'])
)

In [None]:
from sklearn.model_selection import train_test_split

# Chronological 80 / 10 / 10 split into train / valid / test.
# shuffle=False keeps the rows in time order, so valid and test are strictly
# later than train.
split_opts = dict(shuffle=False)

# First cut: 80% train, 20% held out.
train, b = train_test_split(df, train_size=0.8, test_size=0.2, **split_opts)

# Second cut: the held-out 20% is halved into validation and test.
valid, test = train_test_split(b, train_size=0.5, test_size=0.5, **split_opts)


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Column layout of the scaled frames; this is also the column order the scaler
# is fitted on.
scaled_cols = ['open', 'high', 'low', 'close', 'daily_return']

# The scaler is fitted on the training split only; validation and test are
# transformed with the training min/max.
scaler = MinMaxScaler()

# fit_transform/transform return plain arrays, so each result is wrapped back
# into a DataFrame (fresh 0..n-1 index) with the column names restored.
train = pd.DataFrame(scaler.fit_transform(train), columns=scaled_cols)
valid = pd.DataFrame(scaler.transform(valid), columns=scaled_cols)
test = pd.DataFrame(scaler.transform(test), columns=scaled_cols)

In [None]:
def make_dataset(data, label, window_size=5):
    """Slice a time-ordered table into sliding windows with next-step targets.

    For every start position ``s`` in ``range(len(data) - window_size)`` the
    feature sample is rows ``s .. s + window_size - 1`` of ``data`` and the
    target is row ``s + window_size`` of ``label``.

    Args:
        data: pandas object supporting ``len`` and ``.iloc`` row slicing.
        label: pandas object supporting ``.iloc`` integer row access.
        window_size: number of consecutive rows per feature window.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays with one entry per
        window. Both are empty when ``len(data) <= window_size``.
    """
    starts = range(len(data) - window_size)
    windows = [np.array(data.iloc[s:s + window_size]) for s in starts]
    targets = [np.array(label.iloc[s + window_size]) for s in starts]
    return np.array(windows), np.array(targets)

In [None]:
# Display the scaled training frame as a quick sanity check.
train

In [None]:
# All five scaled columns are model inputs; the target is the scaled close.
feature_cols = ['open', 'high', 'low', 'close', 'daily_return']
label_cols = ['close']

# Each split becomes (5-row feature window, following row's close) pairs.

# train dataset
train_feature, train_label = make_dataset(train[feature_cols], train[label_cols], 5)

# valid dataset
valid_feature, valid_label = make_dataset(valid[feature_cols], valid[label_cols], 5)

# test dataset (the data actually used for the final predictions)
test_feature, test_label = make_dataset(test[feature_cols], test[label_cols], 5)

In [None]:
# print(train_feature.shape, train_label.shape)
# print(valid_feature.shape, valid_label.shape)
# print(test_feature.shape, test_label.shape)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import LSTM

# Input shape is taken from the windowed training features: axis 1 and axis 2
# of train_feature.
lstm_input_shape = (train_feature.shape[1], train_feature.shape[2])

# Single LSTM layer (30 units, tunable) feeding one linear output unit.
# return_sequences=False passes only the last time step's output to the Dense
# layer. An optional Dense(10) + Dropout(0.1) head was tried and left disabled.
model = Sequential([
    LSTM(30,
         input_shape=lstm_input_shape,
         activation='relu',
         return_sequences=False),
    Dense(1),
])

In [None]:
import os
import tensorflow as tf

# Adam with learning rate 0.01, minimising mean squared error.
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=opt, loss='mean_squared_error')

# Stop once validation loss has not improved for 5 consecutive epochs.
# (Best-model checkpointing to os.path.join(path, 'tmp_checkpoint.h5') was
# left disabled in the original notebook.)
early_stop = EarlyStopping(monitor='val_loss', patience=5)

# Up to 30 epochs with a batch size of 1, validated on the validation windows.
history = model.fit(
    x=train_feature,
    y=train_label,
    validation_data=(valid_feature, valid_label),
    batch_size=1,
    epochs=30,
    callbacks=[early_stop],
)




In [None]:
# Training history: plot training vs. validation loss per epoch.
import matplotlib.pyplot as plt

loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)  # 1-based epoch numbers for the x axis

fig, ax = plt.subplots()
ax.plot(epochs, loss, label='Training loss')
ax.plot(epochs, val_loss, label='Validation loss')
ax.set_title('Training and Validation loss')  # fixed typo: was 'Valdation'
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss (MSE)')  # the model is compiled with mean_squared_error
ax.legend()
plt.show()

In [None]:
# Predict: run the trained model on the windowed test features. The outputs are
# on the MinMax-scaled 'close' scale, matching the labels it was trained on.
pred = model.predict(test_feature)

In [None]:
from sklearn.metrics import mean_squared_error

# RMSE on the test windows. Both labels and predictions are MinMax-scaled, so
# the error is in scaled units, not KRW.
MSE_test = mean_squared_error(y_true=test_label, y_pred=pred)
test_rmse = np.sqrt(MSE_test)
print('test RMSE:', test_rmse)

# Same metric on the training windows, for comparison against the test error.
train_pred = model.predict(train_feature)
train_MSE = mean_squared_error(y_true=train_label, y_pred=train_pred)
train_rmse = np.sqrt(train_MSE)
print('train RMSE:', train_rmse)

In [None]:
# Quick look at the scaled predictions; squeeze() drops the length-1 axes
# before they are wrapped in a Series and plotted.
pd.Series(pred.squeeze()).plot()

In [None]:
# Show the shape of the prediction array.
pred.shape

In [None]:
# inverse_transform needs as many columns as the scaler was fitted on, so the
# predictions are placed in the 'close' slot of a zero-filled placeholder frame.
# The scaler was fitted on [open, high, low, close, daily_return]; the fifth
# column used to be mislabelled 'volume'. inverse_transform is positional, so
# this rename does not change the result.
scaled_cols = ['open', 'high', 'low', 'close', 'daily_return']

# Build the frame as float zeros directly; an all-NaN object frame patched with
# fillna(0) relies on a dtype downcast that recent pandas versions deprecate.
df_pred = pd.DataFrame(0.0, index=range(pred.shape[0]), columns=scaled_cols)

# pred comes out of model.predict with a trailing length-1 axis; flatten it to
# one value per row.
df_pred['close'] = pred.ravel()
df_pred

In [None]:
# Undo the MinMax scaling and keep column 3 (close) of the result.
n_pred = pred.shape[0]
inverse_pred = scaler.inverse_transform(df_pred)[:, 3]  # close : 3

# Put the actual close (column 3 of df) for the last n_pred days next to the
# de-scaled predictions, then plot both.
real_close_value = (
    df.iloc[-n_pred:, 3]  # close : 3
      .to_frame()
      .rename(columns={'close': 'Actual'})
)
real_close_value['Prediction'] = inverse_pred

real_close_value.plot(figsize=(8,8))

real_close_value

In [None]:
# scaler.inverse_transform(df_pred)

# 모델 저장

In [None]:
# Save the trained network to an HDF5 file and load it straight back, so the
# later cells run on the saved-and-reloaded copy (new_model).
model_file = 'lstm_predict.h5'
model.save(model_file)
new_model = tf.keras.models.load_model(model_file)

# test_loss, test_acc = new_model.evaluate(x,  y, verbose=2)

# 저장한 모델 다시 사용

In [None]:
# Re-run the test prediction with the model reloaded from disk.
pred = new_model.predict(test_feature)

# inverse_transform needs as many columns as the scaler was fitted on, so the
# predictions are placed in the 'close' slot of a zero-filled placeholder frame.
# The scaler was fitted on [open, high, low, close, daily_return]; the fifth
# column used to be mislabelled 'volume'. inverse_transform is positional, so
# this rename does not change the result.
scaled_cols = ['open', 'high', 'low', 'close', 'daily_return']

# Build the frame as float zeros directly; an all-NaN object frame patched with
# fillna(0) relies on a dtype downcast that recent pandas versions deprecate.
df_pred = pd.DataFrame(0.0, index=range(pred.shape[0]), columns=scaled_cols)
df_pred['close'] = pred.ravel()  # flatten the trailing length-1 axis

# Back to the original price scale; column 3 is close.
inverse_pred = scaler.inverse_transform(df_pred)[:, 3]  # close : 3

# Actual close (column 3 of df) for the last pred.shape[0] days next to the
# de-scaled predictions, then plot both.
real_close_value = pd.DataFrame(df.iloc[-pred.shape[0]:, 3])  # close : 3
real_close_value['inverse_pred'] = inverse_pred
real_close_value = real_close_value.rename(
    columns={'close': 'Actual', 'inverse_pred': 'Prediction'}
)

real_close_value.plot(figsize=(8,8))

real_close_value

# 투자 성과 결과

In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem.
drive.mount('/content/gdrive')

# Project folder on the mounted drive. It must sit under the mount point used
# above; the previous value started at '/gdrive', which is not where the drive
# is mounted.
path = '/content/gdrive/MyDrive/융합소프트웨어프로젝트/프로젝트/혜준'

# 1번 단순 변동성 투자 전략 

In [None]:
import pyupbit
import numpy as np

# Strategy 1: plain volatility breakout over the 81 daily candles before 2022-05-01.
df1 = pyupbit.get_ohlcv("KRW-BTC", count=81, to='20220501')

# --- Buy price -----------------------------------------------------------------
# Breakout width = k * (high - low), with k = 0.2 here.
# NOTE(review): the original comment mentions "best k = 0.8" while the code uses 0.2.
df1['range'] = 0.2 * (df1['high'] - df1['low'])

# Target (buy) price = today's open + the previous day's breakout width.
prev_range = df1['range'].shift(1)
df1['target'] = df1['open'] + prev_range

# --- Daily return --------------------------------------------------------------
fee = 0.0005

# A buy is triggered when the day's high exceeds the target. The position is
# bought at the target and sold at the close, with the fee charged on both legs.
# Days without a trade contribute a neutral factor of 1.
bought = df1['high'] > df1['target']
net_return = (df1['close'] - fee * (df1['close'] + df1['target'])) / df1['target']
df1['ror'] = np.where(bought, net_return, 1)

# Cumulative (holding-period) return: running product of the daily factors.
df1['hpr'] = df1['ror'].cumprod()

# Drawdown in percent: distance of hpr below its running maximum.
running_peak = df1['hpr'].cummax()
df1['dd'] = (running_peak - df1['hpr']) / running_peak * 100

# Maximum drawdown = largest drawdown over the window.
print("MDD(%): ", df1['dd'].max())

print(df1)

# Move the timestamp index into an 'index' column and reduce it to plain dates
# before exporting to Excel.
df1 = df1.reset_index()
df1['index'] = pd.to_datetime(df1['index']).dt.date

df1.to_excel("df1.xlsx")

# 2번 LSTM + 변동성 투자 전략

In [None]:
# --- LSTM predictions for the 81-day back-test window --------------------------
# The network was trained on 5-day windows of
# [open, high, low, close, daily_return], scaled by `scaler`, which was fitted
# on the training split. The inputs built here follow the same recipe.
# Previously this cell (a) re-fitted the shared scaler on the back-test window
# and (b) passed `volume` as the fifth feature instead of `daily_return`.
lstm_feature_cols = ['open', 'high', 'low', 'close', 'daily_return']
n_backtest_days = 81  # must match the candle count used for df2 below
lookback = 5          # window size the model was trained with

# Rows needed: n_backtest_days targets + lookback history rows + 1 extra leading
# row, because pct_change() leaves the first return as NaN.
df2_2 = pyupbit.get_ohlcv("KRW-BTC", count=n_backtest_days + lookback + 1, to='20220501')
df2_2['daily_return'] = df2_2['close'].pct_change()
lstm_inputs = df2_2.iloc[1:][lstm_feature_cols]

# transform(), not fit_transform(): reuse the training min/max so the inputs are
# on the scale the model was trained on, and leave the fitted scaler untouched.
df2_2_sc = pd.DataFrame(scaler.transform(lstm_inputs), columns=lstm_feature_cols)

real_test_feature = df2_2_sc
real_test_label = df2_2_sc['close']

# Back-test windows (the data actually being predicted): each sample is
# `lookback` consecutive days, labelled with the following day's close.
real_test_feature, real_test_label = make_dataset(real_test_feature, real_test_label, lookback)

# Predict the scaled close for each window.
norm_pred = new_model.predict(real_test_feature)

# inverse_transform needs the scaler's full column layout, so the predictions go
# into the 'close' slot of a zero-filled placeholder frame.
real_df_pred = pd.DataFrame(0.0, index=range(real_test_feature.shape[0]), columns=lstm_feature_cols)
real_df_pred['close'] = norm_pred.ravel()

# Back to the original price scale; column 3 is close.
pred_value = scaler.inverse_transform(real_df_pred)[:, 3]

assert len(pred_value) == n_backtest_days, (
    f"expected {n_backtest_days} predictions, got {len(pred_value)}; "
    "check how many candles the API returned"
)

import pyupbit
import numpy as np

# Strategy 2: volatility breakout filtered by the LSTM forecast, over the same
# 81 daily candles before 2022-05-01.
df2 = pyupbit.get_ohlcv("KRW-BTC", count=81, to='20220501')
print(len(df2))

# --- Buy price -----------------------------------------------------------------
# Breakout width = k * (high - low), with k = 0.2.
df2['range'] = 0.2 * (df2['high'] - df2['low'])

# Target (buy) price = today's open + the previous day's breakout width.
prev_range = df2['range'].shift(1)
df2['target'] = df2['open'] + prev_range

# Attach the LSTM close forecasts; pred_value must hold one value per row of df2.
df2['LSTM_pred'] = pred_value

# --- Daily return --------------------------------------------------------------
fee = 0.0005

# Trade only when the breakout triggers (high > target) AND the forecast close
# is above the target price, as a filter for more conservative entries.
# NOTE(review): the original comment described the filter as "predicted close
# above today's open", but the code compares against target -- confirm intent.
breakout = df2['high'] > df2['target']
forecast_above_target = df2['target'] < df2['LSTM_pred']

# Bought at the target, sold at the close, with the fee charged on both legs.
# Days without a trade contribute a neutral factor of 1.
net_return = (df2['close'] - fee * (df2['close'] + df2['target'])) / df2['target']
df2['ror'] = np.where(breakout & forecast_above_target, net_return, 1)

# Cumulative (holding-period) return: running product of the daily factors.
df2['hpr'] = df2['ror'].cumprod()

# Drawdown in percent: distance of hpr below its running maximum.
running_peak = df2['hpr'].cummax()
df2['dd'] = (running_peak - df2['hpr']) / running_peak * 100

# Maximum drawdown = largest drawdown over the window.
print("MDD(%): ", df2['dd'].max())

print(df2)

# Move the timestamp index into an 'index' column and reduce it to plain dates
# before exporting to Excel.
df2 = df2.reset_index()
df2['index'] = pd.to_datetime(df2['index']).dt.date

df2.to_excel("df2.xlsx")


# 3번 buy and hold

In [None]:
# Strategy 3 (buy and hold): the same 81 daily OHLCV candles before 2022-05-01.
# NOTE(review): the original comment said "7 days", but count is 81.
# The timestamp index is moved into an 'index' column and reduced to plain dates.
df3 = pyupbit.get_ohlcv("KRW-BTC", count=81, to='20220501').reset_index()
df3['index'] = pd.to_datetime(df3['index']).dt.date

In [None]:
# Daily gross return factor: close-to-close percent change + 1.
# The first row has no previous close, so its factor is NaN.
# (The x100 scaling is kept from the original.)
close_scaled = df3['close'] * 100
df3['ror'] = close_scaled.pct_change() + 1

# Cumulative (holding-period) return: running product of the daily factors.
df3['hpr'] = df3['ror'].cumprod()

# Drawdown in percent: distance of hpr below its running maximum.
running_peak = df3['hpr'].cummax()
df3['dd'] = (running_peak - df3['hpr']) / running_peak * 100

# Maximum drawdown = largest drawdown over the window.
print("MDD(%): ", df3['dd'].max())

print(df3)

df3.to_excel("df3.xlsx")



In [None]:
# Summary statistics of the buy-and-hold daily return factors, with the NaN
# first row removed.
daily_ror = df3['ror'].dropna()

# Mean daily return factor.
print(sum(daily_ror) / len(daily_ror))

# Standard deviation of the daily return factor.
print(daily_ror.std())