In [1]:
# One-time setup (uncomment if the package is missing): %pip install yfinance

In [2]:
# One-time setup (uncomment if the package is missing): %pip install plotly

In [3]:
# One-time setup (uncomment if plotly figures fail to render): %pip install --upgrade nbformat

In [4]:
# load libraries
import pandas as pd
import yfinance as yf
from datetime import datetime
from datetime import timedelta
import plotly.graph_objects as go
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, plot_components_plotly
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas' SettingWithCopyWarning and
# library deprecation notices, so problems in later cells will not be reported.
warnings.simplefilter('ignore')
# Render floats in DataFrame output with thousands separators and two decimals.
pd.set_option('display.float_format', '{:,.2f}'.format)

# Settings

In [5]:
# Ticker that is downloaded and modelled below; swap in the alternative to run on Ether.
symbol = "BTC-USD"
# symbol = "ETH-USD"

# Import data

In [6]:
# Download the price history for `symbol` from a fixed start date up to the day
# the notebook is run, then preview the most recent rows.
# NOTE: the frame is called eth_df regardless of which ticker `symbol` holds.
start_date = '2016-01-01'
today = f"{datetime.today():%Y-%m-%d}"
eth_df = yf.download(symbol, start=start_date, end=today)
eth_df.tail()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-02-07,42406.78,44401.86,41748.16,43840.29,43840.29,28641855926
2022-02-08,43854.65,45293.87,42807.84,44118.45,44118.45,33079398868
2022-02-09,44096.7,44727.8,43232.97,44338.8,44338.8,23245887300
2022-02-10,44347.8,45661.17,43402.81,43565.11,43565.11,32142048537
2022-02-11,43474.11,43810.17,42950.7,43707.96,43707.96,31192219648


# Analyse

In [7]:
# analysis: summarise the index range, column dtypes and non-null counts of the download
eth_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2234 entries, 2016-01-01 to 2022-02-11
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       2234 non-null   float64
 1   High       2234 non-null   float64
 2   Low        2234 non-null   float64
 3   Close      2234 non-null   float64
 4   Adj Close  2234 non-null   float64
 5   Volume     2234 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 122.2 KB


In [8]:
# Count missing values per column (isna is the same check as isnull).
eth_df.isna().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [9]:
# List the column labels while the dates are still held in the index.
eth_df.columns

Index(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

# Transform

In [10]:
# Move the date index into a regular "Date" column so it can be selected by name.
# The guard keeps the cell idempotent: without it, running the cell a second time
# would reset the fresh integer index too and add a stray "index" column.
if "Date" not in eth_df.columns:
    # Rebind instead of mutating in place so the step reads as an explicit transform.
    eth_df = eth_df.reset_index()
eth_df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [11]:
# Keep only the date and the opening price, renamed to the ds / y column names
# that the Prophet model fitted later in this notebook is given.
new_names = {
    "Date": "ds",
    "Open": "y",
}
# Select and rename in a single chained expression. Renaming a frame obtained by
# indexing another frame with inplace=True triggers pandas' SettingWithCopyWarning
# (hidden here by the global warnings filter); the chained form returns a new,
# independent frame and leaves eth_df untouched.
df = eth_df[["Date", "Open"]].rename(columns=new_names)

### Create labels

In [12]:
# Triple-barrier labelling parameters handed to form_label.
threshold = 0.03  # barrier size; form_label is called with threshold_type='ratio'
T = 3  # NOTE(review): presumably the look-ahead horizon in rows — confirm in process_raw

In [13]:
from process_raw import form_label

# Derive the labels from the (ds, y) price frame using the settings defined above.
tbl = form_label(df, ['ds', 'y'], threshold_type='ratio', threshold=threshold, T=T)

# From here on `y` holds the label rather than the opening price: it becomes the
# target variable for the model.
df = df.assign(y=tbl)

# Discard rows labelled -2.
# NOTE(review): -2 presumably marks rows that could not be labelled — confirm in process_raw.
df = df.loc[df['y'] != -2]

In [14]:
# Preview the last rows that remain after the -2 labels were removed.
df.tail()

Unnamed: 0,ds,y
2226,2022-02-04,1.0
2227,2022-02-05,1.0
2228,2022-02-06,1.0
2229,2022-02-07,1.0
2230,2022-02-08,0.0


In [15]:
# Scatter plot of the label assigned to each date.
x = df["ds"]
y = df["y"]

# Small translucent green dots with a thin solid outline.
label_marker = dict(
    # symbol = 'triangle-up',
    color='rgba(20, 200, 20, 0.5)',
    size=3,
    line=dict(color='Green', width=1),
)
label_trace = go.Scatter(
    name='Open price Label',
    mode='markers',
    x=x,
    y=y,
    marker=label_marker,
    showlegend=True,
)

fig = go.Figure(data=[label_trace])

# Set the title; kept as the final expression so its return value is what the cell displays.
fig.update_layout(title_text=f"Time series plot of {symbol} Open Price Label")

# Model

In [47]:
# Prophet configuration, gathered in one place before the model is constructed.
# NOTE(review): with growth='flat' the changepoint_* settings may have no effect —
# confirm against the Prophet documentation.
prophet_settings = dict(
    growth='flat',
    changepoint_prior_scale=1000,
    # seasonality_prior_scale=0.01,
    # holidays_prior_scale=0.01,
    seasonality_mode='multiplicative',
    changepoint_range=0.99,
)
m = Prophet(**prophet_settings)

# Fit on the labelled frame (ds = date, y = label). Kept as the final expression
# so the cell shows the fitted model object.
m.fit(df)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


<fbprophet.forecaster.Prophet at 0x1d75966e880>

In [48]:
# Build the frame of dates to predict: the fitted history plus a short horizon beyond it.
forecast_horizon = 10
future = m.make_future_dataframe(periods=forecast_horizon, include_history=True)
future

Unnamed: 0,ds
0,2016-01-01
1,2016-01-02
2,2016-01-03
3,2016-01-04
4,2016-01-05
...,...
2236,2022-02-14
2237,2022-02-15
2238,2022-02-16
2239,2022-02-17


In [49]:
# Predict for every date in `future`, then show the point forecast with its
# lower and upper uncertainty bounds.
forecast = m.predict(future)
summary_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
forecast[summary_columns]

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2016-01-01,0.18,-0.89,1.19
1,2016-01-02,0.17,-0.88,1.20
2,2016-01-03,0.09,-1.01,1.08
3,2016-01-04,0.08,-0.97,1.07
4,2016-01-05,0.06,-1.01,1.12
...,...,...,...,...
2236,2022-02-14,0.32,-0.73,1.30
2237,2022-02-15,0.31,-0.78,1.35
2238,2022-02-16,0.30,-0.65,1.33
2239,2022-02-17,0.31,-0.72,1.33


In [50]:
# Look up the predicted label for tomorrow, relative to the clock at run time.
next_day = (datetime.today() + timedelta(days=1)).strftime('%Y-%m-%d')

# One .loc call selects rows and column together, replacing the chained
# forecast[mask]['yhat'] lookup.
next_day_yhat = forecast.loc[forecast['ds'] == next_day, 'yhat']

# Series.item() only works on exactly one value and otherwise fails with a generic
# size error. Raise the same exception type, but say which date was missing
# (e.g. stale data, so the forecast horizon does not reach tomorrow).
if len(next_day_yhat) != 1:
    raise ValueError(
        f"expected exactly one forecast row for {next_day}, found {len(next_day_yhat)}; "
        "the forecast horizon may not cover that date"
    )

next_day_yhat.item()

0.3798813229152437

In [51]:
# Interactive plot of the fitted model and its forecast.
plot_plotly(m, forecast)

In [52]:
# Interactive plot of the forecast broken down into the model's components.
plot_components_plotly(m, forecast)

# Conclusion
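- This approach does not seem to work: in the forecast rows shown above, the predicted label (`yhat`) stays between roughly 0.06 and 0.38 while the uncertainty interval spans about -1 to 1.3, so the prediction carries little usable signal about the next label.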