Library

In [33]:
#import libraries

import pandas as pd
import prophet as Prophet
import yfinance as yf
import plotly.graph_objects as go

import warnings

from datetime import datetime, timedelta

In [34]:
warnings.filterwarnings("ignore")
pd.options.display.float_format = "${:,.2f}".format

Get ETH Data

In [35]:
today = datetime.today().strftime("%Y-%m-%d")
start_date = "2016-01-01"

# Using Pandas dataframe
eth_df = yf.download("ETH-USD", start_date, today)

# Load the 10 latest entries
eth_df.tail()

# Tags: Date, Open, High, Low, Close, Adj Close, Volume

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-08-30,"$1,553.19","$1,600.46","$1,480.83","$1,523.84","$1,523.84",21835784470
2022-08-31,"$1,524.29","$1,612.36","$1,524.29","$1,553.68","$1,553.68",20591680941
2022-09-01,"$1,553.76","$1,593.08","$1,520.19","$1,586.18","$1,586.18",16434276817
2022-09-02,"$1,586.02","$1,643.18","$1,551.88","$1,577.22","$1,577.22",17708478709
2022-09-03,"$1,572.53","$1,579.29","$1,542.92","$1,548.30","$1,548.30",12824674304


Analysis of ETH Data

In [36]:
# Summarise datapoints and datatype for each column and memory usage
eth_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1760 entries, 2017-11-09 to 2022-09-03
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       1760 non-null   float64
 1   High       1760 non-null   float64
 2   Low        1760 non-null   float64
 3   Close      1760 non-null   float64
 4   Adj Close  1760 non-null   float64
 5   Volume     1760 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 96.2 KB


In [37]:
# Check for missing (null) values
eth_df.isnull().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [38]:
# Separate features into categorical and numerical
cat_df = eth_df.select_dtypes(include=["object"]) #categorical
num_df = eth_df.select_dtypes(exclude=["object"]) #numerical

def printColumnTypes(catdf, numdf):
    print("Categorical columns: ")
    for col in catdf:
        print(f"{col}")
    print("Numerical columns: ")
    for col in numdf:
        print(f"{col}")
        
printColumnTypes(cat_df, num_df)

Categorical columns: 
Numerical columns: 
Open
High
Low
Close
Adj Close
Volume


In [39]:
# Notice missing date column
eth_df.columns

Index(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [40]:
# Reset index to have missing date column
eth_df = eth_df.reset_index()

eth_df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [41]:
# Note to self on limitation of prophet library
# Only allowed to have 2 columns in dataframe - "ds" & "y"
df = eth_df[["Date", "Open"]]

new_names = {
    "Date": "ds",
    "Open": "y"
}

df = df.rename(new_names)

In [42]:
df.tail()

Unnamed: 0,Date,Open
1755,2022-08-30,"$1,553.19"
1756,2022-08-31,"$1,524.29"
1757,2022-09-01,"$1,553.76"
1758,2022-09-02,"$1,586.02"
1759,2022-09-03,"$1,572.53"


In [43]:
# plot the open price

x = df["ds"]
y = df["y"]

fig = go.Figure()

fig.add_trace(go.Scatter(x=x, y=y))

# Set title
fig.update_layout(
    title_text="Time series plot of Ethereum Open Price",
)

fig.update_layout(
    xaxis=dict(
        rangeselector=dict(
            buttons=list(
                [
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(count=6, label="6m", step="month", stepmode="backward"),
                    dict(count=1, label="YTD", step="year", stepmode="todate"),
                    dict(count=1, label="1y", step="year", stepmode="backward"),
                    dict(step="all"),
                ]
            )
        ),
        rangeslider=dict(visible=True),
        type="date",
    )
)

KeyError: 'ds'