# 1. Import the libraries

In [17]:
# First, import the required libraries

import os 
import pandas as pd 
import numpy as np 
import math 
import datetime as dt

# Libraries used for evaluation and preprocessing

from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler

# Libraries used for model building

import tensorflow as tf
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM

# Libraries used for plotting

import matplotlib.pyplot as plt 
from itertools import cycle 
import plotly.graph_objects as go 
import plotly.express as px
from plotly.subplots import make_subplots

# 2. Loading Dataset

In [18]:
# Read the ETH-USD price history into a DataFrame.
# The path is relative, so the CSV is resolved against the current working
# directory; keep the file in the directory the notebook is run from.
dataset_file = 'ETH-USD.lstm.csv'
maindf = pd.read_csv(dataset_file)

In [19]:
# Report the dataset dimensions: rows are reported as days, columns as fields.
total_days, total_fields = maindf.shape
print('Total number of days present in the dataset:',total_days)
print('Total number of fields present in the dataset:',total_fields)

Total number of days present in the dataset: 609
Total number of fields present in the dataset: 7


In [20]:
# Display the (rows, columns) shape of the loaded dataset.
maindf.shape

(609, 7)

In [21]:
# Preview the first rows of the dataset (head() defaults to five rows).
maindf.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-01-01,737.708374,749.201843,719.792236,730.367554,730.367554,13652004358
1,2021-01-02,730.402649,786.798462,718.109497,774.534973,774.534973,19740771179
2,2021-01-03,774.511841,1006.565002,771.561646,975.50769,975.50769,45200463368
3,2021-01-04,977.058838,1153.189209,912.305359,1040.233032,1040.233032,56945985763
4,2021-01-05,1041.498779,1129.37146,986.811279,1100.006104,1100.006104,41535932781


In [22]:
# Preview the last rows of the dataset (tail() defaults to five rows).
maindf.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
604,2022-08-28,1491.206787,1505.791992,1430.547363,1430.547363,1430.547363,12823572918
605,2022-08-29,1430.439453,1556.30957,1427.728394,1553.037354,1553.037354,17965837488
606,2022-08-30,1553.188965,1600.461182,1480.831787,1523.838867,1523.838867,21835784470
607,2022-08-31,1524.286499,1612.358887,1524.286499,1553.684937,1553.684937,20591680941
608,2022-09-01,1553.756348,1593.082764,1520.188354,1586.176758,1586.176758,16434276817


In [23]:
# Summarise each column's dtype and non-null count, plus memory usage.
maindf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 609 entries, 0 to 608
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       609 non-null    object 
 1   Open       609 non-null    float64
 2   High       609 non-null    float64
 3   Low        609 non-null    float64
 4   Close      609 non-null    float64
 5   Adj Close  609 non-null    float64
 6   Volume     609 non-null    int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 33.4+ KB


In [24]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
maindf.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,609.0,609.0,609.0,609.0,609.0,609.0
mean,2588.476077,2673.586008,2491.0376,2589.282099,2589.282099,23150750000.0
std,958.331907,978.431764,933.893174,955.254438,955.254438,11017930000.0
min,730.402649,749.201843,718.109497,730.367554,730.367554,6532997000.0
25%,1802.543823,1840.058838,1733.639893,1803.91333,1803.91333,15998760000.0
50%,2559.660645,2641.094971,2455.59375,2559.562988,2559.562988,20093900000.0
75%,3261.291504,3346.526367,3157.224121,3261.91626,3261.91626,27363220000.0
max,4810.071289,4891.70459,4718.039063,4812.087402,4812.087402,84482910000.0


### Checking for Null Values

In [25]:
# Count missing entries across every cell of the frame.
null_total = maindf.isna().to_numpy().sum()
print('Null Values:', null_total)

Null Values: 0


In [26]:
# Dataset shape after the null-value check (no rows are removed in this notebook section).

maindf.shape

(609, 7)

# 3. EDA (Exploratory Data Analysis)

In [27]:
# Print the first and last dates covered by the dataset.
# The 'Date' column is selected by name and then indexed positionally with
# .iloc. The previous maindf.iloc[0][0] form built a row Series and relied on
# an integer key being treated as a position on a label-indexed Series, which
# pandas has deprecated.

sd = maindf['Date'].iloc[0]
ed = maindf['Date'].iloc[-1]

print('Starting Date',sd)
print('Ending Date',ed)

Starting Date 2021-01-01
Ending Date 2022-09-01


### Ethereum Analysis from 2021-01-01 to 2022-09-01

In [28]:
# Narrow the frame to the date and closing-price columns.
close_columns = ['Date', 'Close']
closedf = maindf[close_columns]
print("Shape of close dataframe:", closedf.shape)

Shape of close dataframe: (609, 2)


In [29]:
# Display the Date/Close frame (pandas elides the middle rows in the rendered output).
closedf

Unnamed: 0,Date,Close
0,2021-01-01,730.367554
1,2021-01-02,774.534973
2,2021-01-03,975.507690
3,2021-01-04,1040.233032
4,2021-01-05,1100.006104
...,...,...
604,2022-08-28,1430.547363
605,2022-08-29,1553.037354
606,2022-08-30,1523.838867
607,2022-08-31,1553.684937


In [30]:
# Keep rows dated strictly after 2021-01-01. 'Date' holds ISO-formatted
# strings (dtype object), so this is a lexicographic comparison, which agrees
# with chronological order for that format.
# NOTE(review): the strict '>' drops the 2021-01-01 row itself (609 -> 608
# rows) although the plot title says "from 2021-01-01" — confirm whether
# '>=' was intended.
after_start = closedf['Date'] > '2021-01-01'
closedf = closedf[after_start]
close_stock = closedf.copy()  # independent snapshot of the filtered frame
print("Total data for prediction: ",closedf.shape[0])

Total data for prediction:  608


In [31]:
# Display closedf again to inspect it after the date filter.
closedf

Unnamed: 0,Date,Close
1,2021-01-02,774.534973
2,2021-01-03,975.507690
3,2021-01-04,1040.233032
4,2021-01-05,1100.006104
5,2021-01-06,1207.112183
...,...,...
604,2022-08-28,1430.547363
605,2022-08-29,1553.037354
606,2022-08-30,1523.838867
607,2022-08-31,1553.684937


In [32]:
# Plot the daily closing price over time.
# The keys of `labels` must match the column names exactly ('Date', 'Close').
# The previous lowercase keys ('date', 'close') matched no column, so the
# "Close Price" axis title was never applied.
fig = px.line(closedf, x='Date', y='Close', labels={'Date':'Date','Close':'Close Price'})
# NOTE(review): marker_line_* styles marker outlines only; a px.line trace
# draws no markers by default, so the orange colour may not be visible —
# confirm whether line_color='orange' was intended.
fig.update_traces(marker_line_width=2, opacity=0.8, marker_line_color='orange')
fig.update_layout(title_text='Ethereum close price from 2021-01-01 to 2022-09-01', plot_bgcolor='white', font_size=15, font_color='black')
# Hide grid lines on both axes for a cleaner chart on the white background.
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()