# 1. Import the libraries

In [63]:
# First, import the necessary libraries

import os 
import pandas as pd 
import numpy as np 
import math 
import datetime as dt

# Libraries used for evaluation

from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler

# Libraries used for model building

import tensorflow as tf
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM

# Libraries used for plotting

import matplotlib.pyplot as plt 
from itertools import cycle 
import plotly.graph_objects as go 
import plotly.express as px
from plotly.subplots import make_subplots


# 2. Loading Dataset

In [64]:
# Read the BTC-USD price history CSV into the main DataFrame.

maindf = pd.read_csv(filepath_or_buffer='BTC-USD.lstm.csv')

In [65]:
# Report the dataset dimensions: row count first, then column count.
print('Total number of days present in the dataset:', len(maindf))
print('Total number of fields present in the dataset:', len(maindf.columns))

Total number of days present in the dataset: 609
Total number of fields present in the dataset: 7


In [66]:
# Display the (rows, columns) tuple of the loaded frame.
maindf.shape

(609, 7)

In [67]:
# Preview the first five rows to check the column layout and the earliest dates.
maindf.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-01-01,28994.009766,29600.626953,28803.585938,29374.152344,29374.152344,40730301359
1,2021-01-02,29376.455078,33155.117188,29091.181641,32127.267578,32127.267578,67865420765
2,2021-01-03,32129.408203,34608.558594,32052.316406,32782.023438,32782.023438,78665235202
3,2021-01-04,32810.949219,33440.21875,28722.755859,31971.914063,31971.914063,81163475344
4,2021-01-05,31977.041016,34437.589844,30221.1875,33992.429688,33992.429688,67547324782


In [68]:
# Preview the last five rows to check the most recent dates.
maindf.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
604,2022-08-28,20041.035156,20139.054688,19616.814453,19616.814453,19616.814453,24366810591
605,2022-08-29,19615.154297,20357.462891,19600.785156,20297.994141,20297.994141,32637854078
606,2022-08-30,20298.611328,20542.644531,19617.640625,19796.808594,19796.808594,34483360283
607,2022-08-31,19799.582031,20420.990234,19799.582031,20049.763672,20049.763672,33225232872
608,2022-09-01,20050.498047,20198.390625,19653.96875,20127.140625,20127.140625,30182031010


In [69]:
# Summarise column dtypes and non-null counts.
maindf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 609 entries, 0 to 608
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       609 non-null    object 
 1   Open       609 non-null    float64
 2   High       609 non-null    float64
 3   Low        609 non-null    float64
 4   Close      609 non-null    float64
 5   Adj Close  609 non-null    float64
 6   Volume     609 non-null    int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 33.4+ KB


In [70]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns.
maindf.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,609.0,609.0,609.0,609.0,609.0,609.0
mean,41680.466175,42766.377595,40417.969539,41658.482101,41658.482101,39983260000.0
std,11861.072043,12105.295986,11596.530019,11875.692316,11875.692316,21959860000.0
min,19010.902344,19371.748047,17708.623047,19017.642578,19017.642578,13736560000.0
25%,33811.242188,34802.742188,32270.175781,33746.003906,33746.003906,27055800000.0
50%,41501.746094,42479.613281,40625.632813,41557.902344,41557.902344,34163220000.0
75%,49354.855469,50797.164063,47857.496094,49339.175781,49339.175781,48597430000.0
max,67549.734375,68789.625,66382.0625,67566.828125,67566.828125,350967900000.0


### Checking for Null Values

In [71]:
# Count missing entries across every cell of the frame.
print('Null Values:', maindf.isna().to_numpy().sum())

Null Values: 0


In [72]:
# Shape of the dataset after the null check; no rows are removed in this
# notebook section, so it matches the shape shown right after loading.

maindf.shape

(609, 7)

# 3. EDA (Exploratory Data Analysis)

In [73]:
# Print the start date and end date of the dataset.
# The 'Date' column is selected by name and the first/last row by position.
# The previous form, maindf.iloc[0][0], used an integer key as a position on a
# label-indexed Series (deprecated in recent pandas) and silently depended on
# 'Date' being the first column.

sd = maindf['Date'].iloc[0]
ed = maindf['Date'].iloc[-1]

print('Starting Date',sd)
print('Ending Date',ed)

Starting Date 2021-01-01
Ending Date 2022-09-01


### Bitcoin Analysis from 2021-01-01 to 2022-09-01

In [74]:
# Keep only the date and closing-price columns for the rest of the analysis.
closedf = maindf[['Date','Close']]
# Confirm the selection kept every row and exactly two columns.
print("Shape of close dataframe:", closedf.shape)

Shape of close dataframe: (609, 2)


In [75]:
# Display the date/close frame (the notebook truncates long frames in the output).
closedf

Unnamed: 0,Date,Close
0,2021-01-01,29374.152344
1,2021-01-02,32127.267578
2,2021-01-03,32782.023438
3,2021-01-04,31971.914063
4,2021-01-05,33992.429688
...,...,...
604,2022-08-28,19616.814453
605,2022-08-29,20297.994141
606,2022-08-30,19796.808594
607,2022-08-31,20049.763672


In [76]:
# Keep only rows dated after 2021-01-01; 'Date' holds ISO-formatted strings,
# so string comparison orders them chronologically.
# NOTE(review): the comparison is strict, so the 2021-01-01 row itself is
# dropped — confirm this is intended, since the headings say "from 2021-01-01".
closedf = closedf[closedf['Date'].gt('2021-01-01')]
# Independent copy of the filtered frame, kept under a separate name.
close_stock = closedf.copy()
print("Total data for prediction: ", len(closedf))

Total data for prediction:  608


In [77]:
# Display the filtered date/close frame to confirm which rows remain.
closedf

Unnamed: 0,Date,Close
1,2021-01-02,32127.267578
2,2021-01-03,32782.023438
3,2021-01-04,31971.914063
4,2021-01-05,33992.429688
5,2021-01-06,36824.363281
...,...,...
604,2022-08-28,19616.814453
605,2022-08-29,20297.994141
606,2022-08-30,19796.808594
607,2022-08-31,20049.763672


In [78]:
# Line chart of the daily closing price.
# The `labels` keys must match the DataFrame column names ('Date', 'Close');
# the previous lower-case keys matched no column, so the "Close Price" axis
# title was never applied.
fig = px.line(closedf, x='Date', y='Close', labels={'Date': 'Date', 'Close': 'Close Price'})
# NOTE(review): marker_line_* styles marker outlines; px.line draws lines without
# markers by default, so the orange colour probably has no visible effect —
# confirm whether line_color='orange' was intended.
fig.update_traces(marker_line_width=2, opacity=0.8, marker_line_color='orange')
fig.update_layout(title_text='Bitcoin close price from 2021-01-01 to 2022-09-01', plot_bgcolor='white', font_size=15, font_color='black')
# Hide grid lines on both axes for a cleaner white background.
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()