# In this notebook we will build a regression model for the Bitcoin closing price

In [1]:
# Importing all the libraries which will be in use
import pandas as pd
import numpy as np
import sweetviz as sv
import seaborn as sns

In [2]:
# Load the first 1,000,000 rows of the 1-minute Bitstamp CSV into a DataFrame
data = pd.read_csv(
    r'bitstampUSD_1-min_data_2012-01-01_to_2020-12-31.csv',
    nrows=1000000,
)

In [3]:
# Preview the first five rows of the loaded dataset
data.head(n=5)

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1325317920,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
1,1325317980,,,,,,,
2,1325318040,,,,,,,
3,1325318100,,,,,,,
4,1325318160,,,,,,,


In [4]:
# Remove the Timestamp column, because it is not used as a model feature.
# The result is rebound to `data` instead of using inplace=True, and
# errors='ignore' is passed, so re-running this cell is harmless: a second
# execution no longer raises KeyError once the column is already gone.
data = data.drop(columns='Timestamp', errors='ignore')

In [5]:
# Preview the first five rows again after removing the Timestamp column
data.head(n=5)

Unnamed: 0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
1,,,,,,,
2,,,,,,,
3,,,,,,,
4,,,,,,,


In [6]:
# Show the dtype and non-null count of every column in the dataset
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Data columns (total 7 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   Open               295955 non-null  float64
 1   High               295955 non-null  float64
 2   Low                295955 non-null  float64
 3   Close              295955 non-null  float64
 4   Volume_(BTC)       295955 non-null  float64
 5   Volume_(Currency)  295955 non-null  float64
 6   Weighted_Price     295955 non-null  float64
dtypes: float64(7)
memory usage: 53.4 MB


In [7]:
# Count the missing values in each column
data.isna().sum()

Open                 704045
High                 704045
Low                  704045
Close                704045
Volume_(BTC)         704045
Volume_(Currency)    704045
Weighted_Price       704045
dtype: int64

In [8]:
# Replace the missing values in each column with that column's own median.
# NOTE(review): the null counts above show 704,045 of 1,000,000 rows missing,
# so most rows end up holding the same per-column constant — confirm that
# imputing is intended here rather than dropping those rows.
for column in ['Open', 'High', 'Low', 'Close',
               'Volume_(BTC)', 'Volume_(Currency)', 'Weighted_Price']:
    data[column] = data[column].fillna(data[column].median())

In [9]:
# Display the DataFrame after the missing values have been filled
data

Unnamed: 0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,4.39,4.39,4.39,4.39,0.455581,2.000000,4.390000
1,115.99,116.00,115.90,115.98,2.854470,242.509999,115.970000
2,115.99,116.00,115.90,115.98,2.854470,242.509999,115.970000
3,115.99,116.00,115.90,115.98,2.854470,242.509999,115.970000
4,115.99,116.00,115.90,115.98,2.854470,242.509999,115.970000
...,...,...,...,...,...,...,...
999995,835.00,836.00,833.50,833.50,1.703059,1421.998003,834.967190
999996,833.50,833.50,833.00,833.00,2.056183,1713.679502,833.427356
999997,832.91,833.50,832.00,832.00,5.918314,4926.382001,832.396155
999998,832.00,833.00,831.69,833.00,6.147849,5120.985402,832.971875


In [10]:
# Verify that no missing values remain in any column
data.isna().sum()

Open                 0
High                 0
Low                  0
Close                0
Volume_(BTC)         0
Volume_(Currency)    0
Weighted_Price       0
dtype: int64

In [11]:
# Run sweetviz's analysis on the cleaned dataset; the returned report object
# is written to an HTML file in a later cell
Bitcoin_featues_analysis = sv.analyze(data)

                                             |                                             | [  0%]   00:00 ->…

In [14]:
# Save the sweetviz feature report as an HTML file
Bitcoin_featues_analysis.show_html("Bitcoin_featues_analysis.html")

Report Bitcoin_featues_analysis.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.


In [15]:
# Separate the target (Close) from the remaining feature columns.
# NOTE(review): the features keep High, Low and Weighted_Price from the same
# row as the Close being predicted, which may leak the target into the
# inputs — confirm this is the intended modelling setup.
y = data['Close']
X = data.drop(columns=['Close'])

In [16]:
# Display the feature matrix (every column except Close)
X

Unnamed: 0,Open,High,Low,Volume_(BTC),Volume_(Currency),Weighted_Price
0,4.39,4.39,4.39,0.455581,2.000000,4.390000
1,115.99,116.00,115.90,2.854470,242.509999,115.970000
2,115.99,116.00,115.90,2.854470,242.509999,115.970000
3,115.99,116.00,115.90,2.854470,242.509999,115.970000
4,115.99,116.00,115.90,2.854470,242.509999,115.970000
...,...,...,...,...,...,...
999995,835.00,836.00,833.50,1.703059,1421.998003,834.967190
999996,833.50,833.50,833.00,2.056183,1713.679502,833.427356
999997,832.91,833.50,832.00,5.918314,4926.382001,832.396155
999998,832.00,833.00,831.69,6.147849,5120.985402,832.971875


In [17]:
# Display the target series (the Close column)
y

0           4.39
1         115.98
2         115.98
3         115.98
4         115.98
           ...  
999995    833.50
999996    833.00
999997    832.00
999998    833.00
999999    831.69
Name: Close, Length: 1000000, dtype: float64

In [18]:
# Splitting data into train and test sets using the sklearn library
from sklearn.model_selection import train_test_split

In [19]:
# Hold out 30% of the rows as the test set; random_state fixes the shuffle so
# the split is reproducible.
# NOTE(review): rows are shuffled before splitting even though the source is
# a time-ordered price series — confirm a random split is what is wanted.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

In [20]:
# Alias for the training features, used by the sweetviz comparison below
Train_data = X_train

In [21]:
# Alias for the test features, used by the sweetviz comparison below
Test_data = (
    X_test
)

In [22]:
# Build a sweetviz report comparing the training features with the test
# features; each dataset is passed together with the label shown in the report
Bitcoin_datsasts_analysis = sv.compare([Train_data, "X_train"], [Test_data, "X_test"])

                                             |                                             | [  0%]   00:00 ->…

In [23]:
# Save the train-vs-test comparison report as an HTML file
Bitcoin_datsasts_analysis.show_html("Bitcoin_datsasts_analysis.html")

Report Bitcoin_datsasts_analysis.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.


In [22]:
#Applying Random Forest Regressor
from sklearn.ensemble import RandomForestRegressor

In [23]:
# Random forest regressor with scikit-learn's default hyperparameters.
# random_state is pinned (the same seed the train/test split uses) so the
# forest's internal randomness is reproducible and repeated runs of the
# notebook report the same score.
regressor = RandomForestRegressor(random_state=42)

In [24]:
# Train the random forest on the training features and target
regressor.fit(X=X_train, y=y_train)

RandomForestRegressor()

In [26]:
# Predict the Close value for every row of the held-out test features
y_pred = regressor.predict(X=X_test)

In [28]:
# Importing the r2_score function from sklearn.metrics
from sklearn.metrics import r2_score

In [29]:
# Score the test-set predictions with R² (the coefficient of determination).
# The variable keeps the name `accuracy` because a later cell displays it,
# but the value is an R² score, not a classification accuracy.
accuracy = r2_score(y_true=y_test, y_pred=y_pred)

In [30]:
# Display the R² score computed on the test set
accuracy

0.999977371590558

# After applying the Random Forest Regressor, the R² score (coefficient of determination) of our model on the test set is about 0.99998.