Import libraries

In [None]:

import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error,mean_squared_error,explained_variance_score
from math import sqrt
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

Read Data

In [None]:
# Location of the Bitstamp 1-minute BTC/USD CSV
# (presumably a Colab-mounted Google Drive path -- adjust for other environments).
csv_path = '/content/drive/MyDrive/dataset/bitstampUSD_1-min_data_2012-01-01_to_2021-03-31.csv'
data = pd.read_csv(csv_path)

In [None]:
# Show the freshly loaded frame (pandas truncates the middle rows in the rendering).
data

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_BTC,Volume_Currency,Weighted_Price
0,1325317920,4.39,4.39,4.39,4.39,0.455581,2.000000,4.390000
1,1325317980,,,,,,,
2,1325318040,,,,,,,
3,1325318100,,,,,,,
4,1325318160,,,,,,,
...,...,...,...,...,...,...,...,...
1048570,1388232120,734.60,734.60,730.00,734.55,1.789687,1313.640757,734.005698
1048571,1388232180,734.55,734.55,730.71,730.71,0.110236,80.802051,732.991499
1048572,1388232240,734.40,734.40,730.51,730.51,0.554786,407.247985,734.063488
1048573,1388232300,730.51,733.63,730.51,731.10,0.620446,453.777190,731.372707


Check which columns contain NaN values

In [None]:
# Per column: True if at least one value in that column is missing.
data.isnull().any()

Timestamp          False
Open                True
High                True
Low                 True
Close               True
Volume_BTC          True
Volume_Currency     True
Weighted_Price      True
dtype: bool

Drop rows with NaN values

In [None]:
# Remove every row that has at least one missing value.
# This mutates `data` in place, so the raw frame is no longer available afterwards.
data.dropna(inplace=True)

In [None]:

# Re-run the missing-value check to confirm no column still contains NaN.
data.isnull().any()

Timestamp          False
Open               False
High               False
Low                False
Close              False
Volume_BTC         False
Volume_Currency    False
Weighted_Price     False
dtype: bool

In [None]:
# Preview the first rows that survived the NaN drop (the original row labels are kept).
data.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_BTC,Volume_Currency,Weighted_Price
0,1325317920,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
478,1325346600,4.39,4.39,4.39,4.39,48.0,210.72,4.39
547,1325350740,4.5,4.57,4.5,4.57,37.862297,171.380337,4.526411
548,1325350800,4.58,4.58,4.58,4.58,9.0,41.22,4.58
1224,1325391360,4.58,4.58,4.58,4.58,1.502,6.87916,4.58


Convert Timestamp to a datetime truncated to the start of its month

In [None]:
# Turn the epoch-second Timestamp column into a month-resolution DatetimeIndex.
#
# Step 1: parse the Unix seconds and collapse each value to its calendar month.
# 'M' is the canonical monthly Period alias; the lowercase 'm' spelling is not a
# documented alias and newer pandas releases may warn about or reject it.
data['Timestamp'] = pd.to_datetime(data['Timestamp'], unit='s').dt.to_period('M')

# Step 2: move the periods into the index (set_index drops the column) and convert
# them back to timestamps, which places every row on the first day of its month.
data = data.set_index('Timestamp').to_timestamp()
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume_BTC,Volume_Currency,Weighted_Price
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2011-12-01,4.39,4.39,4.39,4.39,0.455581,2.000000,4.390000
2011-12-01,4.39,4.39,4.39,4.39,48.000000,210.720000,4.390000
2011-12-01,4.50,4.57,4.50,4.57,37.862297,171.380337,4.526411
2011-12-01,4.58,4.58,4.58,4.58,9.000000,41.220000,4.580000
2012-01-01,4.58,4.58,4.58,4.58,1.502000,6.879160,4.580000
...,...,...,...,...,...,...,...
2013-12-01,734.60,734.60,730.00,734.55,1.789687,1313.640757,734.005698
2013-12-01,734.55,734.55,730.71,730.71,0.110236,80.802051,732.991499
2013-12-01,734.40,734.40,730.51,730.51,0.554786,407.247985,734.063488
2013-12-01,730.51,733.63,730.51,731.10,0.620446,453.777190,731.372707


In [None]:
# Move the Timestamp index back into an ordinary column and restore a 0..n-1 row index.
data=data.reset_index()

Split Timestamp into day, month, and year columns

In [None]:
# Derive calendar features from the Timestamp column.
# The column already holds datetime values at this point (it comes from the
# DatetimeIndex restored by reset_index), so the `.dt` accessor is used directly
# instead of re-parsing it three times with a format string that had no effect.
timestamp = data['Timestamp']
data['day'] = timestamp.dt.day
data['month'] = timestamp.dt.month
data['year'] = timestamp.dt.year
# NOTE(review): the timestamps were truncated to the start of each month earlier,
# so `day` is 1 for every row and carries no information -- confirm this is intended.
data.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_BTC,Volume_Currency,Weighted_Price,day,month,year
0,2011-12-01,4.39,4.39,4.39,4.39,0.455581,2.0,4.39,1,12,2011
1,2011-12-01,4.39,4.39,4.39,4.39,48.0,210.72,4.39,1,12,2011
2,2011-12-01,4.5,4.57,4.5,4.57,37.862297,171.380337,4.526411,1,12,2011
3,2011-12-01,4.58,4.58,4.58,4.58,9.0,41.22,4.58,1,12,2011
4,2012-01-01,4.58,4.58,4.58,4.58,1.502,6.87916,4.58,1,1,2012


In [None]:
# The day/month/year columns now carry the date information, so the original
# Timestamp column is removed (in place) before the features are assembled.
data.drop(columns="Timestamp", inplace=True)
data


Unnamed: 0,Open,High,Low,Close,Volume_BTC,Volume_Currency,Weighted_Price,day,month,year
0,4.39,4.39,4.39,4.39,0.455581,2.000000,4.390000,1,12,2011
1,4.39,4.39,4.39,4.39,48.000000,210.720000,4.390000,1,12,2011
2,4.50,4.57,4.50,4.57,37.862297,171.380337,4.526411,1,12,2011
3,4.58,4.58,4.58,4.58,9.000000,41.220000,4.580000,1,12,2011
4,4.58,4.58,4.58,4.58,1.502000,6.879160,4.580000,1,1,2012
...,...,...,...,...,...,...,...,...,...,...
342164,734.60,734.60,730.00,734.55,1.789687,1313.640757,734.005698,1,12,2013
342165,734.55,734.55,730.71,730.71,0.110236,80.802051,732.991499,1,12,2013
342166,734.40,734.40,730.51,730.51,0.554786,407.247985,734.063488,1,12,2013
342167,730.51,733.63,730.51,731.10,0.620446,453.777190,731.372707,1,12,2013


Separate the features (x) from the target (y)

In [None]:
# Feature matrix: every column except the prediction target.
x = data.drop(columns='Weighted_Price')

In [None]:
# Target vector: the weighted price the model should predict.
y = data['Weighted_Price']

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

Split x and y into training and test sets

In [None]:

# Hold out 20% of the rows for evaluation.
# A fixed random_state makes the shuffle, and therefore every score reported
# below, reproducible across kernel restarts; without it each run draws a
# different split.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=.2, random_state=42)

Model Creation

In [None]:
# Ordinary linear regression is the model that is trained and evaluated below.
model=LinearRegression()
# Alternative regressors, left disabled:
# m=RandomForestRegressor()
# m1=DecisionTreeRegressor()

In [None]:
# Fit the linear model on the training split.
model.fit(xtrain,ytrain)
# Fit calls for the disabled alternative regressors:
#m.fit(xtrain,ytrain)
#m1.fit(xtrain,ytrain)


LinearRegression()

Check the R² score on the test set

In [None]:
# Score the fitted model on the held-out split. For this regressor the value is
# the same number that metrics.r2_score reports further down (R^2), not a
# classification accuracy.
# NOTE(review): the features include the same-row Open/High/Low/Close and volume
# columns, so a score this close to 1 is likely a property of the feature set
# rather than forecasting skill -- confirm this setup is intended.
model.score(xtest,ytest)
# Score calls for the disabled alternative regressors:
# m.score(xtest,ytest)
# m1.score(xtest,ytest)


0.9999954272273276

Prediction

In [None]:
# Predict the weighted price for one hand-entered row.
# The values are wrapped in a DataFrame that reuses the training column names:
# the model was fitted on a DataFrame, and passing a bare nested list makes
# scikit-learn warn that "X does not have valid feature names".
# Value order follows the feature columns:
# Open, High, Low, Close, Volume_BTC, Volume_Currency, day, month, year.
sample = pd.DataFrame(
    [[4.39, 4.39, 4.39, 4.39, 0.455581, 2.000000, 1, 12, 2011]],
    columns=xtrain.columns,
)
model.predict(sample)

  "X does not have valid feature names, but"


array([4.38832277])

In [None]:
# Predictions for the whole held-out split, used by the metric cells below.
y_pred=model.predict(xtest)

In [None]:
# Evaluate the test-set predictions with several regression metrics.
# Each value is printed with a label so the output can be read without
# cross-referencing the code.
evs = explained_variance_score(ytest, y_pred)
print(f"Explained variance: {evs}")

# The mean absolute error gets its own name instead of being stored in `mse`,
# which is reserved for the mean squared error below.
mae = mean_absolute_error(ytest, y_pred)
print(f"MAE: {mae}")

mse = mean_squared_error(ytest, y_pred)
print(f"MSE: {mse}")

# RMSE is the square root of the MSE, i.e. an error in the target's own units.
rmse = sqrt(mse)
print(f"RMSE: {rmse}")

0.9999954272285082
0.17443704857959152
0.33572587987116675
0.5794185705266675


In [None]:
# Compute R^2 explicitly on the test predictions; the result matches the value
# returned by model.score(xtest, ytest) above.
from sklearn import metrics
metrics.r2_score(ytest,y_pred)

0.9999954272273276

In [None]:
import pickle

Save the model with pickle

In [None]:
# Serialize the fitted model to disk.
# The context manager closes the file as soon as the dump finishes, which
# guarantees the bytes are flushed before the file is read back.
with open("bitcoin.pkl", "wb") as file:
    pickle.dump(model, file)

In [None]:
# Reload the model that was just written to bitcoin.pkl.
# The module is imported under a private alias so this cell can be re-run:
# the name `pickle` is rebound to the loaded model below, after which
# `pickle.load` would no longer refer to the module.
import pickle as _pickle_module

# The context manager closes the file handle once loading is done.
with open("bitcoin.pkl", "rb") as a:
    # NOTE: the later cells call pickle.predict(...) / pickle.score(...), so the
    # loaded model is deliberately kept under the name `pickle`, which shadows
    # the module from here on.
    # Only unpickle files you created yourself; pickle.load can execute arbitrary code.
    pickle = _pickle_module.load(a)

In [None]:
# `pickle` is the model reloaded from bitcoin.pkl in the previous cell (it shadows
# the pickle module); this overwrites y_pred with the reloaded model's predictions.
y_pred=pickle.predict(xtest)

In [None]:
# Score the reloaded model (bound to the name `pickle`) on the test split to
# check that it matches the score of the model before it was saved.
pickle.score(xtest,ytest)

0.9999954272273276

In [None]:
# Predict one hand-entered row with the reloaded model (bound to the name `pickle`).
# The values are wrapped in a DataFrame that reuses the training column names,
# because passing a bare nested list to a model fitted on a DataFrame makes
# scikit-learn warn that "X does not have valid feature names".
# Value order follows the feature columns:
# Open, High, Low, Close, Volume_BTC, Volume_Currency, day, month, year.
sample = pd.DataFrame(
    [[4.39, 4.39, 4.39, 4.39, 0.455581, 2.000000, 1, 12, 2011]],
    columns=xtrain.columns,
)
pickle.predict(sample)

  "X does not have valid feature names, but"


array([4.38832277])