In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Read the raw price history from the Excel workbook that sits next to the
# notebook, then preview the first rows to sanity-check the columns.
df = pd.read_excel(io='Bitcoin Data.xlsx')
df.head(n=5)

Unnamed: 0,SNo,Name,Symbol,Date,High,Low,Open,Close,Volume,Marketcap
0,1,Bitcoin,BTC,2013-04-29 23:59:59,147.488007,134.0,134.444,144.539993,0.0,1603769000.0
1,2,Bitcoin,BTC,2013-04-30 23:59:59,146.929993,134.050003,144.0,139.0,0.0,1542813000.0
2,3,Bitcoin,BTC,2013-05-01 23:59:59,139.889999,107.720001,139.0,116.989998,0.0,1298955000.0
3,4,Bitcoin,BTC,2013-05-02 23:59:59,125.599998,92.281898,116.379997,105.209999,0.0,1168517000.0
4,5,Bitcoin,BTC,2013-05-03 23:59:59,108.127998,79.099998,106.25,97.75,0.0,1085995000.0


In [None]:
# Basic data-quality report: compute every statistic first, then print them
# in the same order as before.
missing_values = df.isna().sum()      # null cells per column
duplicates = df.duplicated().sum()    # rows that exactly repeat an earlier row
summary = df.describe()               # descriptive statistics per column

print("Missing Values:\n", missing_values)
print("\nNumber of Duplicate Rows:", duplicates)
print("\nSummary Statistics:\n", summary)

Missing Values:
 SNo          0
Name         0
Symbol       0
Date         0
High         0
Low          0
Open         0
Close        0
Volume       0
Marketcap    0
dtype: int64

Number of Duplicate Rows: 0

Summary Statistics:
                SNo                           Date          High           Low  \
count  2991.000000                           2991   2991.000000   2991.000000   
mean   1496.000000  2017-06-02 23:59:59.000000256   6893.326038   6486.009539   
min       1.000000            2013-04-29 23:59:59     74.561096     65.526001   
25%     748.500000            2015-05-17 11:59:59    436.179001    422.879486   
50%    1496.000000            2017-06-02 23:59:59   2387.610107   2178.500000   
75%    2243.500000            2019-06-20 11:59:59   8733.926948   8289.800459   
max    2991.000000            2021-07-06 23:59:59  64863.098908  62208.964366   
std     863.571653                            NaN  11642.832456  10869.032130   

               Open         Close      

In [None]:
from datetime import datetime
# Regression target: the midpoint between each row's High and Low.
df['target'] = df['High'].add(df['Low']).div(2)

# Turn Date into a plain number so it can be used as a model feature; values
# that cannot be converted become NaN (errors='coerce').
# NOTE(review): the recorded output shows values like 1367279999000000000,
# which looks like nanoseconds since the Unix epoch - confirm.
df['Date'] = pd.to_numeric(df['Date'], errors='coerce', downcast='integer')
df.head()

Unnamed: 0,SNo,Name,Symbol,Date,High,Low,Open,Close,Volume,Marketcap,target
0,1,Bitcoin,BTC,1367279999000000000,147.488007,134.0,134.444,144.539993,0.0,1603769000.0,140.744003
1,2,Bitcoin,BTC,1367366399000000000,146.929993,134.050003,144.0,139.0,0.0,1542813000.0,140.489998
2,3,Bitcoin,BTC,1367452799000000000,139.889999,107.720001,139.0,116.989998,0.0,1298955000.0,123.805
3,4,Bitcoin,BTC,1367539199000000000,125.599998,92.281898,116.379997,105.209999,0.0,1168517000.0,108.940948
4,5,Bitcoin,BTC,1367625599000000000,108.127998,79.099998,106.25,97.75,0.0,1085995000.0,93.613998


In [None]:
# Feature matrix and regression target.
# NOTE(review): 'target' is (High + Low) / 2 and both High and Low are used as
# features, so the target is an exact linear function of the inputs - confirm
# that this is the intended modelling task.
X = df[['Date', 'High', 'Low', 'Volume', 'Marketcap']].copy()
Y = df['target']

# Split BEFORE scaling so that held-out rows never influence the scaling
# statistics (computing mean/std on the full data leaks test information).
# NOTE(review): this is a shuffled random split of dated rows - confirm that a
# chronological split is not required.
train_X, test_X, train_Y, test_Y = train_test_split(X, Y, test_size=0.3, random_state=42)

# Scale Data
# Standardise every feature with statistics from the training split only.
# ddof=0 matches np.std (population standard deviation).
feature_mean = train_X.mean()
feature_std = train_X.std(ddof=0).replace(0, 1)  # constant column: avoid dividing by zero

train_X = (train_X - feature_mean) / feature_std
test_X = (test_X - feature_mean) / feature_std
X = (X - feature_mean) / feature_std  # full matrix on the same (train-derived) scale

# Preview instead of printing the entire frame.
X.head()

          Date      High       Low    Volume  Marketcap
0    -1.731472 -0.579495 -0.584511 -0.577489  -0.565517
1    -1.730314 -0.579543 -0.584507 -0.577489  -0.565806
2    -1.729155 -0.580148 -0.586930 -0.577489  -0.566962
3    -1.727997 -0.581376 -0.588350 -0.577489  -0.567580
4    -1.726839 -0.582876 -0.589563 -0.577489  -0.567972
...        ...       ...       ...       ...        ...
2986  1.726839  2.323385  2.418713  1.473205   2.439800
2987  1.727997  2.406684  2.476871  0.713638   2.508498
2988  1.729155  2.495020  2.568318  0.742250   2.563664
2989  1.730314  2.438905  2.459476  0.837414   2.426739
2990  1.731472  2.417789  2.495019  0.825749   2.470374

[2991 rows x 5 columns]


In [None]:
def gradient_descent(w, b, alpha, X, Y, y_pred):
    """Take one gradient-descent step for a linear model under mean-squared error.

    Parameters
    ----------
    w : array-like
        Current weights, one per column of ``X``. Not modified; a new array
        is returned.
    b : scalar
        Current bias term.
    alpha : float
        Learning rate (step size).
    X : 2-D array or DataFrame
        Feature matrix with one row per sample; must expose ``.T``.
    Y : 1-D array or Series
        Observed targets, one per row of ``X``.
    y_pred : 1-D array or Series
        Predictions for ``X`` made with the incoming ``w`` and ``b``.

    Returns
    -------
    tuple
        ``(w, b, cost, absolute_cost)``: the updated weights and bias, followed
        by the mean squared error and mean absolute error of ``y_pred``. Both
        losses describe the predictions made *before* this update.
    """
    error = Y - y_pred
    cost = np.mean(error ** 2)
    absolute_cost = np.mean(np.abs(error))

    n = len(Y)
    # Gradients of mean((Y - (X.w + b))**2) with respect to w and b.
    deriv_w = -2 * np.dot(X.T, error) / n
    deriv_b = -2 * np.sum(error) / n

    # Out-of-place update: an in-place `w -= ...` would silently overwrite the
    # caller's array and fails outright for integer-dtype weights.
    w = w - alpha * deriv_w
    b = b - alpha * deriv_b

    return w, b, cost, absolute_cost


def linear_regression(X, Y, w, b, alpha, decay, iterations, decay_every=500, verbose=True):
    """Fit a linear model with batch gradient descent and a stepwise-decayed learning rate.

    Parameters
    ----------
    X : 2-D array or DataFrame
        Feature matrix; must provide ``.dot``.
    Y : 1-D array or Series
        Targets, one per row of ``X``.
    w : array-like
        Initial weights. Copied on entry, so the caller's object is untouched.
    b : scalar
        Initial bias.
    alpha : float
        Initial learning rate.
    decay : float
        Factor multiplied into ``alpha`` every ``decay_every`` iterations.
        Iteration 0 counts, so the first decay happens immediately.
    iterations : int
        Number of gradient-descent steps to run.
    decay_every : int, default 500
        Interval, in iterations, at which the learning rate is decayed and
        progress is reported.
    verbose : bool, default True
        When False, the progress report is suppressed; decay still applies.

    Returns
    -------
    tuple
        ``(w, b)``: the fitted weights (a float ndarray) and bias.

    Raises
    ------
    ValueError
        If ``decay_every`` is smaller than 1.
    """
    if decay_every < 1:
        raise ValueError("decay_every must be a positive integer")

    # Private float copy: protects the caller's initial weights from any
    # in-place update performed by the step function.
    w = np.array(w, dtype=float)

    for i in range(iterations):
        y_pred = X.dot(w) + b
        w, b, cost, abs_cost = gradient_descent(w, b, alpha, X, Y, y_pred)

        if i % decay_every == 0:
            alpha *= decay
            if verbose:
                # The losses were measured before this iteration's update;
                # the weights and bias shown are the values after it.
                print("Loss (MSE): ", cost)
                print("Loss (MAE): ", abs_cost, "\n")
                print("Iteration:", i, "Weights:", w, "Bias:", b)

    return w, b

In [None]:
# Linear Regression

# Seeded generator so the initial weights - and therefore the whole training
# run - are reproducible after Restart Kernel -> Run All.
rng = np.random.default_rng(42)
weights = rng.standard_normal(train_X.shape[1]) * 0.01  # small random start, one weight per feature
bias = 0.0

# NOTE(review): in the recorded run the loss climbs to ~1e25 before it starts
# falling, which suggests this step size begins too large and only becomes
# stable once the decay has shrunk it - confirm and consider a smaller alpha.
alpha = 0.23405
decay = 0.99
iterations = 20000

weights, bias = linear_regression(train_X, train_Y, weights, bias, alpha, decay, iterations)

# Getting predictions
y_pred = test_X.dot(weights) + bias

Loss (MSE):  175442236.70519295
Loss (MAE):  6764.139542332115 

Iteration: 0 Weights: [3579.55675381 5425.83412855 5403.83656812 4430.29908236 5417.35101659] Bias: 3166.293719765663
Loss (MSE):  2.2286045872519407e+21
Loss (MAE):  32229713492.663895 

Iteration: 500 Weights: [8.69085123e+09 1.09905978e+10 1.09441843e+10 1.01984654e+10
 1.09705948e+10] Bias: 120541347.87518296
Loss (MSE):  9.073985669991643e+25
Loss (MAE):  6503377724071.453 

Iteration: 1000 Weights: [1.71911093e+12 2.17401601e+12 2.16483513e+12 2.01732754e+12
 2.17005951e+12] Bias: 23842602195.709976
Loss (MSE):  6.925071321411408e+21
Loss (MAE):  56813567382.13208 

Iteration: 1500 Weights: [1.47193852e+10 1.86143818e+10 1.85357728e+10 1.72727778e+10
 1.85805027e+10] Bias: 204151948.9677791
Loss (MSE):  807068637.3296984
Loss (MAE):  19405.528586934943 

Iteration: 2000 Weights: [ 4945.91410119 11211.29502513 10742.50932402  5759.27374959
  7942.9555369 ] Bias: 6757.556327094118
Loss (MSE):  2591.852903560663
Loss (

In [None]:
from sklearn.metrics import r2_score

# Predictions on the training split, for comparison against the held-out score.
y_pred_train = train_X.dot(weights) + bias

# Coefficient of determination (R^2) on each split.
accuracy = r2_score(y_true=test_Y, y_pred=y_pred)
accuracy_train = r2_score(y_true=train_Y, y_pred=y_pred_train)

print(f"R2 Score using testing data: {accuracy}")
print(f"R2 Score using training data: {accuracy_train}")

R2 Score using testing data: 0.9999999996845813
R2 Score using training data: 0.9999999996063824
