<a href="https://colab.research.google.com/github/chandvariarajul/Price-Prediction/blob/main/Bitcoin_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive

In [2]:
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

In [4]:
## read the dataset
df = pd.read_csv('/content/drive/MyDrive/bitcoin.csv')

In [5]:
df.head(10)

Unnamed: 0,Date,Price
0,09-08-2024,54476.6
1,09-07-2024,54156.5
2,09-06-2024,53966.8
3,09-05-2024,56183.2
4,09-04-2024,57973.4
5,09-03-2024,57479.8
6,09-02-2024,59134.0
7,09-01-2024,57315.7
8,08/31/2024,58978.6
9,08/30/2024,59119.7


In [6]:
# Corrected code to remove the 'Date' column
df.drop(['Date'], axis=1, inplace=True)

In [7]:
df.head()

Unnamed: 0,Price
0,54476.6
1,54156.5
2,53966.8
3,56183.2
4,57973.4


In [8]:
predictionDays = 30
# Create another column shifted 'n'  units up
df['Prediction'] = df[['Price']].shift(-predictionDays)
# show the first 5 rows
df.head()

Unnamed: 0,Price,Prediction
0,54476.6,60850.6
1,54156.5,61699.7
2,53966.8,55120.9
3,56183.2,56057.8
4,57973.4,53979.0


In [9]:
df.tail()

Unnamed: 0,Price,Prediction
88,68260.1,
89,67319.8,
90,69538.2,
91,69650.6,
92,69310.1,


In [10]:
df = df.apply(lambda x: x.str.replace(',', '').astype(float) if x.dtype == 'object' else x)

In [11]:
# Create the independent data set
# Convert the DataFrame into a numpy array and drop the prediction column
x = np.array(df.drop(['Prediction'], axis=1))

# Remove the last 'n' rows where 'n' is the predictionDays
x = x[:len(df) - predictionDays]

print(x)


[[54476.6]
 [54156.5]
 [53966.8]
 [56183.2]
 [57973.4]
 [57479.8]
 [59134. ]
 [57315.7]
 [58978.6]
 [59119.7]
 [59373.5]
 [59016. ]
 [59450.9]
 [62846.2]
 [64273.2]
 [64159.3]
 [64053.1]
 [60372.2]
 [61158.1]
 [59005.8]
 [59470.9]
 [58446.3]
 [59483.1]
 [58877.2]
 [57534.6]
 [58707.8]
 [60595.2]
 [59350. ]
 [58713.3]
 [60931.7]
 [60850.6]
 [61699.7]
 [55120.9]
 [56057.8]
 [53979. ]
 [58141.8]
 [60696.7]
 [61478.7]
 [65372.9]
 [64626. ]
 [66184.9]
 [66798.7]
 [68256.3]
 [67843.1]
 [67908.6]
 [65799.3]
 [65370.5]
 [65937.8]
 [67553.6]
 [68158.7]
 [67148.5]
 [66677.4]
 [63980.5]
 [64089.2]
 [65049.7]
 [64782.4]
 [60794.9]
 [59209.8]
 [57885.1]
 [57337.3]
 [57746.7]
 [58039.4]
 [56724.7]]


In [12]:
# Create the dependent data set
# convert the data frame into a numpy array
y = np.array(df['Prediction'])
# Get all the values except last 'n' rows
y = y[:-predictionDays]
print(y)

[60850.6 61699.7 55120.9 56057.8 53979.  58141.8 60696.7 61478.7 65372.9
 64626.  66184.9 66798.7 68256.3 67843.1 67908.6 65799.3 65370.5 65937.8
 67553.6 68158.7 67148.5 66677.4 63980.5 64089.2 65049.7 64782.4 60794.9
 59209.8 57885.1 57337.3 57746.7 58039.4 56724.7 55861.1 58259.2 56641.8
 57026.3 60199.3 62103.3 62890.1 62754.3 60973.4 60403.3 61685.3 60849.4
 61809.4 60292.7 63196.2 64261.  64128.5 64854.3 64980.9 65159.9 66498.8
 66674.7 66223.  66034.8 66773.1 68260.1 67319.8 69538.2 69650.6 69310.1]


In [13]:
# Train-test split
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2)

# Set the predictionDays array equal to the last 30 rows from the original data set
predictionDays_array = np.array(df.drop(['Prediction'], axis=1))[-predictionDays:]

print(predictionDays_array)


[[55861.1]
 [58259.2]
 [56641.8]
 [57026.3]
 [60199.3]
 [62103.3]
 [62890.1]
 [62754.3]
 [60973.4]
 [60403.3]
 [61685.3]
 [60849.4]
 [61809.4]
 [60292.7]
 [63196.2]
 [64261. ]
 [64128.5]
 [64854.3]
 [64980.9]
 [65159.9]
 [66498.8]
 [66674.7]
 [66223. ]
 [66034.8]
 [66773.1]
 [68260.1]
 [67319.8]
 [69538.2]
 [69650.6]
 [69310.1]]


In [14]:
## SVR
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.00001)
svr_rbf.fit(xtrain, ytrain)

In [15]:
## testing
svr_rbf_confidence = svr_rbf.score(xtest,ytest)
print('SVR_RBF accuracy :',svr_rbf_confidence)

SVR_RBF accuracy : 0.11877822642103031


In [16]:
# print the predicted values
svm_prediction = svr_rbf.predict(xtest)
print(svm_prediction)
print()
print(ytest)

[64290.40414124 62069.02861881 61813.46246547 61787.66392048
 62901.87661641 64993.50259496 62608.2381949  62504.8289068
 62870.25110204 62517.29048508 63154.78332403 64921.99736022
 62445.9655594 ]

[66773.1 56057.8 61699.7 58259.2 68260.1 64089.2 66223.  65049.7 67843.1
 66674.7 61809.4 65159.9 60794.9]


In [17]:
# Print the model predictions for the next 30 days
svm_prediction = svr_rbf.predict(predictionDays_array)
print(svm_prediction)
print()
#Print the actual price for bitcoin for last 30 days
print(df.tail(predictionDays))

[62187.50297289 62967.3577113  63767.19853098 63097.79652876
 63541.24609518 62654.02743896 62870.25282604 62870.23797715
 61944.52712696 63460.52440582 61277.77405699 61582.86113165
 61662.49148112 63633.83622085 62870.71278345 65413.13558155
 65638.89657706 62601.54986808 62580.83871332 62337.98318805
 63035.43854651 63073.94721923 62723.92390679 63035.32442644
 63019.84414156 62338.50013976 64058.99677463 62870.25275966
 62870.25282384 62870.2395338 ]

      Price  Prediction
63  55861.1         NaN
64  58259.2         NaN
65  56641.8         NaN
66  57026.3         NaN
67  60199.3         NaN
68  62103.3         NaN
69  62890.1         NaN
70  62754.3         NaN
71  60973.4         NaN
72  60403.3         NaN
73  61685.3         NaN
74  60849.4         NaN
75  61809.4         NaN
76  60292.7         NaN
77  63196.2         NaN
78  64261.0         NaN
79  64128.5         NaN
80  64854.3         NaN
81  64980.9         NaN
82  65159.9         NaN
83  66498.8         NaN
84  66674.7 