In [20]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [21]:
import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt

In [22]:
tweets_path = '/content/drive/My Drive/database/Tweets/bitcoin/'
bitcoin_file = '/content/drive/My Drive/database/bitcoinHistoricalData/bitcoin.csv'
crypto_name = 'bitcoin'

In [23]:
data = pd.read_csv(bitcoin_file, encoding='utf-8', parse_dates=True, usecols=["Timestamp", "Close", "Weighted_Price"])
group = data.groupby('Timestamp')
Real_Price = group['Weighted_Price'].mean()
print(data)
print(Real_Price)
# 967.372019

                  Timestamp     Close  Weighted_Price
0       2017-01-01 00:00:00    967.34      967.372019
1       2017-01-01 00:01:00    967.47      967.415156
2       2017-01-01 00:02:00    966.65      967.333900
3       2017-01-01 00:03:00    967.48      967.357170
4       2017-01-01 00:04:00    966.62      967.479187
...                     ...       ...             ...
525595  2017-12-31 23:55:00  13961.23    13978.662816
525596  2017-12-31 23:56:00  13930.01    13946.754817
525597  2017-12-31 23:57:00  13930.00    13930.003929
525598  2017-12-31 23:58:00  13930.01    13930.009965
525599  2017-12-31 23:59:00  13931.02    13930.015248

[525600 rows x 3 columns]
Timestamp
2017-01-01 00:00:00      967.372019
2017-01-01 00:01:00      967.415156
2017-01-01 00:02:00      967.333900
2017-01-01 00:03:00      967.357170
2017-01-01 00:04:00      967.479187
                           ...     
2017-12-31 23:55:00    13978.662816
2017-12-31 23:56:00    13946.754817
2017-12-31 23:57:00    1393

In [24]:
# We will predict here last 30 days price.
prediction_days = 43200 # 43200m = 30 days
# prediction_days = 60000 # 43200m = 30 days
# Real_Price = Real_Price[:len(Real_Price)-10*prediction_days]
df_train= Real_Price[:len(Real_Price)-prediction_days] # Training set = contains data removing the last 30 days
df_test= Real_Price[len(Real_Price)-prediction_days:] # Test set = contains only last 30 days
print(df_train)
print(df_test)

Timestamp
2017-01-01 00:00:00     967.372019
2017-01-01 00:01:00     967.415156
2017-01-01 00:02:00     967.333900
2017-01-01 00:03:00     967.357170
2017-01-01 00:04:00     967.479187
                          ...     
2017-02-04 23:55:00    1036.813793
2017-02-04 23:56:00    1036.758949
2017-02-04 23:57:00    1036.752810
2017-02-04 23:58:00    1036.760000
2017-02-04 23:59:00    1036.961518
Name: Weighted_Price, Length: 50400, dtype: float64
Timestamp
2017-02-05 00:00:00    1037.520371
2017-02-05 00:01:00    1038.131545
2017-02-05 00:02:00    1037.855262
2017-02-05 00:03:00    1038.577733
2017-02-05 00:04:00    1038.783165
                          ...     
2017-03-06 23:55:00    1279.801693
2017-03-06 23:56:00    1280.520094
2017-03-06 23:57:00    1280.644383
2017-03-06 23:58:00    1280.000000
2017-03-06 23:59:00    1279.992297
Name: Weighted_Price, Length: 43200, dtype: float64


In [25]:
# Currently, our data is in the form: [samples, features] and we are framing the problem as one time step for each sample. 
# We can transform the prepared train and test input data into the expected structure using numpy.reshape() as follows
training_set = df_train.values
training_set = np.reshape(training_set, (len(training_set), 1))
# print(training_set)

test_set = df_test.values
inputs = np.reshape(test_set, (len(test_set), 1))
# print(inputs)
# LSTMs are sensitive to the scale of the input data, specifically when the sigmoid (default) or 
# tanh activation functions are used. It can be a good practice to rescale the data to the range 
# of 0-to-1, also called normalizing. We can easily normalize the dataset using the 
# MinMaxScaler preprocessing class from the scikit-learn library
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1)) # changed here
training_set = sc.fit_transform(training_set) # transforming Training data set
# print(training_set)
inputs = sc.transform(inputs) # transforming Test data set
# print(inputs)
# reshape into X=t and Y=t+1 ie., in training data set value at t will be mapped to value at t+1
X_train = training_set[0:len(training_set)-1]
y_train = training_set[1:len(training_set)]
# Here we are training the model with previous min value with the next min value
# Therefore X_train will contain one value less than training set and
# y_train will not have the starting value of training set

X_train = np.reshape(X_train, (len(X_train), 1, 1))
# print(X_train)
inputs = np.reshape(inputs, (len(inputs), 1, 1))
# print(inputs)

In [26]:
# Importing the Keras libraries and packages
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

# Initialising the RNN
regressor = Sequential()

# Adding the input layer and the LSTM layer (4 LSTM blocks or neurons).
# Default sigmoid activation function is used for the LSTM blocks.
regressor.add(LSTM(units = 4, activation = 'sigmoid', input_shape=(1, 1)))

# Adding the output layer
regressor.add(Dense(units = 1))

# Compiling the RNN
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')

# Fitting the RNN to the Training set
regressor.fit(X_train, y_train, batch_size = 5, epochs = 4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x7fc7af2068d0>

In [27]:
# Making the predictions
predicted_BTC_price = regressor.predict(inputs)
predicted_BTC_price = sc.inverse_transform(predicted_BTC_price)
# print(predicted_BTC_price)
dataframe = pd.DataFrame.from_records(predicted_BTC_price)
print(dataframe)
dataframe.columns = ['Predicted_Price']
df_test = df_test.to_frame(name='Actual_Price')
df_test = df_test.reset_index()
df = pd.concat([df_test, dataframe], axis=1)
print(df)

                 0
0      1039.114746
1      1039.723022
2      1039.448120
3      1040.167114
4      1040.371338
...            ...
43195  1254.755859
43196  1255.293457
43197  1255.386475
43198  1254.904419
43199  1254.898682

[43200 rows x 1 columns]
                 Timestamp  Actual_Price  Predicted_Price
0      2017-02-05 00:00:00   1037.520371      1039.114746
1      2017-02-05 00:01:00   1038.131545      1039.723022
2      2017-02-05 00:02:00   1037.855262      1039.448120
3      2017-02-05 00:03:00   1038.577733      1040.167114
4      2017-02-05 00:04:00   1038.783165      1040.371338
...                    ...           ...              ...
43195  2017-03-06 23:55:00   1279.801693      1254.755859
43196  2017-03-06 23:56:00   1280.520094      1255.293457
43197  2017-03-06 23:57:00   1280.644383      1255.386475
43198  2017-03-06 23:58:00   1280.000000      1254.904419
43199  2017-03-06 23:59:00   1279.992297      1254.898682

[43200 rows x 3 columns]


In [None]:
import matplotlib.pyplot as plt
x1=df['Timestamp']
y1=df['Predicted_Price']
print(x1.shape)
plt.figure(figsize = (25,10))
plt.plot(x1, y1, label = "Predicted BTC price", linewidth=0.5)
x2 = df['Timestamp']
# print(x2)
y2 = df['Actual_Price']
plt.plot(x2, y2, label = "Actual BTC Price", linewidth=0.5)
plt.title("Time Series Forecasting by RNN")
# print("Before legend")
plt.legend()
# print("Before show")
# plt.xlim(x1[0],x1[len(x1)-1])
# plt.ylim(y1[0],y1[len(y1)-1])
plt.axis()
plt.show()