<a href="https://colab.research.google.com/github/Tulipraaj/FoCID-Forecasting_Cryptocurrency_Investment_Decision/blob/main/model_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, GRU, Bidirectional
from keras.layers import Flatten, ConvLSTM2D
from keras.layers import TimeDistributed
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error ,r2_score

In [4]:
df = pd.read_csv("BTC.csv")

In [5]:
df.head()

Unnamed: 0,date,block_size,difficulty,hashrate,market_cap,mining_profitability,transaction,tweets,price
0,01-01-2018,966169,1922580000000.0,1.5e+19,226661000000.0,2.174,241601,62044.0,13523
1,02-01-2018,968332,1931140000000.0,1.6e+19,233415000000.0,2.431,340809,77723.0,13924
2,03-01-2018,975882,1931140000000.0,1.49e+19,251929000000.0,2.737,395806,79086.0,15027
3,04-01-2018,975713,1931140000000.0,1.64e+19,249552000000.0,2.606,424840,74534.0,14883
4,05-01-2018,974894,1931140000000.0,1.5e+19,266323000000.0,2.631,342564,76404.0,15881


In [6]:
df.tail()

Unnamed: 0,date,block_size,difficulty,hashrate,market_cap,mining_profitability,transaction,tweets,price
1821,27-12-2022,706675,35364100000000.0,2.12e+20,323439000000.0,0.0589,239244,85632.0,16807
1822,28-12-2022,542347,35364100000000.0,2.53e+20,320239000000.0,0.0704,272781,107117.0,16640
1823,29-12-2022,692913,35364100000000.0,2.74e+20,319323000000.0,0.0545,265814,96225.0,16591
1824,30-12-2022,725552,35364100000000.0,2.54e+20,318539000000.0,0.0658,290857,97462.0,16550
1825,31-12-2022,573296,35364100000000.0,2.94e+20,318944000000.0,0.0557,242169,77382.0,16570


In [7]:
df.isna().sum()

date                     0
block_size               0
difficulty               0
hashrate                 0
market_cap               0
mining_profitability     0
transaction              0
tweets                  37
price                    0
dtype: int64

In [8]:
df.dtypes

date                     object
block_size                int64
difficulty              float64
hashrate                float64
market_cap              float64
mining_profitability    float64
transaction               int64
tweets                  float64
price                     int64
dtype: object

In [9]:
df.fillna(method='ffill', inplace=True)


In [10]:
df.isna().sum()

date                    0
block_size              0
difficulty              0
hashrate                0
market_cap              0
mining_profitability    0
transaction             0
tweets                  0
price                   0
dtype: int64

In [11]:
X = df[['tweets','block_size','market_cap','difficulty','hashrate','mining_profitability','transaction']]
# Assuming 'Close' is the target variable
y = df['price']

In [12]:
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
y_scaled = scaler.fit_transform(np.array(y).reshape(-1, 1))


In [13]:
def create_sequences(data, sequence_length):
    sequences = []
    for i in range(len(data) - sequence_length):
        seq = data[i:i + sequence_length]
        sequences.append(seq)
    return np.array(sequences)

In [14]:
sequence_length = 60  # Adjust this based on how many past data points you want to use for prediction
X_sequence = create_sequences(X_scaled, sequence_length)
y_sequence = y_scaled[sequence_length:]

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X_sequence, y_sequence, test_size=0.2, random_state=42)

In [16]:
ensemble_size = 1  # Number of GRU models in the ensemble
num_records = X_train.shape[0]
bag_size = int(0.8 * num_records)

In [18]:
ensemble = []
for _ in range(ensemble_size):
    # Create a bootstrap sample
    indices = np.random.choice(num_records, size=bag_size, replace=True)
    X_bag = X_train[indices]
    y_bag = y_train[indices]

    # Build and compile the GRU model
    model = Sequential()
    model.add(LSTM(32, activation='relu', input_shape=(sequence_length, X_train.shape[2]),return_sequences=True))
    model.add(GRU(64, activation='relu', input_shape=(sequence_length, X_train.shape[2]),return_sequences=True))
    model.add(LSTM(32, activation='relu', input_shape=(sequence_length, X_train.shape[2])))

    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error')

    # Train the model on the bootstrap sample
    model.fit(X_bag, y_bag, epochs=10, batch_size=32, verbose=0)

    # Append the trained model to the ensemble
    ensemble.append(model)





In [19]:
from sklearn.metrics import mean_absolute_error

# Step 7: Make predictions on the testing data
y_pred = model.predict(X_test)

# Step 8: Inverse transform the predictions and true values to the original scale
y_pred_original = scaler.inverse_transform(y_pred)
y_test_original = scaler.inverse_transform(y_test)

# Step 9: Evaluate the model's performance

def mean_percentage_error(y_true, y_pred):
    return np.mean((y_true - y_pred) / y_true) * 100

def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

mse = mean_squared_error(y_test_original, y_pred_original)
mae = mean_absolute_error(y_test_original, y_pred_original)
mpe = mean_percentage_error(y_test_original, y_pred_original)
mape = mean_absolute_percentage_error(y_test_original, y_pred_original)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("Mean Percentage Error:", mpe)
print("Mean Absolute Percentage Error:", mape)

Mean Squared Error: 4185304.538886296
Mean Absolute Error: 1322.9969647940943
Mean Percentage Error: 2.470740085976652
Mean Absolute Percentage Error: 7.293581304458645


In [21]:
import matplotlib.pyplot as plt

# Step 7: Make predictions on the training and validation data
y_train_pred = model.predict(X_train)
y_val_pred = model.predict(X_test)

# Step 8: Calculate the performance metrics
mse_train = mean_squared_error(y_train, y_train_pred)
rmse_train = np.sqrt(mse_train)
mae_train = mean_absolute_error(y_train, y_train_pred)

r2_train = r2_score(y_train, y_train_pred)

mse_val = mean_squared_error(y_test, y_val_pred)
rmse_val = np.sqrt(mse_val)
mae_val = mean_absolute_error(y_test, y_val_pred)
r2_val = r2_score(y_test, y_val_pred)

print("Training MSE:", mse_train)
print("Training RMSE:", rmse_train)
print("Training MAE:", mae_train)

print("Training R-squared:", r2_train)

print("Validation MSE:", mse_val)
print("Validation RMSE:", rmse_val)
print("Validation MAE:", mae_val)

print("Validation R-squared:", r2_val)

Training MSE: 0.0008956132202943018
Training RMSE: 0.029926797695281428
Training MAE: 0.019856365648789134
Training R-squared: 0.9873800811049843
Validation MSE: 0.001012573882159068
Validation RMSE: 0.03182096607834319
Validation MAE: 0.020578260528632968
Validation R-squared: 0.9860378363429769


In [22]:
df.corr()

  df.corr()


Unnamed: 0,block_size,difficulty,hashrate,market_cap,mining_profitability,transaction,tweets,price
block_size,1.0,-0.192238,-0.245351,-0.118483,0.17615,0.519285,-0.209529,-0.111356
difficulty,-0.192238,1.0,0.98128,0.581672,-0.415679,-0.02512,0.684638,0.567948
hashrate,-0.245351,0.98128,1.0,0.584665,-0.404508,0.000298,0.672749,0.571135
market_cap,-0.118483,0.581672,0.584665,1.0,0.031528,-0.113602,0.749314,0.999788
mining_profitability,0.17615,-0.415679,-0.404508,0.031528,1.0,-0.13265,0.029822,0.046558
transaction,0.519285,-0.02512,0.000298,-0.113602,-0.13265,1.0,-0.280354,-0.114472
tweets,-0.209529,0.684638,0.672749,0.749314,0.029822,-0.280354,1.0,0.743737
price,-0.111356,0.567948,0.571135,0.999788,0.046558,-0.114472,0.743737,1.0
