In [30]:
import os

# Print the current working directory
print("Current working directory:", os.getcwd())

# List all files in the current working directory
print("Files in the current working directory:", os.listdir(os.getcwd()))


Current working directory: /content
Files in the current working directory: ['.config', 'bitstamp_preprocessed_data.csv', 'bitstampUSD_1-min_data_2012-01-01_to_2020-04-22.csv.zip', 'bitstampUSD_1-min_data_2012-01-01_to_2020-04-22.csv', 'coinbase_preprocessed_data.csv', 'preprocess_data_py.py', 'coinbaseUSD_1-min_data_2014-12-01_to_2019-01-09.csv.zip', 'coinbaseUSD_1-min_data_2014-12-01_to_2019-01-09.csv', 'sample_data']


In [34]:
pip install pandas




Bitstamp Data Loaded:
      Close
0  0.000147
1  0.000147
2  0.000156
3  0.000157
4  0.000157
Shape: (3126480, 1)
Checking for NaN values in preprocessed data...
Bitstamp preprocessed data has NaN values: False
Coinbase Data Loaded:
      Close
0  0.015078
1  0.015078
2  0.018597
3  0.018597
4  0.018949
Shape: (1990691, 1)
Coinbase preprocessed data has NaN values: False


KeyboardInterrupt: 

In [35]:
#!/usr/bin/env python3

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import Masking
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

In [43]:

# Load preprocessed data
bitstamp_data = pd.read_csv('/content/bitstamp_preprocessed_data.csv')
coinbase_data = pd.read_csv('/content/coinbase_preprocessed_data.csv')

# calculating the number of features
n = bitstamp_data.shape[1]
print(f"Number of features: {n}")

# printing a message to confirm that the data has been loaded:
print("Data loaded successfully.")

def prepare_data(data, timesteps):
    X, Y = [], []
    for i in range(len(data) - timesteps):
        # Slicing data for X and Y
        x_sample = data[i:i + timesteps].values  # Input sequence
        y_sample = data.iloc[i + timesteps, 0]    # Target value (close price)

        X.append(x_sample)
        Y.append(y_sample)

    return np.array(X), np.array(Y)

timesteps = 24
bitstamp_X, bitstamp_y = prepare_data(bitstamp_data, timesteps)
coinbase_X, coinbase_y = prepare_data(coinbase_data, timesteps)

# splitting the data into a training set and a validation set
bitstamp_X_train, bitstamp_X_val, bitstamp_y_train, bitstamp_y_val = train_test_split(bitstamp_X, bitstamp_y, test_size=0.2)
coinbase_X_train, coinbase_X_val, coinbase_y_train, coinbase_y_val = train_test_split(coinbase_X, coinbase_y, test_size=0.2)

# creating the datasets
bitstamp_train_dataset = tf.data.Dataset.from_tensor_slices((bitstamp_X_train, bitstamp_y_train))
bitstamp_val_dataset = tf.data.Dataset.from_tensor_slices((bitstamp_X_val, bitstamp_y_val))

coinbase_train_dataset = tf.data.Dataset.from_tensor_slices((coinbase_X_train, coinbase_y_train))
coinbase_val_dataset = tf.data.Dataset.from_tensor_slices((coinbase_X_val, coinbase_y_val))

# printing a message to confirm that the datasets have been created:
print("Datasets created successfully.")

# defining LSTM model
def create_model():
    model = Sequential()
    model.add(LSTM(20, activation='tanh', input_shape=(timesteps, n)))
    model.add(Dense(1))

    # Custom Adam optimizer with a lower learning rate
    adam_optimizer = Adam(learning_rate=0.0001, clipvalue=1.0)

    # Compile the model with the custom optimizer
    model.compile(optimizer=adam_optimizer, loss='mse')

    model.summary()
    return model

# Creating and training models
bitstamp_model = create_model()
coinbase_model = create_model()

# Model training
bitstamp_history = bitstamp_model.fit(bitstamp_train_dataset.batch(256), epochs=5, validation_data=bitstamp_val_dataset.batch(256), verbose=1)
coinbase_history = coinbase_model.fit(coinbase_train_dataset.batch(256), epochs=5, validation_data=coinbase_val_dataset.batch(256), verbose=1)

# printing the training and validation loss and accuracy:
print(bitstamp_model.history.history)

# plotting the training & validation accuracy values
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(bitstamp_history.history['loss'])
plt.plot(bitstamp_history.history['val_loss'])
plt.title('Bitstamp Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')

# plotting the training & validation loss values
plt.subplot(1, 2, 2)
plt.plot(coinbase_history.history['loss'], label='train')
plt.plot(coinbase_history.history['val_loss'], label='test')
plt.title('Coinbase Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()

plt.tight_layout()
plt.show()

# saving the models
bitstamp_model.save('bitstamp_model_v2.h5')
coinbase_model.save('coinbase_model_v2.h5')

# printing a message to confirm that the models have been saved!!
print("Models saved successfully!")


Number of features: 1
Data loaded successfully.


KeyboardInterrupt: 

In [45]:
#!/usr/bin/env python3

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import Masking
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

# Load preprocessed data
bitstamp_data = pd.read_csv('/content/bitstamp_preprocessed_data.csv')
coinbase_data = pd.read_csv('/content/coinbase_preprocessed_data.csv')

# calculating the number of features
n = bitstamp_data.shape[1]
print(f"Number of features: {n}")

# printing a message to confirm that the data has been loaded:
print("Data loaded successfully.")

# prepare data for LSTM
def prepare_data(data, timesteps):
    X, Y = [], []
    for i in range(len(data) - timesteps):
        # Slicing data for X and Y
        x_sample = data[i:i + timesteps].values  # Input sequence
        y_sample = data.iloc[i + timesteps, 0]    # Target value (close price)

        X.append(x_sample)
        Y.append(y_sample)

    return np.array(X), np.array(Y)

timesteps = 60
bitstamp_X, bitstamp_y = prepare_data(bitstamp_data, timesteps)
coinbase_X, coinbase_y = prepare_data(coinbase_data, timesteps)

# splitting the data into a training set and a validation set
bitstamp_X_train, bitstamp_X_val, bitstamp_y_train, bitstamp_y_val = train_test_split(bitstamp_X, bitstamp_y, test_size=0.2)
coinbase_X_train, coinbase_X_val, coinbase_y_train, coinbase_y_val = train_test_split(coinbase_X, coinbase_y, test_size=0.2)

# creating the datasets
bitstamp_train_dataset = tf.data.Dataset.from_tensor_slices((bitstamp_X_train, bitstamp_y_train))
bitstamp_val_dataset = tf.data.Dataset.from_tensor_slices((bitstamp_X_val, bitstamp_y_val))

coinbase_train_dataset = tf.data.Dataset.from_tensor_slices((coinbase_X_train, coinbase_y_train))
coinbase_val_dataset = tf.data.Dataset.from_tensor_slices((coinbase_X_val, coinbase_y_val))

# printing a message to confirm that the datasets have been created:
print("Datasets created successfully.")

# defning LSTM model
def create_model():
    model = Sequential()
    model.add(LSTM(20, activation='tanh', input_shape=(timesteps, n)))
    model.add(Dense(1))

    # Custom Adam optimizer with a lower learning rate
    adam_optimizer = Adam(learning_rate=0.0001, clipvalue=1.0)

    # Compile the model with the custom optimizer
    model.compile(optimizer=adam_optimizer, loss='mse')

    model.summary()
    return model

# Creating and training models
bitstamp_model = create_model()
coinbase_model = create_model()

# Model training
bitstamp_history = bitstamp_model.fit(bitstamp_train_dataset.batch(256), epochs=5, validation_data=bitstamp_val_dataset.batch(256), verbose=1)
coinbase_history = coinbase_model.fit(coinbase_train_dataset.batch(256), epochs=5, validation_data=coinbase_val_dataset.batch(256), verbose=1)

# printing the training and validation loss and accuracy:
print(bitstamp_model.history.history)

# plotting the training & validation accuracy values
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(bitstamp_history.history['loss'])
plt.plot(bitstamp_history.history['val_loss'])
plt.title('Bitstamp Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')

# plotting the training & validation loss values
plt.subplot(1, 2, 2)
plt.plot(coinbase_history.history['loss'], label='train')
plt.plot(coinbase_history.history['val_loss'], label='test')
plt.title('Coinbase Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()

plt.tight_layout()
plt.show()

# saving the models
bitstamp_model.save('bitstamp_model_v2.h5')
coinbase_model.save('coinbase_model_v2.h5')

# printing a message to confirm that the models have been saved!!
print("Models saved successfully!")

Number of features: 1
Data loaded successfully.
Datasets created successfully.
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 20)                1760      
                                                                 
 dense (Dense)               (None, 1)                 21        
                                                                 
Total params: 1781 (6.96 KB)
Trainable params: 1781 (6.96 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 20)                1760      
                                                                 
 dense_1 (Dense)             (None, 1)                 

KeyboardInterrupt: 

In [None]:
#!/usr/bin/env python3

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras import mixed_precision
import matplotlib.pyplot as plt

policy = mixed_precision.Policy("mixed_float16")
mixed_precision.set_global_policy(policy)

# Load preprocessed data
bitstamp_data = pd.read_csv('/content/bitstamp_preprocessed_data.csv')
coinbase_data = pd.read_csv('/content/coinbase_preprocessed_data.csv')

# Prepare data for LSTM
def prepare_data(data, timesteps):
    X, Y = [], []
    for i in range(len(data) - timesteps):
        X.append(data[i:i + timesteps])
        Y.append(data.iloc[i + timesteps, 0])
    return np.array(X), np.array(Y)

timesteps = 60
bitstamp_X, bitstamp_y = prepare_data(bitstamp_data, timesteps)
coinbase_X, coinbase_y = prepare_data(coinbase_data, timesteps)

# Split data into training and validation sets
bitstamp_X_train, bitstamp_X_val, bitstamp_y_train, bitstamp_y_val = train_test_split(bitstamp_X, bitstamp_y, test_size=0.2, random_state=42)
coinbase_X_train, coinbase_X_val, coinbase_y_train, coinbase_y_val = train_test_split(coinbase_X, coinbase_y, test_size=0.2, random_state=42)

# Define LSTM model
def create_model(timesteps, n_features):
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(timesteps, n_features)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model

# Create and train models
bitstamp_model = create_model(timesteps, bitstamp_data.shape[1])
coinbase_model = create_model(timesteps, coinbase_data.shape[1])

bitstamp_history = bitstamp_model.fit(bitstamp_X_train, bitstamp_y_train, epochs=5, batch_size=256, validation_data=(bitstamp_X_val, bitstamp_y_val), verbose=1)
coinbase_history = coinbase_model.fit(coinbase_X_train, coinbase_y_train, epochs=5, batch_size=256, validation_data=(coinbase_X_val, coinbase_y_val), verbose=1)

# Plotting the training & validation loss values
plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
plt.plot(bitstamp_history.history['loss'])
plt.plot(bitstamp_history.history['val_loss'])
plt.title('Bitstamp Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')

plt.subplot(1, 2, 2)
plt.plot(coinbase_history.history['loss'], label='train')
plt.plot(coinbase_history.history['val_loss'], label='validation')
plt.title('Coinbase Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()

plt.tight_layout()
plt.show()

# Save the models
bitstamp_model.save('bitstamp_model_v2.h5')
coinbase_model.save('coinbase_model_v2.h5')

print("Models saved")


The dtype policy mixed_float16 may run slowly because this machine does not have a GPU. Only Nvidia GPUs with compute capability of at least 7.0 run quickly with mixed_float16.


Epoch 1/5
  76/9771 [..............................] - ETA: 7:50:01 - loss: 0.0118