In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
import ast
import torch
import torch.nn as nn
import math
# Notebook-level MinMaxScaler instance.
# NOTE(review): no visible cell reads `scaler`; the Dataset class below builds
# its own scaler1/scaler2 — confirm it is unused before removing it.
scaler = MinMaxScaler()

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
# from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
import numpy as np
import torch
import matplotlib.pyplot as plt
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("Training on device: ", device)

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Index-aligned triple of text windows, BTC windows and targets.

    The base class is spelled out as ``torch.utils.data.Dataset`` on purpose:
    this notebook later defines its own class named ``Dataset``, which rebinds
    the bare name. Re-running this cell after that one would otherwise make
    ``CustomDataset`` inherit from the wrong class.

    Args:
        text_emb: indexable collection of text-embedding windows.
        btc_data: indexable collection of BTC feature windows.
        target: indexable collection of targets, one per window.

    Raises:
        ValueError: if the three collections do not have the same length.
    """

    def __init__(self, text_emb, btc_data, target):
        # Samples are paired purely by position, so unequal lengths would
        # either raise an IndexError mid-epoch or silently drop samples.
        if not (len(text_emb) == len(btc_data) == len(target)):
            raise ValueError(
                "text_emb, btc_data and target must have the same length, got "
                f"{len(text_emb)}, {len(btc_data)} and {len(target)}"
            )
        self.text_emb = text_emb
        self.btc_data = btc_data
        self.target = target

    def __len__(self):
        """Number of samples (length of ``target``)."""
        return len(self.target)

    def __getitem__(self, idx):
        """Return ``(text_emb[idx], btc_data[idx], target[idx])``."""
        return self.text_emb[idx], self.btc_data[idx], self.target[idx]

In [None]:
# Kaggle input files: text embeddings and BTC market data.
txt_path = "/kaggle/input/bitcoin-dataset-all/All_text_final.csv"
btc_path = "/kaggle/input/bitcoin-dataset-all/All_btc_final.csv"
btcdf = pd.read_csv(btc_path)
textdf = pd.read_csv(txt_path)
# Only the last expression of a cell is rendered, so just btcdf's preview is shown.
textdf.head()
btcdf.head()

In [None]:
# Show the rows whose embedding is missing.
textdf.loc[textdf['ada_embedding'].isna()]

In [None]:
import sys
class Dataset:
    """Builds sliding-window train/val/test DataLoaders from two CSV files.

    Each sample pairs ``window`` consecutive rows of text embeddings and BTC
    features with the ``close`` value of the row right after the window.

    NOTE(review): this class name rebinds ``Dataset``, which earlier cells
    import from ``torch.utils.data``. The name is kept because later cells
    instantiate ``Dataset(...)``; renaming needs a coordinated change.
    """

    def __init__(self, text_path, btc_path, window_size=5, max_rows=2500):
        """Load both CSV files.

        Args:
            text_path: CSV containing an ``ada_embedding`` column whose cells
                are string representations of numeric lists.
            btc_path: CSV containing ``time``, ``open``, ``high``, ``low``,
                ``close`` and ``volume`` columns.
            window_size: number of consecutive rows per input window.
            max_rows: keep only the first ``max_rows`` rows of each file;
                ``None`` keeps every row. The default preserves the earlier
                fixed 2500-row cap.
        """
        columns = ['open', 'high', 'low', 'close', 'volume']
        self.textdf = pd.read_csv(text_path).iloc[:max_rows]
        self.btcdf1 = pd.read_csv(btc_path).iloc[:max_rows]  # keeps 'time' for plotting
        self.btcdf = self.btcdf1[columns]
        self.window = window_size
        self.ind = 0  # not read by any method of this class; kept for compatibility

    def get_n_day_data_text(self, data):
        """Stack every run of ``window`` consecutive Series entries.

        The final possible window is skipped on purpose: it has no following
        row to act as its target.
        """
        n = self.window
        return np.array([data[i:i + n].to_list() for i in range(len(data) - n)])

    def get_n_day_data_btc(self, data):
        """Stack every run of ``window`` consecutive rows of an array-like."""
        n = self.window
        return np.array([data[i:i + n] for i in range(len(data) - n)])

    def text_preprocess(self, data):
        """Parse the ``ada_embedding`` strings to float32 arrays and window them."""
        def convert_to_float(value):
            # literal_eval parses the stored list text without executing code.
            return np.array(ast.literal_eval(value), dtype=np.float32)

        embeddings = data['ada_embedding'].map(convert_to_float)
        return self.get_n_day_data_text(embeddings)

    def btc_preprocess(self, data):
        """Window already-scaled BTC feature rows."""
        return self.get_n_day_data_btc(data)

    def _build_split(self, start, stop, fit=False):
        """Build one split from rows ``[start, stop)``; returns ``(dataset, dates)``.

        Inputs are windows over the rows of the span; targets and dates are
        the span's rows from ``start + window`` on, i.e. the row following
        each window. With ``fit=True`` both scalers are fitted on this span
        (training split only) before transforming it.
        """
        window = self.window
        btc_rows = self.btcdf.iloc[start:stop]
        close = self.btcdf['close'].iloc[start + window:stop].to_numpy().reshape(-1, 1)

        if fit:
            btc_scaled = self.scaler1.fit_transform(btc_rows)
            target_scaled = self.scaler2.fit_transform(close)
        else:
            btc_scaled = self.scaler1.transform(btc_rows)
            target_scaled = self.scaler2.transform(close)

        text = torch.FloatTensor(self.text_preprocess(self.textdf.iloc[start:stop])).to(device)
        btc = torch.FloatTensor(self.btc_preprocess(btc_scaled)).to(device)
        target = torch.FloatTensor(target_scaled).to(device)
        dates = self.btcdf1['time'].iloc[start + window:stop]
        return CustomDataset(text, btc, target), dates

    def prepare_dataloader(self, batch_size=32, val_size=0.2, test_size=0.2):
        """Split chronologically and return ``(train, val, test)`` DataLoaders.

        Args:
            batch_size: batch size used by all three loaders.
            val_size: fraction of rows assigned to validation.
            test_size: fraction of rows assigned to testing.

        Side effects:
            Sets ``scaler1`` (BTC features) and ``scaler2`` (close target),
            both fitted on the training span only, and ``train_date``,
            ``val_date``, ``test_date`` (the ``time`` value of each target).
        """
        window = self.window
        n = len(self.textdf)
        n_val = int(n * val_size)
        n_test = int(n * test_size)
        # Row index (exclusive) where each span ends. The training span is
        # extended by one window so it still yields n - n_val - n_test samples.
        train_stop = (n - n_val - n_test) + window
        val_stop = train_stop + n_val

        self.scaler1 = MinMaxScaler()
        self.scaler2 = MinMaxScaler()

        # The training split must be built first: it is the one that fits the scalers.
        train_dataset, self.train_date = self._build_split(0, train_stop, fit=True)
        val_dataset, self.val_date = self._build_split(train_stop, val_stop)
        test_dataset, self.test_date = self._build_split(val_stop, None)

        # shuffle=False keeps batches in chronological order.
        train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
        val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
        test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        return train_dataloader, val_dataloader, test_dataloader

In [None]:
# Each sample spans `window_size` consecutive rows of the two Kaggle CSVs.
window_size = 6
txt_path = "/kaggle/input/bitcoin-dataset-all/All_text_final.csv"
btc_path = "/kaggle/input/bitcoin-dataset-all/All_btc_final.csv"
df = Dataset(text_path=txt_path, btc_path=btc_path, window_size=window_size)
train_dataloader, val_dataloader, test_dataloader = df.prepare_dataloader()

In [None]:
# Timestamps matching the targets of each split (populated by prepare_dataloader).
train_date, val_date, test_date = df.train_date, df.val_date, df.test_date

In [None]:
# Sanity check: print the tensor shapes of the first test batch, if there is one.
first_batch = next(iter(test_dataloader), None)
if first_batch is not None:
    text_emb_batch, btc_data_batch, target_batch = first_batch
    print(text_emb_batch.shape, btc_data_batch.shape, target_batch.shape)

In [None]:
def train(model, dataloader, criterion, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: callable as ``model(text_emb, btc_data)``.
        dataloader: iterable (with ``len``) of ``(text_emb, btc_data, target)`` batches.
        criterion: loss called as ``criterion(output, target)``.
        optimizer: optimizer stepped once per batch.

    Returns:
        ``(mean_batch_loss, predictions)``, where ``predictions`` is a NumPy
        array of the outputs produced before each weight update.
    """
    model.train()
    running_loss = 0.0
    collected = []

    for text_batch, btc_batch, target_batch in dataloader:
        optimizer.zero_grad()

        batch_output = model(text_batch, btc_batch)
        batch_loss = criterion(batch_output, target_batch)
        # Detach so the stored predictions do not keep the autograd graph alive.
        collected.extend(batch_output.detach().cpu().numpy())

        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

    mean_loss = running_loss / len(dataloader)
    return mean_loss, np.array(collected)

In [None]:
def evaluate(model, dataloader, criterion):
    """Compute the mean batch loss and predictions without updating the model.

    Args:
        model: callable as ``model(text_emb, btc_data)``; switched to eval mode.
        dataloader: iterable (with ``len``) of ``(text_emb, btc_data, target)`` batches.
        criterion: loss called as ``criterion(output, target)``.

    Returns:
        ``(mean_batch_loss, predictions)`` with ``predictions`` as a NumPy array.
    """
    model.eval()
    running_loss = 0.0
    collected = []

    with torch.no_grad():
        for text_batch, btc_batch, target_batch in dataloader:
            batch_output = model(text_batch, btc_batch)
            running_loss += criterion(batch_output, target_batch).item()
            collected.extend(batch_output.detach().cpu().numpy())

    mean_loss = running_loss / len(dataloader)
    return mean_loss, np.array(collected)

In [None]:
def test(model, dataloader, criterion=None):
    """Run ``model`` over ``dataloader`` in eval mode and compute regression metrics.

    Args:
        model: callable as ``model(text_emb, btc_data)``.
        dataloader: iterable of ``(text_emb, btc_data, target)`` batches.
        criterion: optional and unused. It is still accepted so both call
            styles work: the cells in this notebook call ``test(model, loader)``
            with two arguments, which used to raise ``TypeError`` because the
            parameter was required. The loss was never part of the result.

    Returns:
        ``(mse, r2, rmse, mae, mape, predictions)``. The metrics are computed
        on the values exactly as the loader yields them (no inverse scaling
        happens here); ``predictions`` is a NumPy array.
    """
    model.eval()
    predictions = []
    targets = []

    with torch.no_grad():
        for text_emb, btc_data, target in dataloader:
            output = model(text_emb.to(device), btc_data.to(device))

            predictions.extend(output.cpu().numpy())
            targets.extend(target.cpu().numpy())

    predictions = np.array(predictions)
    targets = np.array(targets)

    mse = mean_squared_error(targets, predictions)
    r2 = r2_score(targets, predictions)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(targets, predictions)
    mape = mean_absolute_percentage_error(targets, predictions)

    return mse, r2, rmse, mae, mape, predictions

In [None]:
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal positional encodings to a batch-first sequence.

    Args:
        d_model: size of the last input dimension (even or odd).
        max_len: number of positions to precompute.
    """

    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.d_model = d_model

        # Encoding table, shape [max_len, d_model].
        pe = torch.zeros(max_len, d_model)

        # Position indices, shape [max_len, 1].
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)

        # One frequency per sine column, shape [ceil(d_model / 2)].
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))

        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one cosine column fewer than sine columns;
        # without trimming the frequencies this assignment fails on a shape mismatch.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])

        # Leading batch dimension, shape [1, max_len, d_model].
        pe = pe.unsqueeze(0)

        # A buffer follows the module across devices and is saved in the
        # state_dict, but is not a trainable parameter.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x + pe[:, :seq_len]`` where ``seq_len = x.size(1)``."""
        x = x + self.pe[:, :x.size(1), :].to(x.device)
        return x

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class PositionalEncodingCNN(nn.Module):
    """Adds a learned positional encoding produced by a small 1-D CNN.

    The raw position indices ``0 .. seq_len - 1`` go through three
    convolutions (ReLU between them) to give a ``[seq_len, d_model]``
    encoding that is added to every sample of the batch.

    Args:
        d_model: size of the last input dimension.
        max_len: largest sequence length supported.
        kernel_size: kernel size of each convolution; padding of
            ``kernel_size // 2`` is applied to every layer.
    """

    def __init__(self, d_model, max_len=5000, kernel_size=3):
        super(PositionalEncodingCNN, self).__init__()
        self.d_model = d_model
        self.max_len = max_len

        padding = kernel_size // 2
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=d_model, kernel_size=kernel_size, padding=padding)
        self.conv2 = nn.Conv1d(in_channels=d_model, out_channels=d_model, kernel_size=kernel_size, padding=padding)
        self.conv3 = nn.Conv1d(in_channels=d_model, out_channels=d_model, kernel_size=kernel_size, padding=padding)

        # The indices are constants (only the convolutions learn), so they are
        # stored as a buffer rather than a frozen nn.Parameter: same
        # state_dict key, but no longer handed to the optimizer.
        self.register_buffer(
            'position_indices',
            torch.arange(0, max_len, dtype=torch.float).unsqueeze(0),
        )

    def forward(self, x):
        """Return ``x`` plus the CNN positional encoding.

        Args:
            x: tensor of shape ``[batch, seq_len, d_model]``.

        Raises:
            ValueError: if ``seq_len`` exceeds ``max_len``.
        """
        _, seq_len, _ = x.size()
        if seq_len > self.max_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds max_len {self.max_len}"
            )

        # Shape [1, 1, seq_len]: one channel, one shared "sample".
        positions = self.position_indices[:, :seq_len].unsqueeze(1)

        pos_enc = F.relu(self.conv1(positions))
        pos_enc = F.relu(self.conv2(pos_enc))
        pos_enc = self.conv3(pos_enc)

        # Shape [1, seq_len, d_model]. The encoding is the same for every
        # sample, so it is computed once and broadcast over the batch instead
        # of being repeated batch_size times.
        pos_enc = pos_enc.permute(0, 2, 1)

        return x + pos_enc.to(x.device)

In [None]:
class BTCEncoder(nn.Module):
    """Two-layer Transformer encoder over BTC feature windows, then batch norm.

    Args:
        d_model: feature size of each time step.
        nhead: number of attention heads.
        dim_feedforward: hidden size of each layer's feed-forward block.
    """

    def __init__(self, d_model, nhead, dim_feedforward):
        super(BTCEncoder, self).__init__()
        # NOTE(review): nn.TransformerEncoder is documented to work on its own
        # copies of the layer it is given, so this attribute likely holds an
        # extra set of weights that never trains. Confirm before removing it,
        # since that would change the state_dict keys.
        self.transformer_encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(self.transformer_encoder_layer, 2)
        self.batch_norm3 = nn.BatchNorm1d(d_model)

    def forward(self, btc_data, src_mask=None):
        """Encode ``btc_data`` of shape ``[batch, seq_len, d_model]``.

        Args:
            btc_data: batch-first input sequence.
            src_mask: optional attention mask passed to the encoder. It used
                to be accepted and silently ignored; ``None`` (the default)
                keeps the earlier behaviour.

        Returns:
            Tensor with the same shape as ``btc_data``.
        """
        encoded = self.transformer_encoder(btc_data, mask=src_mask)
        # BatchNorm1d normalises over dim 1, so features are moved there and back.
        normed = self.batch_norm3(encoded.transpose(1, 2))
        return normed.transpose(1, 2)

In [None]:
class TextEncoder(nn.Module):
    """Four-layer Transformer encoder over text-embedding windows, then batch norm.

    Args:
        d_model: embedding size of each time step.
        nhead: number of attention heads.
        dim_feedforward: hidden size of each layer's feed-forward block.
    """

    def __init__(self, d_model, nhead, dim_feedforward):
        super(TextEncoder, self).__init__()
        # NOTE(review): nn.TransformerEncoder is documented to work on its own
        # copies of the layer it is given, so this attribute likely holds an
        # extra set of weights that never trains. Confirm before removing it,
        # since that would change the state_dict keys.
        self.transformer_encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, batch_first=True)
        # The stray trailing backslash that spliced this statement into the
        # following comment line has been removed.
        self.transformer_encoder = nn.TransformerEncoder(self.transformer_encoder_layer, 4)
        self.batch_norm3 = nn.BatchNorm1d(d_model)

    def forward(self, text_emb, src_mask=None):
        """Encode ``text_emb`` of shape ``[batch, seq_len, d_model]``.

        Args:
            text_emb: batch-first input sequence.
            src_mask: optional attention mask passed to the encoder. It used
                to be accepted and silently ignored; ``None`` (the default)
                keeps the earlier behaviour.

        Returns:
            Tensor with the same shape as ``text_emb``.
        """
        encoded = self.transformer_encoder(text_emb, mask=src_mask)
        # BatchNorm1d normalises over dim 1, so features are moved there and back.
        normed = self.batch_norm3(encoded.transpose(1, 2))
        return normed.transpose(1, 2)

In [None]:
class TransformerLayer(nn.Module):
    """Fuses encoded text and BTC windows and regresses one value per sample.

    Pipeline: CNN positional encoding on each modality -> per-modality
    Transformer encoder -> concatenation along the feature axis -> two-layer
    single-head Transformer encoder -> batch norm -> flatten -> MLP head.
    """

    def __init__(self, text_dmodel,btc_dmodel, text_heads,btc_heads,dim_feedforward,window_size):
        super().__init__()
        fused_dim = text_dmodel + btc_dmodel

        # Sub-modules are created in a fixed order so weight initialisation
        # consumes the random generator in the same sequence on every run.
        self.text_posEncoder = PositionalEncodingCNN(d_model=text_dmodel, max_len=window_size)
        self.btc_posEncoder = PositionalEncodingCNN(d_model=btc_dmodel, max_len=window_size)
        self.TextEncoder = TextEncoder(text_dmodel, text_heads, dim_feedforward)
        self.BTCEncoder = BTCEncoder(btc_dmodel, btc_heads, dim_feedforward)
        # Not applied in forward(); positional information is added per modality.
        self.posEncoder = PositionalEncodingCNN(d_model=fused_dim, max_len=window_size)
        self.transformer_encoder_layer = nn.TransformerEncoderLayer(
            fused_dim, 1, dim_feedforward, batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(self.transformer_encoder_layer, 2)
        self.batch_norm1 = nn.BatchNorm1d(fused_dim)
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(fused_dim * window_size, 512)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(512, 1)

    def forward(self, text_emb, btc_data,src_mask = None):
        """Predict one value per sample.

        Args:
            text_emb: ``[batch_size, seq_len, text_dmodel]``.
            btc_data: ``[batch_size, seq_len, btc_dmodel]``.
            src_mask: accepted for interface compatibility; not used here.

        Returns:
            Tensor of shape ``[batch_size, 1]``.
        """
        text_out = self.TextEncoder(self.text_posEncoder(text_emb))
        btc_out = self.BTCEncoder(self.btc_posEncoder(btc_data))

        # Join both modalities along the feature axis before the shared encoder.
        fused = torch.cat((text_out, btc_out), dim=-1)
        fused = self.transformer_encoder(fused)

        # BatchNorm1d normalises over dim 1, so features are moved there and back.
        fused = self.batch_norm1(fused.transpose(1, 2)).transpose(1, 2)

        hidden = self.relu(self.linear1(self.flatten(fused)))
        return self.linear2(hidden)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
def plot_data(date,true,prediction):
    """Plot true and predicted prices against ``date`` on the current figure.

    Args:
        date: x-axis values; every 30th entry becomes a tick.
        true: ground-truth series.
        prediction: predicted series.
    """
    for series, series_label in ((true, 'True'), (prediction, 'Predicted')):
        plt.plot(date, series, label=series_label)

    plt.title('True vs Predicted Bitcoin Prices')
    plt.xlabel('Date')
    plt.ylabel('Price($)')
    plt.legend()

    # Thin the ticks to every 30th date so the labels stay readable.
    plt.xticks(date[::30], rotation=45)
    plt.gcf().autofmt_xdate()

    plt.show()

In [None]:
# --- Model hyper-parameters ---
text_dmodel = 1536      # Dimension of text embeddings
btc_dmodel = 5          # One feature per BTC column (open, high, low, close, volume)
text_heads = 32         # Attention heads in the text encoder
btc_heads = 5           # Attention heads in the BTC encoder
dim_feedforward = 100   # Dimension of feedforward layer in Transformer

# --- Optimisation settings ---
lr = 0.0001             # Learning rate
weight_decay = 0.0001
# NOTE(review): the loaders above were built with prepare_dataloader()'s default
# batch size; this variable is not passed to it in the visible cells.
batch_size = 32         # Batch size
epochs = 100            # Number of epochs

# Create model, criterion, optimizer
model = TransformerLayer(
    text_dmodel=text_dmodel,
    btc_dmodel=btc_dmodel,
    text_heads=text_heads,
    btc_heads=btc_heads,
    dim_feedforward=dim_feedforward,
    window_size=window_size,
).to(device)
criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

In [None]:
def plot_encoding(encodings,title="sinusoidal Positional Encoding"):
    """Draw a 2-D encoding matrix as a heat map on a new 5x5 figure.

    Args:
        encodings: 2-D array-like; rows are drawn along the y axis.
        title: figure title.
    """
    plt.figure(figsize=(5, 5))
    plt.pcolormesh(encodings, cmap='viridis')
    plt.title(title)
    plt.ylabel('Sequence Position')
    plt.xlabel('Embedding Dimension')
    plt.tight_layout()
    plt.show()

In [None]:
# Train for `epochs` passes; after each one, plot validation predictions
# against the true prices and report validation metrics.
for epoch in range(epochs):
    train_loss, pred2 = train(model, train_dataloader, criterion, optimizer)
    val_loss, pred = evaluate(model, val_dataloader, criterion)

    # Collect the validation targets as one column vector, the layout
    # scaler2 was fitted on, instead of appending scalars one by one.
    t_val = torch.cat([target.view(-1) for _, _, target in val_dataloader]).cpu().numpy()
    true = df.scaler2.inverse_transform(t_val.reshape(-1, 1))
    prediction = df.scaler2.inverse_transform(pred)
    plot_data(val_date, true, prediction)
    # The second figure is the validation loss (it was mislabelled "Train Loss").
    print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

    # criterion is passed explicitly because test() declares it as a parameter.
    mse, r2, rmse, mae, mape, _ = test(model, val_dataloader, criterion)
    print(f"MSE: {mse:.4f}, R2: {r2:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.4f}")

In [None]:
# Iterate once over the validation loader without using the batches; afterwards
# i, j, k stay bound to the last batch's (text, btc, target) items.
# NOTE(review): nothing in the visible cells reads them — looks like leftover debugging.
for i,j,k in val_dataloader:
    pass

In [None]:
# Validation metrics. criterion is passed because test() declares it as a
# parameter; omitting it raises TypeError.
mse, r2, rmse, mae, mape, pred = test(model, val_dataloader, criterion)
print(f"MSE: {mse:.4f}, R2: {r2:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.4f}")

In [None]:
# Test-set metrics. criterion is passed because test() declares it as a
# parameter; omitting it raises TypeError.
mse, r2, rmse, mae, mape, pred2 = test(model, test_dataloader, criterion)
print(f"MSE: {mse:.4f}, R2: {r2:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, MAPE: {mape:.4f}")

In [None]:
# Flatten every validation target into a list of scalar CPU tensors
# (despite its name, t_test holds the *validation* targets).
t_test = [
    value.cpu()
    for _, _, batch_targets in val_dataloader
    for value in batch_targets.view(-1)
]

In [None]:
# Flatten every test-set target into a list of scalar CPU tensors.
t_test2 = [
    value.cpu()
    for _, _, batch_targets in test_dataloader
    for value in batch_targets.view(-1)
]

In [None]:
# Validation targets as a 1-D NumPy array.
target_test = np.asarray(t_test)

In [None]:
# Test-set targets as a 1-D NumPy array.
target_test2 = np.asarray(t_test2)

In [None]:
# Map scaled validation targets and predictions back to price units. The
# targets are passed as a column, the layout scaler2 was fitted on.
prediction = df.scaler2.inverse_transform(pred)
true = df.scaler2.inverse_transform(target_test.reshape(-1, 1))

In [None]:
# Map scaled test targets and predictions back to price units. The targets
# are passed as a column, the layout scaler2 was fitted on.
prediction2 = df.scaler2.inverse_transform(pred2)
true2 = df.scaler2.inverse_transform(target_test2.reshape(-1, 1))

In [None]:
# Validation split: true vs predicted prices.
plot_data(date=val_date, true=true, prediction=prediction)

In [None]:
# Test split: true vs predicted prices.
plot_data(date=test_date, true=true2, prediction=prediction2)

In [None]:
# Re-read the BTC CSV and plot its close price; the same column is passed as
# both "true" and "predicted", so the two lines coincide.
# NOTE(review): btc_jan is not used by any visible cell.
btc_jan = pd.read_csv("/kaggle/input/bitcoin-dataset-all/All_btc_final.csv")
btc_dec = pd.read_csv(btc_path)
plot_data(date=btc_dec['time'], true=btc_dec['close'], prediction=btc_dec['close'])
btc_dec.shape