# In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import math
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from google.colab import drive

# Expose the Colab user's Google Drive under /content/drive
drive.mount('/content/drive')

# Read the header-less CSV; its first column is used as the bandwidth series
data = pd.read_csv("/content/drive/My Drive/7Train1.csv", header=None)
bandwidth = data.iloc[:, 0].to_numpy()
length = bandwidth.shape[0]
mean = bandwidth.mean()

# Index that separates the first 80% of the samples (rounded down) from the rest
len_train = int(length * 0.8)

# Column tensor with one float32 sample per row
data_tensor = torch.FloatTensor(bandwidth).unsqueeze(1)

# Number of future steps in every target sequence
predict_size = 10

# Function to create in-out sequences
def create_inout_sequences(input_data, window_size, predict_size):
    """Slice a series into sliding (input window, target horizon) pairs.

    Args:
        input_data: Any sliceable sequence with a length (list, tensor, ...).
        window_size: Number of consecutive items in each input window.
        predict_size: Number of items directly after the window that form
            the target.

    Returns:
        A list of ``(window, target)`` tuples, one per start offset, in
        increasing offset order. The list is empty when ``input_data`` is
        shorter than ``window_size + predict_size``.
    """
    span = window_size + predict_size
    n_pairs = len(input_data) - span + 1
    return [
        (
            input_data[start:start + window_size],
            input_data[start + window_size:start + span],
        )
        for start in range(n_pairs)
    ]

# Hyperparameters: input steps per sample, and samples per mini-batch
window_size, batch_size = 30, 8

# Build (input window, target horizon) pairs from the training portion only
train_inout_seq = create_inout_sequences(
    input_data=data_tensor[:len_train],
    window_size=window_size,
    predict_size=predict_size,
)

# Custom dataset class
class TimeSeriesDataset(Dataset):
    """Map-style dataset that serves pre-built sequence pairs by index."""

    def __init__(self, sequences):
        """Keep a reference to ``sequences`` (it is not copied)."""
        self.sequences = sequences

    def __len__(self):
        """Return how many items the wrapped collection holds."""
        n_items = len(self.sequences)
        return n_items

    def __getitem__(self, idx):
        """Return the item stored at position ``idx``."""
        item = self.sequences[idx]
        return item

# Wrap the training pairs and build the test pairs from the held-out tail
train_dataset = TimeSeriesDataset(train_inout_seq)
test_inout_seq = create_inout_sequences(data_tensor[len_train:], window_size, predict_size)
test_dataset = TimeSeriesDataset(test_inout_seq)

# Training batches keep chronological order; an incomplete final batch is dropped
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
)

# Test windows are served one at a time, also in chronological order
test_loader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    drop_last=True,
)

# Define Transformer model
class TransformerModel(nn.Module):
    """Encoder-decoder Transformer producing one output vector per target step.

    Source and target values go through the same linear embedding and the
    same positional encoder. The encoder stack turns the source into a
    memory, the decoder stack attends to it, and a final linear layer maps
    every decoder output to ``output_size`` values.

    Args:
        input_size: Size of the last dimension of ``src`` / ``tgt``.
        embed_dim: Model width used by every Transformer layer.
        num_layers: Number of layers in both the encoder and decoder stacks.
        hidden_dim: Width of the feed-forward sub-layer.
        num_heads: Number of attention heads.
        output_size: Size of the last dimension of the result.
        dropout: Dropout probability for the positional encoder and layers.
    """

    def __init__(self, input_size=1, embed_dim=10, num_layers=2, hidden_dim=128, num_heads=2, output_size=1, dropout=0.2):
        super().__init__()
        # Encoder and decoder layers are configured identically
        layer_kwargs = dict(
            d_model=embed_dim,
            nhead=num_heads,
            dim_feedforward=hidden_dim,
            dropout=dropout,
        )

        self.embedding = nn.Linear(input_size, embed_dim)
        self.pos_encoder = PositionalEncoding(embed_dim, dropout)

        self.encoder_layer = nn.TransformerEncoderLayer(**layer_kwargs)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)

        self.decoder_layer = nn.TransformerDecoderLayer(**layer_kwargs)
        self.transformer_decoder = nn.TransformerDecoder(self.decoder_layer, num_layers=num_layers)

        self.fc_out = nn.Linear(embed_dim, output_size)

    def forward(self, src, tgt, tgt_mask=None):
        """Encode ``src``, decode ``tgt`` against it, and project the result.

        ``tgt_mask`` is passed unchanged to the decoder stack. The layers are
        built without ``batch_first``, so inputs are presumably laid out as
        (sequence, batch, feature) -- confirm against the caller.
        """
        memory = self.transformer_encoder(self.pos_encoder(self.embedding(src)))

        decoder_in = self.pos_encoder(self.embedding(tgt))
        decoded = self.transformer_decoder(decoder_in, memory, tgt_mask=tgt_mask)
        return self.fc_out(decoded)

    def generate_square_subsequent_mask(self, sz):
        """Return an ``sz`` x ``sz`` float mask: ``-inf`` strictly above the diagonal, ``0`` elsewhere."""
        blocked = torch.full((sz, sz), float('-inf'))
        return torch.triu(blocked, diagonal=1)

class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to a sequence, then apply dropout.

    Even feature columns hold ``sin(pos * w_k)`` and odd columns hold
    ``cos(pos * w_k)`` with ``w_k = 10000 ** (-2k / d_model)``.

    The encoding is indexed by *time step*. By default the input is expected
    sequence-first, ``(seq_len, batch, d_model)``, which is the layout used by
    ``nn.Transformer*`` layers created without ``batch_first=True``. The
    previous implementation sliced the table with ``x.size(1)``, so a
    sequence-first input received an encoding that varied with the batch
    index and was constant along time.

    Args:
        d_model: Feature width of the input (even or odd).
        dropout: Dropout probability applied after the addition.
        max_len: Longest sequence length that can be encoded.
        batch_first: Set to ``True`` for ``(batch, seq_len, d_model)`` inputs.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000, *, batch_first=False):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.batch_first = batch_first

        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one cosine column fewer than sine columns
        pe[:, 1::2] = torch.cos(angles)[:, :d_model // 2]

        # Stored as (1, max_len, d_model); a buffer follows the module across
        # devices and is saved in the state dict without being trained.
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return ``dropout(x + encoding)`` for a 3-D input ``x``."""
        if self.batch_first:
            # (1, seq_len, d_model) broadcasts over the leading batch axis
            encoding = self.pe[:, :x.size(1), :]
        else:
            # (seq_len, 1, d_model) broadcasts over the middle batch axis
            encoding = self.pe[0, :x.size(0), :].unsqueeze(1)
        return self.dropout(x + encoding)

# Initialize the model, loss function, and optimizer
model = TransformerModel(dropout=0.2, num_layers=2)
loss_function = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the model
epochs = 25
train_losses = []  # mean training loss of every epoch

for epoch in range(epochs):
    model.train()
    epoch_train_loss = 0
    for seq, labels in train_loader:
        optimizer.zero_grad()

        # The loader stacks samples along a new leading axis, so batches are
        # (batch, time, feature), while the model is fed (time, batch, feature).
        # The axes have to be swapped with transpose(): view() only
        # reinterprets the flat memory and would mix time steps of different
        # samples into one "sequence".
        seq = seq.transpose(0, 1)
        labels = labels.transpose(0, 1)

        # Teacher forcing: the decoder input is the last observed value
        # followed by the targets shifted right by one step.
        tgt = torch.cat((seq[-1:], labels[:-1]), dim=0)
        # Causal mask so a decoder position cannot attend to later positions
        tgt_mask = model.generate_square_subsequent_mask(tgt.size(0))

        y_pred = model(seq, tgt, tgt_mask)
        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()

        epoch_train_loss += single_loss.item()

    train_losses.append(epoch_train_loss / len(train_loader))
    if epoch % 5 == 0:
        # Report the epoch mean rather than the loss of the final batch only
        print(f'Epoch {epoch+1}/{epochs}, Loss: {train_losses[-1]:.8f}')

# Making predictions
model.eval()
predictions = []

# Inference only: a single no-grad scope covers the whole loop
with torch.no_grad():
    for test_seq, _ in test_loader:
        # One window per batch, arranged as (window_size, 1, features)
        test_seq = test_seq.view(window_size, 1, -1)
        # The decoder input starts as the last observed value
        pred_seq = test_seq[-1:]

        # Decode step by step, feeding each new prediction back in
        for i in range(predict_size):
            tgt_mask = model.generate_square_subsequent_mask(pred_seq.size(0))
            y_pred = model(test_seq, pred_seq, tgt_mask)
            # Row i is the newest decoder position; shape it like a pred_seq row
            next_pred = y_pred[i].view(1, 1, -1)
            pred_seq = torch.cat((pred_seq, next_pred), dim=0)

        # Only the final (predict_size-steps-ahead) value of each window is kept
        predictions.append(next_pred.item())

# No scaler is applied in this script, so predictions are already in the
# units of the raw data and can be compared with it directly.

# Each stored prediction is the last step of a test window's horizon, so the
# first one corresponds to series index len_train + window_size + predict_size - 1.
actual_values = bandwidth[len_train + window_size + predict_size-1:length]

mae = mean_absolute_error(actual_values, predictions)
rmse = np.sqrt(mean_squared_error(actual_values, predictions))

# Errors relative to the mean of the ground truth, in percent
mean_actual = np.mean(actual_values)
error_ratio_rmse = (rmse / mean_actual) * 100
error_ratio_mae = (mae / mean_actual) * 100

print(f'Mean Absolute Error (MAE): {mae:.4f}')
print(f'Root Mean Squared Error (RMSE): {rmse:.4f}')
print(f'Error Ratio RMSE: {error_ratio_rmse:.4f}%')
print(f'Error Ratio MAE: {error_ratio_mae:.4f}%')

# Plot at most the first 195 points
predictions_195 = predictions[:195]
actual_values_195 = actual_values[:195]

# The x-axis must start where actual_values starts and have exactly as many
# points as are plotted; a fixed 195-long range fails on shorter test sets.
plot_start = len_train + window_size + predict_size - 1
plot_index = range(plot_start, plot_start + len(predictions_195))

plt.figure(figsize=(10, 6))
plt.plot(plot_index, actual_values_195, label='Actual Data')
plt.plot(plot_index, predictions_195, label='Transformer Predictions')
plt.legend()
plt.xlabel("Index")
plt.ylabel("Bandwidth")
# Title is derived from the data instead of hard-coding the count and start index
plt.title(f"Transformer Predictions vs Actual Data (First {len(predictions_195)} values from {plot_start} onwards)")
plt.show()
