# GRU Time Series Forecasting of Neuralink Dataset


## Imports


In [6]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from glob import glob
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn
import math
from scipy.io import wavfile

### Function Definitions


In [7]:
# Length, in samples, of each sliding window passed to create_train_test_set;
# that function uses the first WINDOW_SIZE - 1 samples of a window as model
# input and the final sample as the prediction target.
WINDOW_SIZE = 200

In [8]:
def create_train_test_set(data, window_size):
    """Build sliding-window train/test arrays for next-sample prediction.

    Each run of ``window_size`` consecutive rows of ``data`` forms one
    sequence. Within a sequence the first ``window_size - 1`` rows are the
    model input and the last row is the target. The earliest 80% of the
    sequences become the training set and the remaining 20% the test set
    (chronological split, no shuffling).

    Args:
        data: pandas DataFrame whose rows are time steps and whose columns
            are features.
        window_size: Number of consecutive rows per sequence (>= 1).

    Returns:
        ``[x_train, y_train, x_test, y_test]`` as NumPy arrays. The ``x``
        arrays have shape ``(sequences, window_size - 1, features)`` and the
        ``y`` arrays have shape ``(sequences, features)``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1 or ``data`` has too
            few rows to form a single sequence.
    """
    raw_data = data.to_numpy()  # convert to numpy array

    if window_size < 1:
        raise ValueError(f"window_size must be at least 1, got {window_size}")

    # The window ending on the very last row is deliberately not generated;
    # the number of windows is kept as-is because code that positions the
    # predictions on the time axis depends on this count.
    num_windows = len(raw_data) - window_size
    if num_windows < 1:
        raise ValueError(
            f"Need more than {window_size} rows to build a window of size "
            f"{window_size}, got {len(raw_data)}"
        )

    # All sequences of length window_size, built with the sliding window method.
    windows = np.array(
        [raw_data[start : start + window_size] for start in range(num_windows)]
    )

    test_set_size = int(np.round(0.2 * num_windows))
    train_set_size = num_windows - test_set_size

    x_train = windows[:train_set_size, :-1, :]
    y_train = windows[:train_set_size, -1, :]

    x_test = windows[train_set_size:, :-1, :]
    y_test = windows[train_set_size:, -1, :]

    return [x_train, y_train, x_test, y_test]

In [9]:
class GRU(nn.Module):
    """Stacked GRU followed by a linear layer applied to the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        output_dim: Number of values predicted per sequence.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # batch_first=True: inputs are laid out as (batch, sequence, feature).
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True)
        self.output_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return one prediction per sequence in ``x``.

        Args:
            x: Tensor of shape ``(batch, sequence, input_dim)``.

        Returns:
            Tensor of shape ``(batch, output_dim)`` computed from the GRU
            output at the final time step.
        """
        # Fresh zero initial state for every call, created on the same device
        # and with the same dtype as the input so the module keeps working
        # after .to(device) / .double().
        h0 = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_dim,
            device=x.device,
            dtype=x.dtype,
        )
        out, _ = self.gru(x, h0)
        return self.output_layer(out[:, -1, :])

## Multi-layer Gated Recurrent Unit Recurrent Neural Network (GRU)


### Importing Data & Normalizing the Neural Data


In [10]:
data_dir = "../../data/"
# glob returns paths in arbitrary order; sort so that the recording picked
# below is the same on every run and every machine.
data_file_l = sorted(glob(data_dir + "*.wav"))
if not data_file_l:
    raise FileNotFoundError(f"No .wav files found in {data_dir!r}")

# Load the first recording: sample rate plus the raw samples.
rate, data = wavfile.read(data_file_l[0])
df = pd.DataFrame(data, columns=["Amplitude"])
df.index.name = "Sample"

In [11]:
# Rescale the amplitudes into the range [-1, 1].
# NOTE(review): the scaler is fitted on the whole recording, i.e. before the
# train/test split is made, so test samples influence the scaling — confirm
# that this is acceptable.
scaler = MinMaxScaler(feature_range=(-1, 1))
df["Amplitude"] = scaler.fit_transform(df[["Amplitude"]].to_numpy())

### Splitting the Data into Training & Testing Sets


In [12]:
# Build the windowed train/test arrays from the scaled amplitude frame.
x_train, y_train, x_test, y_test = create_train_test_set(df, window_size=WINDOW_SIZE)

In [13]:
# Wrap each NumPy array in a tensor and cast it to torch's default tensor type.
x_train, x_test, y_train, y_test = (
    torch.from_numpy(array).type(torch.Tensor)
    for array in (x_train, x_test, y_train, y_test)
)

In [14]:
# Display the shape of the training inputs.
x_train.shape

torch.Size([78799, 199, 1])

In [15]:
# Display the shape of the test inputs.
x_test.shape

torch.Size([19700, 199, 1])

#### Defining the Neural Network


In [16]:
# Network dimensions and the number of training epochs.
input_dim, hidden_dim, num_layers, output_dim = 1, 32, 2, 1
num_epochs = 100

model = GRU(input_dim, hidden_dim, num_layers, output_dim)

# Mean-squared-error objective, optimised with Adam.
criterion = nn.MSELoss(reduction="mean")
optimiser = torch.optim.Adam(params=model.parameters(), lr=0.01)

#### Training the Neural Network


In [17]:
# Per-epoch training loss; the training loop writes one entry per epoch.
hist = np.zeros(num_epochs)

In [251]:
# %%time
gru = []  # receives the train/test RMSE scores once they are computed

for t in range(num_epochs):
    # Drop the gradients of the previous step before the new pass.
    optimiser.zero_grad()

    # Full-batch forward pass over the whole training set.
    y_train_pred = model(x_train)
    loss = criterion(y_train_pred, y_train)

    epoch_loss = loss.item()
    print("Epoch ", t, "MSE: ", epoch_loss)
    hist[t] = epoch_loss

    # Back-propagate and update the weights.
    loss.backward()
    optimiser.step()

#### Reformatting the Data


In [133]:
# Map the scaled training predictions and targets back to the original
# amplitude units and wrap each in a DataFrame.
predict, original = (
    pd.DataFrame(scaler.inverse_transform(tensor.detach().numpy()))
    for tensor in (y_train_pred, y_train)
)

In [None]:
# Put truth and training prediction side by side, one row per sample.
result_df = pd.concat([original, predict], axis="columns")
result_df.columns = ["Truth", "Training Pred"]
result_df.index.name = "Sample"

# Residual = truth minus prediction.
result_df["Residuals"] = result_df["Truth"].sub(result_df["Training Pred"])
result_df.head()

In [None]:
# Rows whose residual lies strictly between -1 and 1.
result_df.loc[result_df["Residuals"].abs() < 1]

In [None]:
# Number of training rows in the comparison table.
len(result_df)

##### Plotting the Training Predictions


In [None]:
# Plot the first 1000 rows of the comparison table (all of its columns).
result_df.head(1000).plot()
# Zero reference line and title; these pyplot calls are issued right after
# .plot() so that they apply to the figure it produced.
plt.axhline(y=0)
plt.title("Training Prediction")
plt.show()

#### Making Predictions Using the Test Data


In [None]:
# Display the shape of the test inputs before running inference.
x_test.shape

In [139]:
# Make Predictions
# Inference needs no gradients: no_grad avoids recording the autograd graph
# for every test window, which saves memory and time.
with torch.no_grad():
    y_test_pred = model(x_test)

In [140]:
# Invert Predictions
# Map predictions and targets from the scaled range back to the original
# amplitude units.
# NOTE(review): the four names are rebound from torch tensors to NumPy
# arrays here, so this cell cannot be run twice in a row (arrays have no
# .detach()); any cell that expects tensors must run before this one.
y_train_pred = scaler.inverse_transform(y_train_pred.detach().numpy())
y_train = scaler.inverse_transform(y_train.detach().numpy())
y_test_pred = scaler.inverse_transform(y_test_pred.detach().numpy())
y_test = scaler.inverse_transform(y_test.detach().numpy())

##### Calculate Root Mean Squared Error


In [None]:
# Root mean squared error of the first (only) output column, computed on the
# inverse-transformed values, i.e. in original amplitude units.
trainScore = math.sqrt(mean_squared_error(y_train[:, 0], y_train_pred[:, 0]))
print(f"Train Score: {trainScore:.2f} RMSE")
testScore = math.sqrt(mean_squared_error(y_test[:, 0], y_test_pred[:, 0]))
print(f"Test Score: {testScore:.2f} RMSE")

# Keep both scores, train first and test second.
gru.extend([trainScore, testScore])

##### Shift Predictions for Plotting


In [148]:
# Lower-case alias of WINDOW_SIZE used by the plotting offsets below.
window_size = WINDOW_SIZE

In [None]:
# NOTE(review): trainPredictPlot is first assigned in the cell that follows
# this one, so on a fresh top-to-bottom run this line raises NameError; run
# it only after that cell.
trainPredictPlot.shape

In [157]:
# Every prediction targets the LAST sample of its window, so prediction i
# belongs at sample index i + window_size - 1. Train and test predictions are
# both placed with that offset so they line up with the original signal.
first_target = window_size - 1
num_train = len(y_train_pred)
num_test = len(y_test_pred)

# Shift train predictions for plotting (NaN wherever there is no prediction)
trainPredictPlot = np.full(df.shape, np.nan)
trainPredictPlot[first_target : first_target + num_train, :] = y_train_pred

# Shift test predictions for plotting; they start right after the last train
# prediction.
testPredictPlot = np.full(df.shape, np.nan)
test_start = first_target + num_train
testPredictPlot[test_start : test_start + num_test, :] = y_test_pred

# Original signal in its original amplitude units.
original = scaler.inverse_transform(df.values.reshape(-1, 1))

# Columns of `result`: 0 = train predictions, 1 = test predictions,
# 2 = original signal.
predictions = np.concatenate([trainPredictPlot, testPredictPlot, original], axis=1)
result = pd.DataFrame(predictions)

In [None]:
plt.title("GRU Results")
# Draw the original signal first, then the test and train predictions.
for column, label, width in ((2, "original", 1), (1, "test", 0.5), (0, "train", 0.5)):
    plt.plot(result.index, result[column], label=label, linewidth=width)
plt.grid(True)
plt.legend()
plt.show()

#### Comparing Test Results with Ground Truth Values


In [192]:
sub_ground_truth_values = result[2][result[1] > 0]

In [193]:
sub_test_values = result[1][result[1] > 0]

In [None]:
# Display the ground-truth values selected for the test comparison.
sub_ground_truth_values

In [195]:
# Ground truth and test prediction side by side, labelled via `keys`.
sub_df = pd.concat(
    [sub_ground_truth_values, sub_test_values],
    axis=1,
    keys=["Ground Truth", "Test Predictions"],
)

In [196]:
# Residual = ground truth minus test prediction.
sub_df["Residuals"] = sub_df["Ground Truth"].sub(sub_df["Test Predictions"])

In [None]:
# Display the test comparison table.
sub_df

In [None]:
plt.title("Test Values Vs. Original Values")
# Original signal over the rows that have a test prediction. Selecting with
# notna() (rather than "> 0") keeps the samples whose prediction is zero or
# negative, so the plotted original signal has no artificial gaps.
original_in_test_range = result.loc[result[1].notna(), 2]
plt.plot(
    original_in_test_range.index,
    original_in_test_range,
    label="Original values",
    linewidth=0.5,
)
plt.plot(result[1], label="Test Values", linewidth=0.5)
plt.plot(sub_df["Residuals"], label="Residuals", linewidth=0.5)
plt.axhline(y=0)  # zero reference line for the residuals
plt.legend()
plt.show()

In [None]:
# Test residuals that lie strictly between -1 and 1.
sub_df.loc[sub_df["Residuals"].abs() < 1, "Residuals"]

In [None]:
# Number of rows in the test comparison table.
len(sub_df)

In [226]:
# Percentage of test residuals that lie strictly between -1 and 1.
residuals_within_one = sub_df["Residuals"].abs() < 1
percent_residuals_between_minus_one_and_one = (
    int(residuals_within_one.sum()) / len(sub_df)
) * 100

In [None]:
# Report the share of small residuals, rounded to two decimals.
print(
    f"{percent_residuals_between_minus_one_and_one:.2f}% of the residuals are between -1 and 1."
)

In [None]:
# Select the residuals that are exactly zero (the original author's run
# found none).
sub_df["Residuals"].loc[sub_df["Residuals"] == 0]

In [None]:
# NOTE(review): parameters() returns an iterator, so this cell only shows the
# iterator object and not the parameter values themselves.
model.parameters()

In [238]:
# Persist the trained model. Passing the module itself pickles the whole
# object, so loading the file later requires the GRU class to be importable.
# NOTE(review): consider saving model.state_dict() instead, and never load a
# pickle file that comes from an untrusted source.
torch.save(model, "neural_gru.pickle")