## Processing raw data; creating train and test sets

In [136]:
import numpy as np
import plotly.graph_objs as go
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import linear_model

In [76]:
with open('./raw_data/full_data/names_and_shapes.txt') as file:
    lines = []
    for line in file:
        sep = line.find(',')
        name = line[:sep]
        lines.append(name)

In [77]:
X_list = []
Y_list = []

for i in range(len(lines) // 2):
    X_file = './raw_data/full_data/' + lines[i]
    X_i = np.load(X_file)
    X_list.append(X_i)
    Y_file = './raw_data/full_data/' + lines[i + len(lines) // 2]
    Y_i = np.load(Y_file)
    Y_list.append(Y_i)

In [78]:
num_sessions = len(X_list)
time_bins = X_list[0].shape[2]

In [79]:
train_split = 0.8
X_train = [X[:int(train_split * X.shape[0])] for X in X_list]
X_test = [X[int(train_split * X.shape[0]):] for X in X_list]
Y_train = [Y[:int(train_split * Y.shape[0])] for Y in Y_list]
Y_test = [Y[int(train_split * Y.shape[0]):] for Y in Y_list]

## Encoding with Poisson regression

In [80]:
def create_avg_and_max_matrices(X):
    avg_matrix = np.empty((0, time_bins))
    max_matrix = np.empty((0, time_bins))
    
    for session in X:
        avg_spikes = np.mean(session, axis=1)
        max_spikes = np.max(session, axis=1)
        avg_matrix = np.vstack((avg_matrix, avg_spikes))
        max_matrix = np.vstack((max_matrix, max_spikes))
        
    return avg_matrix, max_matrix

In [81]:
train_avg_spikes, train_max_spikes = create_avg_and_max_matrices(X_train)
test_avg_spikes, test_max_spikes = create_avg_and_max_matrices(X_test)

In [89]:
def create_wheel_speed_matrix(Y):
    wheel_speed_matrix = np.empty((0, time_bins))

    for session in Y:
        wheel_speed_matrix = np.vstack((wheel_speed_matrix, session))
    
    return wheel_speed_matrix

In [90]:
train_wheel_speeds = create_wheel_speed_matrix(Y_train)
test_wheel_speeds = create_wheel_speed_matrix(Y_test)

In [117]:
def train_and_predict(train_spikes, test_spikes, train_speeds, test_speeds):
    encoders = [linear_model.PoissonRegressor() for _ in range(time_bins)]
    spikes_pred = np.zeros(test_spikes.shape)

    for t in range(time_bins):
        train_speeds_dim = np.expand_dims(train_speeds[:,t], axis=1)
        test_speeds_dim = np.expand_dims(test_speeds[:,t], axis=1)
        encoders[t].fit(train_speeds_dim, train_spikes[:,t])
        spikes_pred[:,t] = encoders[t].predict(test_speeds_dim)
        
    return encoders, spikes_pred

In [118]:
avg_encoders, avg_spikes_pred = train_and_predict(train_avg_spikes, test_avg_spikes, train_wheel_speeds, test_wheel_speeds) 
max_encoders, max_spikes_pred = train_and_predict(train_max_spikes, test_max_spikes, train_wheel_speeds, test_wheel_speeds)

In [137]:
print(f'MSE for average spikes: {mean_squared_error(test_avg_spikes, avg_spikes_pred)}')
print(f'MSE for maximum spikes: {mean_squared_error(test_max_spikes, max_spikes_pred)}')
print(f'R-squared for average spikes: {r2_score(test_avg_spikes, avg_spikes_pred)}')
print(f'R-squared for maximum spikes: {r2_score(test_max_spikes, max_spikes_pred)}')

MSE for average spikes: 0.03327826055121139
MSE for maximum spikes: 4.692961438400262
R-squared for average spikes: -0.0008471324065031149
R-squared for maximum spikes: 0.0001365736782785204


## Decoding with Reduced Rank Model

## Berkan's code

## Enoding

In [7]:
clfs = [linear_model.PoissonRegressor() for _ in range(D)]
X_test_hat = np.zeros((X_test.shape[0], D))

# add a dimension to Y_train and Y_test
Y_train_dim = Y_train[:, np.newaxis]
Y_test_dim = Y_test[:, np.newaxis]
for d in range(D):
    clfs[d].fit(Y_train_dim, X_train[:, d])
    X_test_hat[:, d] = clfs[d].predict(Y_test_dim)

betas = [float(clfs[d].coef_) for d in range(D)]

print(f'MSE: {mean_squared_error(X_test, X_test_hat)}')
print(f"beta: {betas}")
# Plot the betas using plotly
fig = go.Figure()
fig.add_trace(go.Bar(x=np.arange(D), y=betas))
fig.update_layout(
    xaxis_title="Neuron",
    yaxis_title="Value",
    title="Beta for each neuron"
)
fig.show()

MSE: 0.5898138303392012
beta: [0.20400896724781722, 0.23406932708166714, 0.04268874337952797, 0.11370658818387654, 0.01518584730689122, 0.03337051584140189, 0.07119438513146414, 0.03782546099862029, -0.060297463670699596, -0.015584820072791628, 0.008825355385077088, -0.032948056524980895, 0.01842511827139427, 0.015278192592384731, 0.0741797876242447, 0.04905218565816887, 0.007159493198042481, 0.020717698251592084, 0.006296493683754506, 0.004720416375770121, -0.03156617941050992, 0.008444348620928957, -0.018550062457030565, 0.012196418376286449]



Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)



In [8]:
sessions = 4

plot_z = X_test_hat[:40 * sessions].T
fig = go.Figure(data=go.Heatmap(
                     z=plot_z,
                     colorscale='Viridis'))
fig.update_layout(
    xaxis_title="Time bin",
    yaxis_title="Neuron",
    title="Predicted number of spikes"
)
for i in range(sessions - 1):
    fig.add_shape(type="line",
        x0=40*(i+1), y0=-0.5, x1=40*(i+1), y1=23.5,
        line=dict(color="Red",width=1)
    )
fig.show()

plot_z = X_test[:40 * sessions].T
fig = go.Figure(data=go.Heatmap(
                     z=plot_z,
                     colorscale='Viridis'))
fig.update_layout(
    xaxis_title="Time bin",
    yaxis_title="Neuron",
    title="True number of spikes"
)
for i in range(sessions - 1):
    fig.add_shape(type="line",
        x0=40*(i+1), y0=-0.5, x1=40*(i+1), y1=23.5,
        line=dict(color="Red",width=1)
    )
fig.show()

## Decoding

In [9]:
clf = linear_model.Ridge(alpha=.2)
clf.fit(X_train, Y_train)
Y_test_hat = clf.predict(X_test)

print(f'MSE: {mean_squared_error(Y_test, Y_test_hat)}')

# Plot the betas using plotly
fig = go.Figure()
fig.add_trace(go.Bar(x=np.arange(D), y=clf.coef_))
fig.update_layout(
    xaxis_title="Neuron",
    yaxis_title="Value",
    title="Ridge regression beta for each neuron"
)
fig.show()

MSE: 0.8809422762444616


In [10]:
# Try a small neural network using sklearn
from sklearn.neural_network import MLPRegressor

clf = MLPRegressor(hidden_layer_sizes=(400, 400, 400), max_iter=1000, n_iter_no_change=100, tol=1e-4)
clf.fit(X_train, Y_train)
Y_test_hat = clf.predict(X_test)

print(f'MSE: {mean_squared_error(Y_test, Y_test_hat)}')

data = []
data.append(go.Scatter(x=np.arange(len(clf.loss_curve_)), y=clf.loss_curve_, mode='lines', name='Loss curve'))

fig = go.Figure(data=data)
fig.update_layout(
    xaxis_title="Epoch",
    yaxis_title="Loss",
    title="Loss curve"
)
fig.show()

MSE: 1.1366456825008449


In [11]:
# Train a neural network using pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    def __init__(self, hidden_size):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(D, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
    
net = Net(400)

criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

X_train_torch = torch.from_numpy(X_train).float()
Y_train_torch = torch.from_numpy(Y_train).float()
X_test_torch = torch.from_numpy(X_test).float()
Y_test_torch = torch.from_numpy(Y_test).float()

epochs = 1000
batch_size = 100

loss_train = []
loss_test = []

for epoch in range(epochs):
    permutation = torch.randperm(X_train_torch.shape[0])
    for i in range(0, X_train_torch.shape[0], batch_size):
        indices = permutation[i:i+batch_size]
        batch_x, batch_y = X_train_torch[indices], Y_train_torch[indices]

        optimizer.zero_grad()

        outputs = net(batch_x)
        loss = criterion(outputs, batch_y.unsqueeze(-1))
        loss.backward()
        optimizer.step()
        
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, loss: {loss.item()}')

    outputs = net(X_train_torch)
    loss_train.append(criterion(outputs, Y_train_torch.unsqueeze(-1)))
    outputs = net(X_test_torch)
    loss_test.append(criterion(outputs, Y_test_torch.unsqueeze(-1)))

Epoch 0, loss: 1.0610132217407227
Epoch 10, loss: 0.47626158595085144
Epoch 20, loss: 0.5589691996574402
Epoch 30, loss: 0.3065098822116852
Epoch 40, loss: 0.3074653148651123
Epoch 50, loss: 0.16831016540527344
Epoch 60, loss: 0.14898696541786194
Epoch 70, loss: 0.0732480064034462
Epoch 80, loss: 0.11618396639823914
Epoch 90, loss: 0.050241075456142426
Epoch 100, loss: 0.07659020274877548
Epoch 110, loss: 0.0759902223944664
Epoch 120, loss: 0.14594632387161255
Epoch 130, loss: 0.3374762237071991
Epoch 140, loss: 0.0831337720155716
Epoch 150, loss: 0.1356569230556488
Epoch 160, loss: 0.09581134468317032
Epoch 170, loss: 0.4756656289100647
Epoch 180, loss: 0.06765664368867874
Epoch 190, loss: 0.08835096657276154
Epoch 200, loss: 0.09339697659015656
Epoch 210, loss: 0.11122559756040573
Epoch 220, loss: 0.08027961105108261
Epoch 230, loss: 0.06544233113527298
Epoch 240, loss: 0.17370367050170898
Epoch 250, loss: 0.15004049241542816
Epoch 260, loss: 0.04537535086274147
Epoch 270, loss: 0.09

In [12]:
data = []
data.append(go.Scatter(x=np.arange(len(loss_train)), y=loss_train, mode='lines', name='Train loss'))
data.append(go.Scatter(x=np.arange(len(loss_test)), y=loss_test, mode='lines', name='Test loss'))

fig = go.Figure(data=data)
fig.update_layout(
    xaxis_title="Epoch",
    yaxis_title="Loss",
    title="PyTorch Loss curve"
)
fig.show()

RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.

In [162]:
plot_x = np.arange(40)
plot_ys_true = [Y_test[40*trial : 40*(trial + 1)] for trial in range(2)]
plot_ys_pred = [Y_test_hat[40*trial : 40*(trial + 1)] for trial in range(2)] 

data = []
for i, plot_y_true in enumerate(plot_ys_true):
    data.append(go.Scatter(x=plot_x, y=plot_y_true, mode='markers+lines', name=f'Session true {i+1}'))

for i, plot_y_pred in enumerate(plot_ys_pred):
    data.append(go.Scatter(x=plot_x, y=plot_y_pred, mode='markers+lines', name=f'Session pred {i+1}'))

fig = go.Figure(data=data)
fig.update_layout(
    xaxis_title="Time bin",
    yaxis_title="Wheel speed",
    title="True and predicted wheel speed of the first 2 sessions on testing data"
)
fig.show()

In [163]:
Y_train_hat = clf.predict(X_train)

plot_x = np.arange(40)
plot_ys_true = [Y_train[40*trial : 40*(trial + 1)] for trial in range(2)]
plot_ys_pred = [Y_train_hat[40*trial : 40*(trial + 1)] for trial in range(2)] 

data = []
for i, plot_y_true in enumerate(plot_ys_true):
    data.append(go.Scatter(x=plot_x, y=plot_y_true, mode='markers+lines', name=f'Session true {i+1}'))

for i, plot_y_pred in enumerate(plot_ys_pred):
    data.append(go.Scatter(x=plot_x, y=plot_y_pred, mode='markers+lines', name=f'Session pred {i+1}'))

fig = go.Figure(data=data)
fig.update_layout(
    xaxis_title="Time bin",
    yaxis_title="Wheel speed",
    title="Wheel speed of the first 2 sessions on training data"
)
fig.show()