In [4]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import os
import sys
import sqlite3  

# show all columns in pandas
pd.set_option('display.max_columns', None)


In [2]:
# use sqlite3 to connect to the database strava.db
conn = sqlite3.connect('strava.db')
c = conn.cursor()
# use pandas to read the table 'activities' from the database
df = pd.read_sql_query("SELECT * FROM activities", conn)

# use pandas to read activity_details from the database
df_hr = pd.read_sql_query("SELECT * FROM activity_details", conn)

# close the connection to the database
conn.close()


In [11]:
df.type.value_counts()

Ride              644
Run               392
VirtualRide       139
Hike               20
Workout            12
IceSkate            8
EBikeRide           6
Walk                6
NordicSki           2
AlpineSki           1
WeightTraining      1
Name: type, dtype: int64

In [24]:
cond = df.activity_id == 7119038757
#cond = df.type == "VirtualRide"
print(df.loc[cond, "date"].values, df.loc[cond, "name"].values)

['2022-05-10'] ['Wahoo SYSTM: Nine Hammers']


In [85]:
cond = df_hr.activity_id == 7119038757
df_test = df_hr[cond].copy()

In [88]:
df_test.dropna(subset=["watts"], inplace=True)

In [89]:
df_test.head()

Unnamed: 0,activity_id,time,watts,heartrate,distance,velocity_smooth,altitude,cadence
157649,7119038757,1,19.0,97.0,0.9,0.87,,0.0
157650,7119038757,2,36.0,97.0,1.9,0.96,,1.0
157651,7119038757,3,59.0,97.0,3.7,1.22,,1.0
157652,7119038757,4,99.0,98.0,5.7,1.42,,29.0
157653,7119038757,5,127.0,99.0,8.4,1.682,,48.0


In [98]:
len(df_test), int(0.2*len(df_test)), int(0.8*len(df_test))

(3494, 698, 2795)

In [105]:
window = 200
x_data = []
y_data = []

for i in range(len(df_test)-200-1):
    x_data.append(df_test.iloc[i:i+window]["watts"].values)
    y_data.append(df_test.iloc[i+window+1]["heartrate"])

In [107]:
x_data = np.array(x_data)
print(x_data.shape)
y_data = np.array(y_data)
print(y_data.shape)

(3293, 200)
(3293,)


In [108]:
x_train = x_data[:int(0.8*len(x_data))]
y_train = y_data[:int(0.8*len(y_data))]
x_test = x_data[int(0.8*len(x_data)):]
y_test = y_data[int(0.8*len(y_data)):]

In [173]:
# add dimension to x_train and x_test
np.expand_dims(x_train, axis=2).shape

(2634, 200, 1)

In [172]:
torch.from_numpy(x_train).float().unsqueeze(2)
torch.from_numpy(y_train).float().unsqueeze(1)

torch.Size([2634, 1])

In [109]:
# use pytorch dataloader to load the data into batches for training and testing 
train_data = torch.utils.data.TensorDataset(torch.from_numpy(x_train).float(), torch.from_numpy(y_train).float())
train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)

test_data = torch.utils.data.TensorDataset(torch.from_numpy(x_test).float(), torch.from_numpy(y_test).float())
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=True)


In [167]:
# define the model
class Model(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Model, self).__init__()
        self.num_layers = 1
        self.ws = 200
        self.bs = 64
        self.hidden_size = hidden_size
        self.lstm = torch.nn.LSTM(input_size, hidden_size, self.num_layers, batch_first=True)
        self.linear = torch.nn.Linear(hidden_size, output_size)
        self.hidden = self.init_hidden()
        
    def init_hidden(self):
        return (torch.zeros(self.num_layers, self.bs, self.hidden_size),
                torch.zeros(self.num_layers, self.bs, self.hidden_size))
    
    def forward(self, input):
        #lstm_out, self.hidden = self.lstm(input.view(len(input), 1, -1), self.hidden)
        lstm_out, self.hidden = self.lstm(input, self.hidden)
        y_pred = self.linear(lstm_out[-1])
        return y_pred

# define the model
model = Model(1, 50, 1)
# define the loss function
criterion = torch.nn.MSELoss()
# define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train the model
epochs = 100
for epoch in range(epochs):
    for x, y in train_loader:
        # reshape the data
        #x = x.view(200, 1, 1)
        x = x.unsqueeze(2)
        # clear the gradients
        optimizer.zero_grad()
        # forward pass
        y_pred = model(x)
        # calculate the loss
        loss = criterion(y_pred, y)
        # backward pass
        loss.backward()
        # update the parameters
        optimizer.step()
    # print the loss
    print('epoch: ', epoch,' loss: ', loss.item())

  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [164]:
seq_len = 200
bs = 64
num_layers = 1
input_size=1
hidden_size=50

X = torch.rand(bs, seq_len, input_size)

hidden = (torch.zeros(num_layers, bs, hidden_size), torch.zeros(num_layers, bs, hidden_size))

lstm = torch.nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

In [165]:
a, b = lstm(X, hidden)
print(X.shape)
print(hidden[0].shape)
print(a.shape)
print(b[0].shape)

torch.Size([64, 200, 1])
torch.Size([1, 64, 50])
torch.Size([64, 200, 50])
torch.Size([1, 64, 50])


In [140]:
dict(lstm.named_parameters()).keys()

dict_keys(['weight_ih_l0', 'weight_hh_l0', 'bias_ih_l0', 'bias_hh_l0', 'weight_ih_l1', 'weight_hh_l1', 'bias_ih_l1', 'bias_hh_l1', 'weight_ih_l2', 'weight_hh_l2', 'bias_ih_l2', 'bias_hh_l2', 'weight_ih_l3', 'weight_hh_l3', 'bias_ih_l3', 'bias_hh_l3', 'weight_ih_l4', 'weight_hh_l4', 'bias_ih_l4', 'bias_hh_l4', 'weight_ih_l5', 'weight_hh_l5', 'bias_ih_l5', 'bias_hh_l5', 'weight_ih_l6', 'weight_hh_l6', 'bias_ih_l6', 'bias_hh_l6', 'weight_ih_l7', 'weight_hh_l7', 'bias_ih_l7', 'bias_hh_l7', 'weight_ih_l8', 'weight_hh_l8', 'bias_ih_l8', 'bias_hh_l8', 'weight_ih_l9', 'weight_hh_l9', 'bias_ih_l9', 'bias_hh_l9', 'weight_ih_l10', 'weight_hh_l10', 'bias_ih_l10', 'bias_hh_l10', 'weight_ih_l11', 'weight_hh_l11', 'bias_ih_l11', 'bias_hh_l11', 'weight_ih_l12', 'weight_hh_l12', 'bias_ih_l12', 'bias_hh_l12', 'weight_ih_l13', 'weight_hh_l13', 'bias_ih_l13', 'bias_hh_l13', 'weight_ih_l14', 'weight_hh_l14', 'bias_ih_l14', 'bias_hh_l14', 'weight_ih_l15', 'weight_hh_l15', 'bias_ih_l15', 'bias_hh_l15', 'weig

In [30]:
# import pytorch lightning
import pytorch_lightning as pl
from pytorch_lightning import Trainer

In [None]:
# use pytorch_lightning lstm model to predict heartrate based on watts and cadence
from pytorch_lightning.metrics.functional import r2score
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger

# import pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# import sklearn
from sklearn.preprocessing import StandardScaler


In [32]:
import torch
sentence = torch.tensor(
    [0,
    7,
    1,
    2,
    5,
    6,
    4,
    3]
)

print(sentence)

tensor([0, 7, 1, 2, 5, 6, 4, 3])


In [36]:
torch.manual_seed(123)
embed = torch.nn.Embedding(10, 16)
embedded_sentence = embed(sentence).detach()
#print(embedded_sentence)
print(embedded_sentence.shape)

torch.Size([8, 16])


In [37]:
omega = torch.empty(8,8)
for i, x_i in enumerate(embedded_sentence):
    for j, x_j in enumerate(embedded_sentence):
        omega[i,j] = torch.dot(x_i, x_j)

In [40]:
omega_mat = embedded_sentence.matmul(embedded_sentence.T)

In [46]:
import torch.nn.functional as F
attention_weights = F.softmax(omega, dim=1)
print(attention_weights.shape)

torch.Size([8, 8])


In [56]:
x_2 = embedded_sentence[1,:]
context_vec_2 = torch.zeros(x_2.shape)
for j in range(8):
    x_j = embedded_sentence[j,:]
    context_vec_2 += attention_weights[1,j] * x_j
print(context_vec_2)

tensor([-9.3975e-01, -4.6856e-01,  1.0311e+00, -2.8192e-01,  4.9373e-01,
        -1.2896e-02, -2.7327e-01, -7.6358e-01,  1.3958e+00, -9.9543e-01,
        -7.1287e-04,  1.2449e+00, -7.8077e-02,  1.2765e+00, -1.4589e+00,
        -2.1601e+00])


In [58]:
context_vectors = torch.matmul(attention_weights, embedded_sentence)

In [65]:
d = embedded_sentence.shape[1]
print(d)
U_query = torch.randn(d, d)
U_key = torch.randn(d, d)
U_value = torch.randn(d, d)

x_2 = embedded_sentence[1,:]
print(x_2.shape)
query_2 = U_query.matmul(x_2)
print(query_2.shape)

key_2 = U_key.matmul(x_2)
value_2 = U_value.matmul(x_2)

keys = U_key.matmul(embedded_sentence.T).T
values = U_value.matmul(embedded_sentence.T).T

16
torch.Size([16])
torch.Size([16])


In [67]:
omega_23 = query_2.dot(keys[2])
omega_23

tensor(-227.7193)

In [69]:
omega_2 = query_2.matmul(keys.T)
omega_2

tensor([ -27.4746,  -69.3855, -227.7193,    7.6392,  -63.0225,    8.0977,
          -7.1507,  -48.4587])

In [79]:
omega_2 / d**0.5

tensor([ -6.8686, -17.3464, -56.9298,   1.9098, -15.7556,   2.0244,  -1.7877,
        -12.1147])

In [78]:
F.softmax(omega_2 , dim=0)

tensor([2.1795e-16, 1.3698e-34, 0.0000e+00, 3.8734e-01, 7.9449e-32, 6.1266e-01,
        1.4619e-07, 1.6790e-25])

In [74]:
attention_weights_2 = F.softmax(omega_2 / d**0.5 , dim=0)
print(attention_weights_2.sum(dim=0))
attention_weights_2

tensor(1.0000)


tensor([7.1756e-05, 2.0204e-09, 1.3019e-26, 4.6590e-01, 9.9149e-09, 5.2248e-01,
        1.1548e-02, 3.7803e-07])

In [76]:
context_vector_2 = attention_weights_2.matmul(values)
context_vector_2

tensor([ 2.3713, -0.1583, -1.0061,  7.7921,  6.6322, -2.5136, -1.9096, -1.1220,
         0.9517, -6.8023,  1.1373,  6.3096, -3.7652,  4.6426,  4.9339,  2.3610])

In [None]:
# create a class for the dataset to be used in the pytorch lightning model 
class HeartrateDataset(Dataset):
    def __init__(self, df):
        self.df = df
        self.scaler = StandardScaler()
        self.scaler.fit(self.df)
        self.df = self.scaler.transform(self.df)
        self.df = torch.from_numpy(self.df).float()
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        return self.df[idx, :-1], self.df[idx, -1]



In [None]:

# use pytorch lightning lstm model to predict heartrate based on watts and cadence
class HeartratePrediction(pl.LightningModule):
    def __init__(self, input_size, hidden_size, num_layers, output_size, lr=0.001, batch_size=32):
        super().__init__()
        self.save_hyperparameters()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.loss = nn.MSELoss()
        self.batch_size = batch_size
        self.lr = lr
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        self.input_size = input_size
        self.h0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_size)
        self.c0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_size)
        
    def forward(self, x):
        out, _ = self.lstm(x, (self.h0, self.c0))
        out = self.fc(out[:, -1, :])
        return out
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss(y_hat, y)
        self.log('train_loss', loss)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss(y_hat, y)
        self.log('val_loss', loss)
        return loss
    
    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss(y_hat, y)
        self.log('test_loss', loss)
        return loss
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer
    
    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)
    
    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=False)
    
    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)
    
    def setup(self, stage=None):
        if stage == 'fit' or stage is None:
            self.train_dataset = HeartrateDataset(self.hparams, 'train')
            self.val_dataset = HeartrateDataset(self.hparams
