In [10]:
# Imports
import os
import pandas as pd
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torch.nn.functional as F  # All functions that don't have any parameters
from torch.utils.data import (
    DataLoader,
)  # Gives easier dataset management and creates mini-batches
# import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
# import torchvision.transforms as transforms  # Transformations we can perform on our dataset
from tqdm import tqdm  # progress bar
import pyarrow.parquet as pq


In [11]:
# Set device
# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cuda


In [19]:
# Import data
# Root folder of the modelling data. Set the SLEEP_STUDY_DATA_ROOT environment
# variable to run the notebook against a copy of the data in another location;
# when it is unset the original absolute location is used.
data_root = os.environ.get(
    "SLEEP_STUDY_DATA_ROOT",
    "/home/esbenlykke/projects/sleep_study/data/data_for_modelling",
)
classifier_dir = os.path.join(data_root, "chained_classifiers")

# Read both splits as Arrow tables.
train = pq.read_table(os.path.join(classifier_dir, "30_sec_training_data.parquet"))
test = pq.read_table(os.path.join(classifier_dir, "30_sec_testing_data.parquet"))

In [21]:
# Columns that are removed from both splits before any further processing
columns_to_remove = ["id", "datetime", "unix_time", "noon_day"]

# Convert each table to a pandas DataFrame and drop those columns in one chained step
train_df = train.to_pandas().drop(columns=columns_to_remove)
test_df = test.to_pandas().drop(columns=columns_to_remove)

        month   day   age       incl      theta         x         y         z   
0         2.0  17.0   6.0  15.798665  -5.310628 -0.166543  0.736582 -0.088525  \
1         2.0  17.0   6.0  20.806454  -8.427656 -0.354135  0.928025 -0.206452   
2         2.0  17.0   6.0  15.256819  -0.600693 -0.268185  0.905933 -0.058268   
3         2.0  17.0   6.0   7.902555  -2.452577 -0.064138  0.966516  0.006280   
4         2.0  17.0   6.0   5.761677   5.070190 -0.092652  0.922352  0.120019   
...       ...   ...   ...        ...        ...       ...       ...       ...   
411961    9.0  30.0  12.0  89.056083 -74.467833  0.235170  0.027814 -0.903933   
411962    9.0  30.0  12.0  88.180862 -81.183678  0.214830  0.017471 -0.983853   
411963    9.0  30.0  12.0  84.408979 -83.088712  0.036370  0.075359 -1.005727   
411964    9.0  30.0  12.0  83.469627 -83.435066 -0.010326  0.138011 -1.000573   
411965    9.0  30.0  12.0  88.157199 -80.687190  0.135086 -0.003870 -0.985143   

             temp    x_mean

In [23]:
# Standardise every numeric column to zero mean / unit variance.
# The statistics are estimated on the training split only and reused for the
# test split, so both splits share one scale and nothing about the test
# distribution feeds into preprocessing.
numeric_columns = train_df.select_dtypes(include='number').columns

train_mean = train_df[numeric_columns].mean()
# A constant column has a standard deviation of 0; dividing by it would fill the
# column with NaN/inf, so such columns are only centred (divided by 1 instead).
train_std = train_df[numeric_columns].std().replace(0, 1)

# NOTE(review): numeric_columns contains every numeric column, which would
# include the label column ("score") if it is numeric - confirm that rescaling
# the labels is intended.
train_df[numeric_columns] = (train_df[numeric_columns] - train_mean) / train_std
test_df[numeric_columns] = (test_df[numeric_columns] - train_mean) / train_std

In [28]:
def create_sequences(df, target_column, sequence_length, step_size):
    """Cut a DataFrame into fixed-length windows, each with a next-row label.

    Window ``k`` starts at row ``k * step_size`` and holds ``sequence_length``
    consecutive rows with every column of ``df`` (the target column included).
    Its label is the value of ``target_column`` in the row immediately after
    the window, so a start position is only used when that row exists.

    Windows are cut purely by row position: the function has no notion of
    groups, so whatever rows are adjacent in ``df`` end up in the same window.

    Args:
        df: DataFrame whose columns are all numeric.
        target_column: Name of the column the labels are read from.
        sequence_length: Number of rows per window (positive integer).
        step_size: Row offset between the starts of consecutive windows
            (positive integer).

    Returns:
        Tuple ``(data_sequences, data_labels)`` of float32 tensors with shapes
        ``(n_windows, sequence_length, n_columns)`` and ``(n_windows,)``.
        When ``df`` is too short for a single window, ``n_windows`` is 0 and
        the remaining dimensions are kept.

    Raises:
        ValueError: If ``sequence_length`` or ``step_size`` is smaller than 1.
        KeyError: If ``target_column`` is not a column of ``df``.
    """
    if sequence_length < 1 or step_size < 1:
        raise ValueError(
            "sequence_length and step_size must be positive integers, got "
            f"sequence_length={sequence_length}, step_size={step_size}"
        )

    # Resolve the label column once instead of on every window.
    target_idx = df.columns.get_loc(target_column)

    # One float32 copy of the whole table. Building a tensor from a Python list
    # of per-window NumPy arrays is far slower than gathering from this tensor.
    data = torch.tensor(df.to_numpy(), dtype=torch.float32)

    # A start row is valid only if the row right after its window exists,
    # hence the exclusive upper bound of len(data) - sequence_length.
    starts = torch.tensor(
        list(range(0, len(data) - sequence_length, step_size)), dtype=torch.long
    )

    # Row indices of every window, shape (n_windows, sequence_length).
    window_rows = starts.unsqueeze(1) + torch.arange(sequence_length).unsqueeze(0)

    data_sequences = data[window_rows]
    data_labels = data[starts + sequence_length, target_idx]

    return data_sequences, data_labels

# Window both splits: 20-row sequences taken every 10 rows, labelled from "score"
train_sequences, train_labels = create_sequences(
    train_df, target_column="score", sequence_length=20, step_size=10
)
test_sequences, test_labels = create_sequences(
    test_df, target_column="score", sequence_length=20, step_size=10
)

# Report the shape of each resulting tensor (train first, then test)
for _tensor in (train_sequences, train_labels, test_sequences, test_labels):
    print(_tensor.shape)


torch.Size([41195, 20, 68])
torch.Size([41195])
torch.Size([122749, 20, 68])
torch.Size([122749])


In [30]:
# Folder that receives the LSTM-ready tensors. The data root can be redirected
# with the SLEEP_STUDY_DATA_ROOT environment variable; when it is unset the
# original absolute location is used.
lstm_dir = os.path.join(
    os.environ.get(
        "SLEEP_STUDY_DATA_ROOT",
        "/home/esbenlykke/projects/sleep_study/data/data_for_modelling",
    ),
    "lstm",
)
# torch.save does not create missing parent directories, so make sure it exists.
os.makedirs(lstm_dir, exist_ok=True)

# Save train tensors
torch.save(train_sequences, os.path.join(lstm_dir, "train_predictors.pt"))
torch.save(train_labels, os.path.join(lstm_dir, "train_labels.pt"))

# Save test tensors
torch.save(test_sequences, os.path.join(lstm_dir, "test_predictors.pt"))
torch.save(test_labels, os.path.join(lstm_dir, "test_labels.pt"))

In [29]:
# Display the kernel's current working directory (the cell's last expression is shown as its output).
os.getcwd()

'/home/esbenlykke/projects/sleep_study/code/create_models/lstm'