In [None]:
import pandas as pd
import numpy as np
import pandas as pd
import torch

# Load the preprocessed splits.
df_train = pd.read_csv('preprocess_data/train.csv')
df_validation = pd.read_csv('preprocess_data/validation.csv')

# Columns whose name matches 'Spine' are the regression targets; every other
# column (including 'id') stays on the feature side.
spine_columns_train = list(df_train.filter(regex='Spine').columns)
x_train_df = df_train.drop(columns=spine_columns_train)
# .assign returns a new frame, so 'id' is attached without writing into a
# selection of df_train (avoids SettingWithCopyWarning). 'id' is kept on the
# targets so they can be filtered per id in the same way as the features.
y_train_df = df_train[spine_columns_train].assign(id=df_train['id'])

spine_columns_validation = list(df_validation.filter(regex='Spine').columns)
x_validation_df = df_validation.drop(columns=spine_columns_validation)
y_validation_df = df_validation[spine_columns_validation].assign(id=df_validation['id'])


In [72]:
import torch

# Number of consecutive frames that form one model input window.
sequence_length = 1

ids = x_train_df['id'].unique()

x_train = []
y_train = []
lengths = []  # number of real (unpadded) frames in each window

# NOTE: the loop variable is deliberately not called `id`; a module-level `id`
# would shadow the builtin for every later cell of the notebook.
for animation_id in ids:
    # Feature rows of this id, without the bookkeeping columns.
    frames_x = x_train_df[x_train_df['id'] == animation_id].drop(['id', 'Frame'], axis=1)
    # Matching target rows (Spine values) of this id.
    frames_y = y_train_df[y_train_df['id'] == animation_id].drop(['id'], axis=1)

    # Convert once per id instead of once per window.
    features = torch.tensor(frames_x.values, dtype=torch.float32)
    targets = torch.tensor(frames_y.values, dtype=torch.float32)

    number_of_frames = features.shape[0]
    quotient, remainder = divmod(number_of_frames, sequence_length)

    # Full windows: the target is the target row of the window's last frame.
    for i in range(quotient):
        start = i * sequence_length
        stop = start + sequence_length
        x_train.append(features[start:stop])
        y_train.append(targets[stop - 1])
        lengths.append(sequence_length)

    # Trailing partial window: zero-pad up to sequence_length and record the
    # true length; its target is the target row of the very last frame.
    if remainder != 0:
        padded = torch.zeros(sequence_length, features.shape[1])
        padded[:remainder] = features[-remainder:]
        y_train.append(targets[number_of_frames - 1])
        x_train.append(padded)
        lengths.append(remainder)

x_train = torch.stack(x_train)
y_train = torch.stack(y_train)

In [73]:
# Shuffle the training windows along the first (sample) axis. A single
# permutation is applied to inputs, targets and lengths so they stay aligned.
# A dedicated seeded generator makes the order reproducible on a fresh run
# without touching the global RNG state.
shuffle_generator = torch.Generator().manual_seed(0)
indices = torch.randperm(x_train.shape[0], generator=shuffle_generator)
x_train = x_train[indices]
y_train = y_train[indices]
# as_tensor accepts both the original Python list and an already-converted
# tensor, so re-running this cell does not trigger a copy-construct warning.
lengths = torch.as_tensor(lengths)[indices]

In [74]:
ids = x_validation_df['id'].unique()

x_validation = []
y_validation = []
lengths_val = []  # number of real (unpadded) frames in each window

# Same windowing as for the training split, reusing the module-level
# `sequence_length`. The loop variable is deliberately not called `id`, which
# would shadow the builtin for every later cell of the notebook.
for animation_id in ids:
    # Feature rows of this id, without the bookkeeping columns.
    frames_x = x_validation_df[x_validation_df['id'] == animation_id].drop(['id', 'Frame'], axis=1)
    # Matching target rows (Spine values) of this id.
    frames_y = y_validation_df[y_validation_df['id'] == animation_id].drop(['id'], axis=1)

    # Convert once per id instead of once per window.
    features = torch.tensor(frames_x.values, dtype=torch.float32)
    targets = torch.tensor(frames_y.values, dtype=torch.float32)

    number_of_frames = features.shape[0]
    quotient, remainder = divmod(number_of_frames, sequence_length)

    # Full windows: the target is the target row of the window's last frame.
    for i in range(quotient):
        start = i * sequence_length
        stop = start + sequence_length
        x_validation.append(features[start:stop])
        y_validation.append(targets[stop - 1])
        lengths_val.append(sequence_length)

    # Trailing partial window: zero-pad up to sequence_length and record the
    # true length; its target is the target row of the very last frame.
    if remainder != 0:
        padded = torch.zeros(sequence_length, features.shape[1])
        padded[:remainder] = features[-remainder:]
        y_validation.append(targets[number_of_frames - 1])
        x_validation.append(padded)
        lengths_val.append(remainder)

x_validation = torch.stack(x_validation)
y_validation = torch.stack(y_validation)

In [78]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.nn.utils.rnn import pack_padded_sequence

class LSTMModel(nn.Module):
    """LSTM regressor over zero-padded, variable-length sequences.

    The input batch is packed with the true sequence lengths, run through a
    (possibly multi-layer) LSTM, and the last valid hidden state of the top
    layer is projected to ``output_size`` values by a linear head.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        """Build the LSTM and the linear layers.

        Args:
            input_size: number of features per time step.
            hidden_size: size of the LSTM hidden state.
            num_layers: number of stacked LSTM layers.
            output_size: number of regression outputs.
        """
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        # Not used in forward(); kept so the module's parameters/state_dict
        # layout stays the same as before.
        self.fc2 = nn.Linear(hidden_size, hidden_size)

    def forward(self, x, lengths):
        """Predict one output vector per sequence.

        Args:
            x: padded batch, batch-first (the LSTM is built with
                ``batch_first=True``).
            lengths: number of valid time steps of every sequence in ``x``
                (list or tensor); sequences need not be sorted by length.

        Returns:
            Tensor with one row of ``output_size`` values per sequence.
        """
        # pack_padded_sequence requires the lengths to live on the CPU.
        if torch.is_tensor(lengths):
            lengths = lengths.cpu()
        data = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)
        # For packed input, h_n holds the hidden state at each sequence's last
        # *valid* step, already restored to the original batch order. Indexing
        # the re-padded output at position -1 instead would read the zero
        # padding row for every sequence shorter than the longest one.
        _, (h_n, _) = self.lstm(data)
        out = self.fc(h_n[-1])  # final hidden state of the top LSTM layer
        return out

In [64]:
from torchsummary import summary

# Initialize model
input_size = x_train.shape[2]
hidden_size = 32
num_layers = 1
output_size = 12
model = LSTMModel(input_size, hidden_size, num_layers, output_size)

# Define loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0003)

# Train the model
num_epochs = 10000
# Training is full-batch: every epoch is one optimisation step over all of
# x_train, so the batch size equals the number of training windows.
batch_size = x_train.shape[0]
log_every = 20  # run validation and print the losses every `log_every` epochs

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()  # Zero the gradients

    # Forward pass on the whole training set
    outputs = model(x_train, lengths)
    loss = criterion(outputs, y_train)

    # Backpropagation and optimization
    loss.backward()
    optimizer.step()

    if (epoch + 1) % log_every == 0:
        model.eval()
        with torch.no_grad():
            val_outputs = model(x_validation, lengths_val)
            val_loss = criterion(val_outputs, y_validation)

            # `loss` is the training loss of the step that was just taken.
            print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {loss.item():.10f}, Validation Loss: {val_loss.item():.10f}')

print("Training finished!")

Epoch [20/10000], Training Loss: 1.1244455576, Validation Loss: 0.6902565360
Epoch [40/10000], Training Loss: 0.8598522544, Validation Loss: 0.5584358573
Epoch [60/10000], Training Loss: 0.6518928409, Validation Loss: 0.4376107156
Epoch [80/10000], Training Loss: 0.5113423467, Validation Loss: 0.3630029857
Epoch [100/10000], Training Loss: 0.4081244469, Validation Loss: 0.3108651042
Epoch [120/10000], Training Loss: 0.3319714963, Validation Loss: 0.2646967471
Epoch [140/10000], Training Loss: 0.2832886279, Validation Loss: 0.2295845002
Epoch [160/10000], Training Loss: 0.2433966547, Validation Loss: 0.1983471662
Epoch [180/10000], Training Loss: 0.2057455778, Validation Loss: 0.1681747139
Epoch [200/10000], Training Loss: 0.1785031855, Validation Loss: 0.1445733905
Epoch [220/10000], Training Loss: 0.1571953297, Validation Loss: 0.1258459985
Epoch [240/10000], Training Loss: 0.1394940168, Validation Loss: 0.1115760952
Epoch [260/10000], Training Loss: 0.1245974898, Validation Loss: 0.0

In [83]:
# Predict on the validation set in evaluation mode and without recording an
# autograd graph; the result is only used for metric computation.
model.eval()
with torch.no_grad():
    val_outputs = model(x_validation, lengths_val)

from sklearn.metrics import mean_squared_error


def calculae_amse_tensor(y_pred, y_validation_df):
    """Print and return the mean squared error averaged over output columns.

    The MSE is computed separately for every column and the per-column values
    are then averaged with equal weight.

    Args:
        y_pred: 2-D array-like of predictions, one column per output.
        y_validation_df: 2-D array-like of ground-truth values with the same
            shape as ``y_pred``.

    Returns:
        The average of the per-column mean squared errors as a float.

    Raises:
        ValueError: if the inputs are not 2-D, are empty, or differ in shape.
    """
    predictions = np.asarray(y_pred, dtype=np.float64)
    targets = np.asarray(y_validation_df, dtype=np.float64)

    if predictions.ndim != 2 or predictions.shape != targets.shape:
        raise ValueError(
            f"y_pred and y_validation_df must be 2-D with identical shapes, "
            f"got {predictions.shape} and {targets.shape}"
        )
    if predictions.size == 0:
        raise ValueError("cannot compute the MSE of empty arrays")

    # Per-column MSE (mean over rows), then the mean over all columns.
    mse_values = np.mean((targets - predictions) ** 2, axis=0)
    average_mse = float(mse_values.mean())

    print(f"Average Mean Squared Error: {average_mse}")
    return average_mse

# Convert to numpy under new names. `y_validation` must remain a tensor: the
# training cells pass it to `criterion`, and overwriting it with a numpy array
# would break them (and a second run of this cell) on a top-to-bottom run.
val_outputs_np = val_outputs.detach().cpu().numpy()
y_validation_np = y_validation.detach().cpu().numpy()

calculae_amse_tensor(val_outputs_np, y_validation_np)

Average Mean Squared Error: 1.7848064651091893


In [79]:
from torchsummary import summary

# Initialize a fresh model (discards any previously trained weights)
input_size = x_train.shape[2]
hidden_size = 32
num_layers = 1
output_size = 12
model = LSTMModel(input_size, hidden_size, num_layers, output_size)

# Define loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0003)

# Train the model: full-batch, one optimisation step per epoch
num_epochs = 10000
log_every = 20  # run validation and print the losses every `log_every` epochs

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()  # Zero the gradients

    # Forward pass on the whole training set
    outputs = model(x_train, lengths)
    loss = criterion(outputs, y_train)

    # Backpropagation and optimization
    loss.backward()
    optimizer.step()

    if (epoch + 1) % log_every == 0:
        model.eval()
        with torch.no_grad():
            val_outputs = model(x_validation, lengths_val)
            val_loss = criterion(val_outputs, y_validation)

            # `loss` is the training loss of the step that was just taken.
            print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {loss.item():.10f}, Validation Loss: {val_loss.item():.10f}')

print("Training finished!")

Epoch [20/10000], Training Loss: 1.0748441219, Validation Loss: 0.6202629805
Epoch [40/10000], Training Loss: 0.9206137657, Validation Loss: 0.5179287791
Epoch [60/10000], Training Loss: 0.7670961618, Validation Loss: 0.4235973954
Epoch [80/10000], Training Loss: 0.6291338205, Validation Loss: 0.3464701772
Epoch [100/10000], Training Loss: 0.5192005038, Validation Loss: 0.2873368561
Epoch [120/10000], Training Loss: 0.4379957616, Validation Loss: 0.2430446297
Epoch [140/10000], Training Loss: 0.3753544390, Validation Loss: 0.2080564946
Epoch [160/10000], Training Loss: 0.3238978088, Validation Loss: 0.1769459546
Epoch [180/10000], Training Loss: 0.2765240967, Validation Loss: 0.1520048231
Epoch [200/10000], Training Loss: 0.2351768315, Validation Loss: 0.1328781992
Epoch [220/10000], Training Loss: 0.2051547021, Validation Loss: 0.1181939691
Epoch [240/10000], Training Loss: 0.1808957607, Validation Loss: 0.1070468277
Epoch [260/10000], Training Loss: 0.1598491669, Validation Loss: 0.0

In [80]:
# Continue training the existing `model` with a fresh Adam optimizer and a
# 10x larger learning rate than the 0.0003 used when the model was created.
num_epochs = 10000
optimizer = optim.Adam(model.parameters(), lr=0.003)
log_every = 20  # run validation and print the losses every `log_every` epochs

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()  # Zero the gradients

    # Forward pass on the whole training set (full-batch step)
    outputs = model(x_train, lengths)
    loss = criterion(outputs, y_train)

    # Backpropagation and optimization
    loss.backward()
    optimizer.step()

    if (epoch + 1) % log_every == 0:
        model.eval()
        with torch.no_grad():
            val_outputs = model(x_validation, lengths_val)
            val_loss = criterion(val_outputs, y_validation)

            # `loss` is the training loss of the step that was just taken.
            print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {loss.item():.10f}, Validation Loss: {val_loss.item():.10f}')

print("Training finished!")

Epoch [20/10000], Training Loss: 0.0089101819, Validation Loss: 0.0089048464
Epoch [40/10000], Training Loss: 0.0022561706, Validation Loss: 0.0095982170
Epoch [60/10000], Training Loss: 0.0005919900, Validation Loss: 0.0066674184
Epoch [80/10000], Training Loss: 0.0002852711, Validation Loss: 0.0062750010
Epoch [100/10000], Training Loss: 0.0001867977, Validation Loss: 0.0061051687
Epoch [120/10000], Training Loss: 0.0001382053, Validation Loss: 0.0058311392
Epoch [140/10000], Training Loss: 0.0001091015, Validation Loss: 0.0056471284
Epoch [160/10000], Training Loss: 0.0000927501, Validation Loss: 0.0055286880
Epoch [180/10000], Training Loss: 0.0000832239, Validation Loss: 0.0054362840
Epoch [200/10000], Training Loss: 0.0000769824, Validation Loss: 0.0053604208
Epoch [220/10000], Training Loss: 0.0000725170, Validation Loss: 0.0052981796
Epoch [240/10000], Training Loss: 0.0000691370, Validation Loss: 0.0052483887
Epoch [260/10000], Training Loss: 0.0000664721, Validation Loss: 0.0