In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.nn as nn

# Dataset is now stored in a Pandas Dataframe

In [2]:
data = pd.read_csv("threetrasco_file.csv")
data.head()

Unnamed: 0,datetime,LAT,LONG,SPEED,PATH
0,2023-05-19T02:00:48Z,8.314704,124.251052,0.0,1
1,2023-05-19T02:00:49Z,8.3147,124.251056,0.000586,1
2,2023-05-19T02:00:50Z,8.314696,124.251058,0.001022,1
3,2023-05-19T02:00:51Z,8.314693,124.251057,0.001307,1
4,2023-05-19T02:00:52Z,8.31469,124.251056,0.00154,1


In [3]:
data = data[['datetime', 'LAT', 'LONG', 'PATH']]
data

Unnamed: 0,datetime,LAT,LONG,PATH
0,2023-05-19T02:00:48Z,8.314704,124.251052,1
1,2023-05-19T02:00:49Z,8.314700,124.251056,1
2,2023-05-19T02:00:50Z,8.314696,124.251058,1
3,2023-05-19T02:00:51Z,8.314693,124.251057,1
4,2023-05-19T02:00:52Z,8.314690,124.251056,1
...,...,...,...,...
709602,2023-05-20T04:00:49Z,8.180791,124.217538,15
709603,2023-05-20T04:00:50Z,8.180791,124.217538,15
709604,2023-05-20T04:00:51Z,8.180791,124.217538,15
709605,2023-05-20T04:00:52Z,8.180791,124.217538,15


In [4]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [5]:
data['datetime'] = pd.to_datetime(data['datetime'])

In [6]:
from copy import deepcopy as dc

def prepare_dataframe_for_lstm(df, n_steps):
    df = dc(df)

    df.set_index('datetime', inplace=True)

    for i in range(1, n_steps+1):
        if (i < 5):
            df[f'LAT(n+{5-i})'] = df['LAT'].shift(i)
            df[f'LONG(n+{5-i})'] = df['LONG'].shift(i)
            df[f'PATH(n+{5-i})'] = df['PATH'].shift(i)
        else:
            df[f'LAT(t-{i-5})'] = df['LAT'].shift(i)
            df[f'LONG(t-{i-5})'] = df['LONG'].shift(i)
            df[f'PATH(t-{i-5})'] = df['PATH'].shift(i)

    df.dropna(inplace=True)
    df = df.drop(df.columns[[0, 1, 2]], axis=1)

    return df

lookback = 20
features = 5
shifted_df = prepare_dataframe_for_lstm(data, lookback)
pd.set_option('display.max_columns', None)
shifted_df

Unnamed: 0_level_0,LAT(n+4),LONG(n+4),PATH(n+4),LAT(n+3),LONG(n+3),PATH(n+3),LAT(n+2),LONG(n+2),PATH(n+2),LAT(n+1),LONG(n+1),PATH(n+1),LAT(t-0),LONG(t-0),PATH(t-0),LAT(t-1),LONG(t-1),PATH(t-1),LAT(t-2),LONG(t-2),PATH(t-2),LAT(t-3),LONG(t-3),PATH(t-3),LAT(t-4),LONG(t-4),PATH(t-4),LAT(t-5),LONG(t-5),PATH(t-5),LAT(t-6),LONG(t-6),PATH(t-6),LAT(t-7),LONG(t-7),PATH(t-7),LAT(t-8),LONG(t-8),PATH(t-8),LAT(t-9),LONG(t-9),PATH(t-9),LAT(t-10),LONG(t-10),PATH(t-10),LAT(t-11),LONG(t-11),PATH(t-11),LAT(t-12),LONG(t-12),PATH(t-12),LAT(t-13),LONG(t-13),PATH(t-13),LAT(t-14),LONG(t-14),PATH(t-14),LAT(t-15),LONG(t-15),PATH(t-15)
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1
2023-05-19 02:01:17+00:00,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314683,124.251048,1.0,8.314682,124.251051,1.0,8.314680,124.251053,1.0,8.314679,124.251056,1.0,8.314680,124.251056,1.0,8.314684,124.251054,1.0,8.314686,124.251055,1.0,8.314688,124.251057,1.0,8.314690,124.251056,1.0,8.314693,124.251057,1.0,8.314696,124.251058,1.0,8.314700,124.251056,1.0,8.314704,124.251052,1.0
2023-05-19 02:01:18+00:00,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314683,124.251048,1.0,8.314682,124.251051,1.0,8.314680,124.251053,1.0,8.314679,124.251056,1.0,8.314680,124.251056,1.0,8.314684,124.251054,1.0,8.314686,124.251055,1.0,8.314688,124.251057,1.0,8.314690,124.251056,1.0,8.314693,124.251057,1.0,8.314696,124.251058,1.0,8.314700,124.251056,1.0
2023-05-19 02:01:19+00:00,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314683,124.251048,1.0,8.314682,124.251051,1.0,8.314680,124.251053,1.0,8.314679,124.251056,1.0,8.314680,124.251056,1.0,8.314684,124.251054,1.0,8.314686,124.251055,1.0,8.314688,124.251057,1.0,8.314690,124.251056,1.0,8.314693,124.251057,1.0,8.314696,124.251058,1.0
2023-05-19 02:01:20+00:00,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314683,124.251048,1.0,8.314682,124.251051,1.0,8.314680,124.251053,1.0,8.314679,124.251056,1.0,8.314680,124.251056,1.0,8.314684,124.251054,1.0,8.314686,124.251055,1.0,8.314688,124.251057,1.0,8.314690,124.251056,1.0,8.314693,124.251057,1.0
2023-05-19 02:01:21+00:00,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314704,124.251026,2.0,8.314683,124.251048,1.0,8.314682,124.251051,1.0,8.314680,124.251053,1.0,8.314679,124.251056,1.0,8.314680,124.251056,1.0,8.314684,124.251054,1.0,8.314686,124.251055,1.0,8.314688,124.251057,1.0,8.314690,124.251056,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-20 04:00:49+00:00,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0
2023-05-20 04:00:50+00:00,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0
2023-05-20 04:00:51+00:00,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0
2023-05-20 04:00:52+00:00,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0,8.180791,124.217538,15.0


In [7]:
shifted_df_as_np = shifted_df.to_numpy()
shifted_df_as_np

array([[  8.314704, 124.251026,   2.      , ...,   8.314704, 124.251052,
          1.      ],
       [  8.314704, 124.251026,   2.      , ...,   8.3147  , 124.251056,
          1.      ],
       [  8.314704, 124.251026,   2.      , ...,   8.314696, 124.251058,
          1.      ],
       ...,
       [  8.180791, 124.217538,  15.      , ...,   8.180791, 124.217538,
         15.      ],
       [  8.180791, 124.217538,  15.      , ...,   8.180791, 124.217538,
         15.      ],
       [  8.180791, 124.217538,  15.      , ...,   8.180791, 124.217538,
         15.      ]])

In [8]:
scaler = MinMaxScaler(feature_range=(-1, 1))
shifted_df_as_np = scaler.fit_transform(shifted_df_as_np)

In [9]:
X = shifted_df_as_np[:, 15:]
y = shifted_df_as_np[:, 0:15]

X.shape, y.shape

((709587, 45), (709587, 15))

In [10]:
X = dc(np.flip(X, axis=1))
X

array([[-1.        ,  0.44063829,  0.79265404, ..., -1.        ,
         0.43974988,  0.79265404],
       [-1.        ,  0.44077497,  0.79260052, ..., -1.        ,
         0.43974988,  0.79265404],
       [-1.        ,  0.44084331,  0.792547  , ..., -1.        ,
         0.43974988,  0.79265404],
       ...,
       [ 1.        , -0.70453264, -0.99918378, ...,  1.        ,
        -0.70453264, -0.99918378],
       [ 1.        , -0.70453264, -0.99918378, ...,  1.        ,
        -0.70453264, -0.99918378],
       [ 1.        , -0.70453264, -0.99918378, ...,  1.        ,
        -0.70453264, -0.99918378]])

In [11]:
split_index = int(len(X)*0.8)

X_train = X[:split_index]
X_test = X[split_index:]

y_train = y[:split_index]
y_test = y[split_index:]

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((567669, 45), (141918, 45), (567669, 15), (141918, 15))

In [12]:
X_train = X_train.reshape((-1, (lookback-5)*3, 1))
X_test = X_test.reshape((-1, (lookback-5)*3, 1))

y_train = y_train.reshape((-1, features*3, 1))
y_test = y_test.reshape((-1, features*3, 1))

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((567669, 45, 1), (141918, 45, 1), (567669, 15, 1), (141918, 15, 1))

In [13]:
X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train).float()
X_test = torch.tensor(X_test).float()
y_test = torch.tensor(y_test).float()

X_train.shape, X_test.shape, y_train.shape, y_test.shape

(torch.Size([567669, 45, 1]),
 torch.Size([141918, 45, 1]),
 torch.Size([567669, 15, 1]),
 torch.Size([141918, 15, 1]))

In [18]:
# Separate predictor values and target values
df = shifted_df
predictors = df.iloc[:, features*3:].values  # Assuming predictor columns start from the second column
targets = df.iloc[:, :features*3].values     # Assuming target columns are the first two columns

# Normalize predictor values and target values separately
scaler = MinMaxScaler(feature_range=(-1, 1))
predictors = scaler.fit_transform(predictors)
target_scaler = MinMaxScaler(feature_range=(-1, 1))
targets = target_scaler.fit_transform(targets)

# Split dataset into training and validation sets
train_size = int(0.8 * len(df))  # Adjust the train/validation split ratio as needed
train_predictors, val_predictors = predictors[:train_size], predictors[train_size:]
train_targets, val_targets = targets[:train_size], targets[train_size:]

# Convert dataset into sequences of fixed length
sequence_length = lookback*3  # Set the desired sequence length
train_sequences = []
val_sequences = []

for i in range(len(train_predictors) - sequence_length):
    train_sequences.append((train_predictors[i:i+sequence_length], train_targets[i+sequence_length]))

for i in range(len(val_predictors) - sequence_length):
    val_sequences.append((val_predictors[i:i+sequence_length], val_targets[i+sequence_length]))

"""# Define a custom Dataset class
class GPXDataset(Dataset):
    def __init__(self, sequences):
        self.sequences = sequences

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        x = self.sequences[idx][0]
        y = self.sequences[idx][1]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Create DataLoader objects for efficient batch processing
batch_size = 16  # Set the desired batch size
train_dataset = GPXDataset(train_sequences)
val_dataset = GPXDataset(val_sequences)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
"""

class GPXDataset(Dataset):
    def __init__(self, sequences):
        self.sequences = sequences

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        # Extract the first 15 columns as y (target)
        y = self.sequences[idx][1]

        # Extract the rest of the columns as x (predictors)
        x = self.sequences[idx][0]

        # Convert to torch tensors
        x_tensor = torch.tensor(x, dtype=torch.float32)
        y_tensor = torch.tensor(y, dtype=torch.float32)

        return x_tensor, y_tensor
    
batch_size = 100
output_size = 15
train_dataset = GPXDataset(train_sequences)
val_dataset = GPXDataset(val_sequences)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)



In [19]:
# Define the LSTM model and other necessary components
"""class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_stacked_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_stacked_layers = num_stacked_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_stacked_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 15)

    def forward(self, x):
        batch_size = x.size(0)
        h0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)
        c0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out"""
    
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_stacked_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_stacked_layers = num_stacked_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_stacked_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        batch_size = x.size(0)
        h0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)
        c0 = torch.zeros(self.num_stacked_layers, batch_size, self.hidden_size).to(device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# Set the device to be used (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model and move it to the device
model = LSTM((lookback-5)*3, 4, 1, output_size)  # Adjust the input_size, hidden_size, and num_stacked_layers as needed
model.to(device)

# Define the loss function, optimizer, and other hyperparameters
learning_rate = 0.001
num_epochs = 2
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Function to train one epoch
def train_one_epoch():
    model.train()
    print(f'Epoch: {epoch + 1}')
    running_loss = 0.0

    for batch_index, batch in enumerate(train_loader):
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)

        output = model(x_batch)
        loss = loss_function(output, y_batch)
        running_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_index % 100 == 99:  # Print every 100 batches
            avg_loss_across_batches = running_loss / 100
            print('Batch {0}, Loss: {1:.9f}'.format(batch_index+1, avg_loss_across_batches))
            running_loss = 0.0
    print()

# Function to validate one epoch
def validate_one_epoch():
    model.train(False)
    running_loss = 0.0

    for batch_index, batch in enumerate(val_loader):
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)

        with torch.no_grad():
            output = model(x_batch)
            loss = loss_function(output, y_batch.view(1, -1))
            running_loss += loss.item()

    avg_loss_across_batches = running_loss / len(val_loader)

    print('Val Loss: {0:.6f}'.format(avg_loss_across_batches))
    print('***************************************************')
    print()

# Training loop
for epoch in range(num_epochs):
    train_one_epoch()
    validate_one_epoch()

Epoch: 1
Batch 100, Loss: 0.306282092
Batch 200, Loss: 0.152153728
Batch 300, Loss: 0.077263501
Batch 400, Loss: 0.045964313
Batch 500, Loss: 0.029730667
Batch 600, Loss: 0.020637245
Batch 700, Loss: 0.015019413
Batch 800, Loss: 0.010429905
Batch 900, Loss: 0.007613745
Batch 1000, Loss: 0.005806595
Batch 1100, Loss: 0.004372346
Batch 1200, Loss: 0.003334392
Batch 1300, Loss: 0.002726463
Batch 1400, Loss: 0.002134245
Batch 1500, Loss: 0.001736363
Batch 1600, Loss: 0.001371084
Batch 1700, Loss: 0.001233778
Batch 1800, Loss: 0.000942937
Batch 1900, Loss: 0.000809847
Batch 2000, Loss: 0.000685442
Batch 2100, Loss: 0.000587030
Batch 2200, Loss: 0.000517525
Batch 2300, Loss: 0.000453954
Batch 2400, Loss: 0.000370787
Batch 2500, Loss: 0.000322011
Batch 2600, Loss: 0.000300199
Batch 2700, Loss: 0.000257003
Batch 2800, Loss: 0.000267878
Batch 2900, Loss: 0.000227118
Batch 3000, Loss: 0.000246666
Batch 3100, Loss: 0.000188871
Batch 3200, Loss: 0.000198926
Batch 3300, Loss: 0.000209325
Batch 3400

  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (15) must match the size of tensor b (1500) at non-singleton dimension 1