<a href="https://colab.research.google.com/github/indhu68/Intro_to_DL_Project/blob/main/RTML_Main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch.nn as nn
import matplotlib.pyplot as plt


In [2]:
# Load data
from google.colab import drive
drive.mount('/content/drive')
data = pd.read_csv('/content/drive/My Drive/Kasungu_Telemetry_Pts_Oct23.csv', parse_dates=['Time.Stamp'])
data = data[["Index","Tag", "Latitude", "Longitude", "Time.Stamp"]]
data['Time.Stamp'] = pd.to_datetime(data['Time.Stamp'])
data.sort_values(by=['Tag', 'Time.Stamp'], inplace=True)

Mounted at /content/drive


In [3]:
# Calculate speed and distances
epsilon = 1e-5  # Small constant to avoid division by zero
data['Time_diff'] = data.groupby('Tag')['Time.Stamp'].diff().dt.total_seconds().fillna(0)
data['Lat_diff'] = data.groupby('Tag')['Latitude'].diff().fillna(0)
data['Lon_diff'] = data.groupby('Tag')['Longitude'].diff().fillna(0)
data['Speed'] = np.sqrt(data['Lat_diff']**2 + data['Lon_diff']**2) / (data['Time_diff'] + epsilon)

In [4]:
# Handle potential infinite or NaN values
data.replace([np.inf, -np.inf], np.nan, inplace=True)
data.fillna(data.mean(), inplace=True)  # Fill NaNs with the mean of the column


In [5]:
# Normalize features
scaler = MinMaxScaler()
data[['Latitude', 'Longitude', 'Lat_diff', 'Lon_diff', 'Speed']] = scaler.fit_transform(
    data[['Latitude', 'Longitude', 'Lat_diff', 'Lon_diff', 'Speed']]
)

In [6]:
def create_sequences(data, n_steps):
    X, y = [], []
    for i in range(len(data)):
        end_ix = i + n_steps + 1
        if end_ix > len(data):
            break
        seq_x = data.iloc[i:end_ix-1].to_numpy()
        seq_y = data.iloc[end_ix-1][['Latitude', 'Longitude']].to_numpy()
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

# Prepare sequences
grouped = data.groupby('Tag')
X, y = [], []
for _, group in grouped:
    sequences = create_sequences(group[['Latitude', 'Longitude', 'Lat_diff', 'Lon_diff', 'Speed']], n_steps=5)
    X.append(sequences[0])
    y.append(sequences[1])

X = np.concatenate(X)
y = np.concatenate(y)


In [7]:
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

In [9]:
# Define LSTM model
class LSTMModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(5,32)
        self.lstm = nn.LSTM(input_size=32, hidden_size=128, num_layers=3, batch_first=True)
        self.fc = nn.Linear(128, 2)

    def forward(self, x):
        x = self.fc1(x)
        out, _ = self.lstm(x)
        return self.fc(out[:, -1, :])

model = LSTMModel().to('cuda')
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [10]:
def train(model, dataloader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to('cuda')
            y_batch = y_batch.to('cuda')
            optimizer.zero_grad()
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {total_loss / len(dataloader)}')

# Prepare data loaders
train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=64, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test_tensor, y_test_tensor), batch_size=64, shuffle=True)

# Train the model
train(model, train_loader, criterion, optimizer)


Epoch 1, Loss: 0.000694218862209818
Epoch 2, Loss: 2.363142190976811e-05
Epoch 3, Loss: 1.8318997013472273e-05
Epoch 4, Loss: 1.6594050099878602e-05
Epoch 5, Loss: 1.5442011992024492e-05
Epoch 6, Loss: 1.4452663257021364e-05
Epoch 7, Loss: 1.3492196730636292e-05
Epoch 8, Loss: 1.3535400747145868e-05
Epoch 9, Loss: 1.2673534802387248e-05
Epoch 10, Loss: 1.2237672715460871e-05


In [11]:
import torch

# Evaluate Function
def evaluate(model, dataloader, criterion):
    model.eval()
    total_loss = 0
    sample_count = 0  # Initialize the sample counter

    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to('cuda')
            y_batch = y_batch.to('cuda')
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            total_loss += loss.item()

            # Print actual and predicted values for the first 10 samples
            if sample_count < 10:
                for actual, predicted in zip(y_batch, predictions):
                    print(f'Actual: {actual.cpu().numpy()}, Predicted: {predicted.cpu().numpy()}')
                    sample_count += 1
                    if sample_count >= 10:
                        break

    # Print the average loss
    print(f'\nAverage Loss: {total_loss / len(dataloader)}')

# Example usage
# Assuming 'model', 'test_loader', and 'criterion' are already defined
evaluate(model, test_loader, criterion)


Actual: [0.3329295  0.78440124], Predicted: [0.33421972 0.7847123 ]
Actual: [0.4349434  0.89339054], Predicted: [0.43734732 0.89525217]
Actual: [0.3239687  0.80752194], Predicted: [0.329485  0.8051759]
Actual: [0.29325098 0.77780116], Predicted: [0.29280734 0.7822648 ]
Actual: [0.5302073  0.60972446], Predicted: [0.5315625 0.5957586]
Actual: [0.3223936 0.6810553], Predicted: [0.32495123 0.6821847 ]
Actual: [0.2812679 0.7039108], Predicted: [0.2828736  0.70421046]
Actual: [0.6209335  0.71571374], Predicted: [0.6246111 0.7161058]
Actual: [0.49855474 0.7646976 ], Predicted: [0.5003591  0.76558983]
Actual: [0.18040508 0.59452546], Predicted: [0.17910984 0.5867238 ]

Average Loss: 1.0925312754072499e-05


In [12]:
import torch
import torch.nn as nn

class RNNModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(5, 32)
        self.rnn = nn.RNN(input_size=32, hidden_size=128, num_layers=3, batch_first=True)
        self.fc = nn.Linear(128, 2)

    def forward(self, x):
        x = self.fc1(x)
        out, _ = self.rnn(x)
        return self.fc(out[:, -1, :])

rnn_model = RNNModel().to('cuda')
criterion_rnn = nn.MSELoss()
optimizer_rnn = torch.optim.Adam(rnn_model.parameters(), lr=0.001)


In [13]:
def train(model, dataloader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to('cuda')
            y_batch = y_batch.to('cuda')
            optimizer.zero_grad()
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {total_loss / len(dataloader)}')

# Prepare data loaders
train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=64, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test_tensor, y_test_tensor), batch_size=64, shuffle=True)

# Train the model
train(model, train_loader, criterion, optimizer)


Epoch 1, Loss: 1.1902292742952133e-05
Epoch 2, Loss: 1.1762569800450362e-05
Epoch 3, Loss: 1.1426485440491971e-05
Epoch 4, Loss: 1.1262521739503699e-05
Epoch 5, Loss: 1.1091893240014162e-05
Epoch 6, Loss: 1.1086855351411204e-05
Epoch 7, Loss: 1.1021604616677645e-05
Epoch 8, Loss: 1.0775050233610734e-05
Epoch 9, Loss: 1.0727119036216882e-05
Epoch 10, Loss: 1.069307076484041e-05


In [14]:
import torch

# Evaluate Function
def evaluate(model, dataloader, criterion):
    model.eval()
    total_loss = 0
    sample_count = 0  # Initialize the sample counter

    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to('cuda')
            y_batch = y_batch.to('cuda')
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            total_loss += loss.item()

            # Print actual and predicted values for the first 10 samples
            if sample_count < 10:
                for actual, predicted in zip(y_batch, predictions):
                    print(f'Actual: {actual.cpu().numpy()}, Predicted: {predicted.cpu().numpy()}')
                    sample_count += 1
                    if sample_count >= 10:
                        break

    # Print the average loss
    print(f'\nAverage Loss: {total_loss / len(dataloader)}')

# Example usage
# Assuming 'model', 'test_loader', and 'criterion' are already defined
evaluate(model, test_loader, criterion)


Actual: [0.46375427 0.78844017], Predicted: [0.46440214 0.79019743]
Actual: [0.4819719 0.8002212], Predicted: [0.48341405 0.79956317]
Actual: [0.46771684 0.6616714 ], Predicted: [0.4713208 0.6655133]
Actual: [0.4699237 0.7037069], Predicted: [0.47256953 0.703285  ]
Actual: [0.41720405 0.8026573 ], Predicted: [0.417163   0.80552536]
Actual: [0.5008492 0.6637181], Predicted: [0.5032791  0.65886444]
Actual: [0.438075 0.849946], Predicted: [0.4396276 0.847418 ]
Actual: [0.2813803 0.7111361], Predicted: [0.28179872 0.71358055]
Actual: [0.49512002 0.7366627 ], Predicted: [0.4960655 0.7364174]
Actual: [0.47034848 0.69747025], Predicted: [0.47208422 0.6985734 ]

Average Loss: 1.0264667840022784e-05


In [15]:
class GRUModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(5, 32)
        self.gru = nn.GRU(input_size=32, hidden_size=128, num_layers=3, batch_first=True)
        self.fc = nn.Linear(128, 2)

    def forward(self, x):
        x = self.fc1(x)
        out, _ = self.gru(x)
        return self.fc(out[:, -1, :])

gru_model = GRUModel().to('cuda')
criterion_gru = nn.MSELoss()
optimizer_gru = torch.optim.Adam(gru_model.parameters(), lr=0.001)


In [16]:
def train(model, dataloader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to('cuda')
            y_batch = y_batch.to('cuda')
            optimizer.zero_grad()
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {total_loss / len(dataloader)}')

# Prepare data loaders
train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=64, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test_tensor, y_test_tensor), batch_size=64, shuffle=True)

# Train the model
train(model, train_loader, criterion, optimizer)


Epoch 1, Loss: 1.0439942129530562e-05
Epoch 2, Loss: 1.045315089497516e-05
Epoch 3, Loss: 1.0412983895918973e-05
Epoch 4, Loss: 1.0241018224906148e-05
Epoch 5, Loss: 1.0253787625705345e-05
Epoch 6, Loss: 1.0228292008670922e-05
Epoch 7, Loss: 1.0257761380903183e-05
Epoch 8, Loss: 1.0175824001341843e-05
Epoch 9, Loss: 1.0023025816745823e-05
Epoch 10, Loss: 1.0077576707204017e-05


In [17]:
import torch

# Evaluate Function
def evaluate(model, dataloader, criterion):
    model.eval()
    total_loss = 0
    sample_count = 0  # Initialize the sample counter

    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to('cuda')
            y_batch = y_batch.to('cuda')
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            total_loss += loss.item()

            # Print actual and predicted values for the first 10 samples
            if sample_count < 10:
                for actual, predicted in zip(y_batch, predictions):
                    print(f'Actual: {actual.cpu().numpy()}, Predicted: {predicted.cpu().numpy()}')
                    sample_count += 1
                    if sample_count >= 10:
                        break

    # Print the average loss
    print(f'\nAverage Loss: {total_loss / len(dataloader)}')

# Example usage
# Assuming 'model', 'test_loader', and 'criterion' are already defined
evaluate(model, test_loader, criterion)


Actual: [0.15817913 0.697936  ], Predicted: [0.155433 0.697458]
Actual: [0.29913282 0.69248897], Predicted: [0.3069551 0.6962958]
Actual: [0.22392523 0.6634714 ], Predicted: [0.21982333 0.6626216 ]
Actual: [0.44104153 0.75101244], Predicted: [0.44033432 0.7528136 ]
Actual: [0.49082646 0.6932996 ], Predicted: [0.49107802 0.69492984]
Actual: [0.49132952 0.72646767], Predicted: [0.4931522  0.73474765]
Actual: [0.5677836  0.69172364], Predicted: [0.5687713 0.693594 ]
Actual: [0.5219364 0.672719 ], Predicted: [0.51989067 0.6723095 ]
Actual: [0.18923949 0.6882931 ], Predicted: [0.18551998 0.68697935]
Actual: [0.24689299 0.7935146 ], Predicted: [0.24403493 0.7933068 ]

Average Loss: 1.0187548256263404e-05


In [18]:
import torch
import torch.nn as nn

class TransformerModel(nn.Module):
    def __init__(self, input_dim, num_heads, num_layers, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Linear(input_dim, hidden_dim)
        transformer_layer = nn.TransformerEncoderLayer(
            d_model=hidden_dim, nhead=num_heads, dim_feedforward=hidden_dim * 4, batch_first=True
        )
        self.transformer = nn.TransformerEncoder(transformer_layer, num_layers=num_layers)
        self.fc_out = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer(x)
        return self.fc_out(x[:, -1, :])

# Example initialization and setting up optimizer and loss
transformer_attention_model = TransformerModel(input_dim=5, num_heads=4, num_layers=3, hidden_dim=128, output_dim=2).to('cuda')
criterion_transformer = nn.MSELoss()
optimizer_transformer = torch.optim.Adam(transformer_attention_model.parameters(), lr=0.001)


In [19]:
def train(model, dataloader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to('cuda')
            y_batch = y_batch.to('cuda')
            optimizer.zero_grad()
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {total_loss / len(dataloader)}')

# Prepare data loaders
train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=64, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test_tensor, y_test_tensor), batch_size=64, shuffle=True)

# Train the model
train(model, train_loader, criterion, optimizer)


Epoch 1, Loss: 9.983622884863069e-06
Epoch 2, Loss: 9.967951133668944e-06
Epoch 3, Loss: 9.857809624612748e-06
Epoch 4, Loss: 9.93672460294843e-06
Epoch 5, Loss: 9.879981025545194e-06
Epoch 6, Loss: 9.834637814751983e-06
Epoch 7, Loss: 9.828848025058417e-06
Epoch 8, Loss: 9.8025992440049e-06
Epoch 9, Loss: 9.748260624470798e-06
Epoch 10, Loss: 9.704611033311836e-06


In [20]:


# Evaluate Function
def evaluate(model, dataloader, criterion):
    model.eval()
    total_loss = 0
    sample_count = 0  # Initialize the sample counter

    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to('cuda')
            y_batch = y_batch.to('cuda')
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            total_loss += loss.item()

            # Print actual and predicted values for the first 10 samples
            if sample_count < 10:
                for actual, predicted in zip(y_batch, predictions):
                    print(f'Actual: {actual.cpu().numpy()}, Predicted: {predicted.cpu().numpy()}')
                    sample_count += 1
                    if sample_count >= 10:
                        break

    # Print the average loss
    print(f'\nAverage Loss: {total_loss / len(dataloader)}')

# Example usage
# Assuming 'model', 'test_loader', and 'criterion' are already defined
evaluate(model, test_loader, criterion)


Actual: [0.2888675 0.6440044], Predicted: [0.2927965 0.6404763]
Actual: [0.29327887 0.77694523], Predicted: [0.29583174 0.7765198 ]
Actual: [0.5390014 0.6138322], Predicted: [0.5423672 0.6152275]
Actual: [0.50507146 0.77018404], Predicted: [0.50661623 0.77154136]
Actual: [0.2599357 0.7014261], Predicted: [0.26037076 0.69787014]
Actual: [0.487284   0.75936216], Predicted: [0.4874707  0.75906146]
Actual: [0.49353948 0.7035727 ], Predicted: [0.495461   0.70303714]
Actual: [0.45249674 0.7430093 ], Predicted: [0.4557468 0.7455766]
Actual: [0.316535   0.65665066], Predicted: [0.3192597 0.6540148]
Actual: [0.3580421  0.85762525], Predicted: [0.3485177  0.85662746]

Average Loss: 1.027743973545413e-05


below code - don't use

In [None]:
# Evaluate Function
def evaluate(model, dataloader, criterion):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to('cuda')
            y_batch = y_batch.to('cuda')
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            total_loss += loss.item()
    print(f'Average Loss: {total_loss / len(dataloader)}')

# Evaluate the model on the test set
evaluate(model, test_loader, criterion)

In [None]:
# Prediction Function
def predict_next_position(model, data, tag, timestamp, scaler, n_steps=5):
    model.eval()
    tag_data = data[data['Tag'] == tag].sort_values(by='Time.Stamp')

    idx = tag_data[tag_data['Time.Stamp'] <= timestamp].tail(n_steps).index
    if len(idx) < n_steps:
        return "Not enough data points to create a sequence."

    seq = tag_data.loc[idx, ['Latitude', 'Longitude', 'Lat_diff', 'Lon_diff', 'Speed']]
    seq_normalized = scaler.transform(seq)
    seq_tensor = torch.tensor(seq_normalized[np.newaxis, :], dtype=torch.float32).to('cuda')

    with torch.no_grad():
        predicted_position_normalized = model(seq_tensor)
        predicted_position_normalized = predicted_position_normalized.cpu().numpy()

    # Reshape the prediction to match the scaler's expected input
    predicted_position_normalized = np.insert(predicted_position_normalized, 2, [[0, 0, 0]], axis=1)

    # Inverse transform to get the actual latitude and longitude
    predicted_position = scaler.inverse_transform(predicted_position_normalized)[0, :2]

    return predicted_position

In [None]:
def predict_next_position(model, data, tag, timestamp, scaler, n_steps=5):
    model.eval()

    # Filter data for the specified tag
    tag_data = data[data['Tag'] == tag]

    # If the tag does not exist or not enough data points
    if tag_data.empty or tag_data[tag_data['Time.Stamp'] < timestamp].shape[0] < n_steps:
        return "Not enough data points to create a sequence.", None

    # Ensure the data is sorted by timestamp
    tag_data = tag_data.sort_values(by='Time.Stamp')

    # Get the last n_steps records before the given timestamp for the sequence
    sequence_data = tag_data[tag_data['Time.Stamp'] < timestamp][-n_steps:]

    # Process the sequence for prediction
    seq_scaled = scaler.transform(sequence_data[['Latitude', 'Longitude', 'Lat_diff', 'Lon_diff', 'Speed']])
    seq_tensor = torch.tensor(seq_scaled[np.newaxis, :], dtype=torch.float32).to('cuda')

    # Predict the next position
    with torch.no_grad():
        predicted_position_scaled = model(seq_tensor)
        predicted_position_scaled = predicted_position_scaled.cpu().numpy()

    # Reshape for inverse scaling
    dummy_data = np.zeros((1, 5))  # Assuming you have 5 features as before
    dummy_data[:, :2] = predicted_position_scaled[0, :2]

    # Inverse transform to get the actual latitude and longitude
    predicted_position = scaler.inverse_transform(dummy_data)[0, :2]

    return predicted_position,actual ,True

# Example usage
timestamp = pd.to_datetime('2022-07-14 11:08:00')  # Ensure this is within the date range of your data
tag = 5748  # Use an actual numerical Tag from your dataset
predicted_pos, actual, success = predict_next_position(model, data, tag, timestamp, scaler)

if not success:
    print(predicted_pos)
else:
    predicted_lat, predicted_lon = predicted_pos
    actual_lat, actual_lon = actual
    print(f'Predicted Position: Latitude {predicted_lat:.6f}, Longitude {predicted_lon:.6f}')
    print(f'Actual Position: Latitude {actual_lat:.6f}, Longitude {actual_lon:.6f}')
