In [None]:
%%capture
!pip install gdown contextily

In [None]:
import gdown

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
import contextily as ctx
import tensorflow as tf

from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense, Embedding, Concatenate, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow import feature_column as fc

from sklearn.model_selection import train_test_split
from geopy.distance import geodesic

In [None]:
%%capture

# Local filename for the recorded track, and the Google Drive share link it is
# fetched from.
ROUTE_GPX = 'route.gpx'
ROUTE_GPX_URL = 'https://drive.google.com/file/d/1-kx84-fNAOuDWSdVEYU_DjP4Om0yHnaD/view?usp=sharing'

# fuzzy=True lets gdown work from a full share link instead of a bare file id.
gdown.download(url=ROUTE_GPX_URL, output=ROUTE_GPX, fuzzy=True, quiet=True)

In [None]:
def parse_gpx(filepath):
    """Read every track point of a GPX file into a DataFrame.

    The XML namespace is taken from the document's root tag instead of being
    hard-coded, so GPX 1.1, GPX 1.0 and un-namespaced files are all handled.

    Args:
        filepath: Path (or file object) accepted by ``ET.parse``.

    Returns:
        DataFrame with columns ``time`` (the raw text of the point's ``<time>``
        child, or None when the point has none), ``lat`` and ``lon`` (floats),
        one row per ``trkpt`` in document order.
    """
    root = ET.parse(filepath).getroot()

    # ElementTree spells qualified tags as '{namespace-uri}local-name'; reuse
    # whatever prefix the root element carries (empty when un-namespaced).
    root_tag = root.tag
    ns = root_tag[:root_tag.index('}') + 1] if root_tag.startswith('{') else ''

    data = {
        'time': [], 'lat': [], 'lon': [],
    }

    # trk -> trkseg -> trkpt, matching direct children at each level.
    for trkpt in root.iterfind(f".//{ns}trk/{ns}trkseg/{ns}trkpt"):
        data['lat'].append(float(trkpt.get('lat')))
        data['lon'].append(float(trkpt.get('lon')))

        time_elem = trkpt.find(f"{ns}time")
        data['time'].append(time_elem.text if time_elem is not None else None)

    return pd.DataFrame(data)

df = parse_gpx(ROUTE_GPX)

# Parse the timestamp strings directly into tz-aware UTC. `utc=True` also
# localizes tz-naive stamps, which a separate `.dt.tz_convert('UTC')` step
# rejects with a TypeError.
df['time'] = pd.to_datetime(df['time'], utc=True)
# Whole seconds elapsed since the Unix epoch.
df['time'] = (df['time'] - pd.Timestamp("1970-01-01", tz='UTC')) // pd.Timedelta('1s')

df.head()

In [None]:
def get_bounding_box(df):
    """Return the extent of the points in `df` as two (lat, lon) corners.

    Args:
        df: Frame with numeric 'lat' and 'lon' columns.

    Returns:
        ((min_lat, min_lon), (max_lat, max_lon)).
    """
    lat_values, lon_values = df['lat'], df['lon']
    lower_corner = (lat_values.min(), lon_values.min())
    upper_corner = (lat_values.max(), lon_values.max())
    return (lower_corner, upper_corner)

# Corners of the track's extent, each a (lat, lon) pair.
bounding_box = get_bounding_box(df)
bottom_left, top_right = bounding_box
print(*bounding_box)

In [None]:
# Grid resolution: number of buckets along each axis.
num_lat_buckets = 100
num_lon_buckets = 100

bottom_left_lat, bottom_left_lon = bottom_left
top_right_lat, top_right_lon = top_right

lat_range = top_right_lat - bottom_left_lat
lon_range = top_right_lon - bottom_left_lon

# Width of a single bucket along each axis.
lat_step = lat_range / num_lat_buckets
lon_step = lon_range / num_lon_buckets

# N buckets are separated by N - 1 interior edges, so the two outer edges of
# the bounding box are not part of the boundary lists.
boundaries_latitude = [
    bottom_left_lat + (edge * lat_step) for edge in range(1, num_lat_buckets)
]
boundaries_longitude = [
    bottom_left_lon + (edge * lon_step) for edge in range(1, num_lon_buckets)
]

print(boundaries_latitude)
print(boundaries_longitude)

In [None]:
# Inputs: one scalar per feature. The names are the keys of the dicts that are
# passed to fit()/predict().
time_input = Input(shape=(1,), name="time")
latitude_input = Input(shape=(1,), name="latitude")
longitude_input = Input(shape=(1,), name="longitude")

# A Normalization layer only standardizes once it has been given, or has
# learned via adapt(), the feature's mean and variance. Left un-adapted it
# keeps mean=0 / variance=1 and passes raw epoch seconds straight through.
# NOTE: the statistics are taken from the whole track in `df`, which includes
# the rows that later end up in the validation split.
# NOTE(review): model inputs are float32, which resolves epoch-second values
# of this magnitude only to roughly two minutes; consider feeding seconds
# since the start of the track instead.
time_normalizer = tf.keras.layers.Normalization()
time_normalizer.adapt(df[['time']].to_numpy(dtype='float64'))
time_normalized = time_normalizer(time_input)

# The bucket boundaries are expressed in raw degrees, so the raw coordinates
# are what must be discretized (not a normalized version of them).
latitude_bucketized = tf.keras.layers.Discretization(bin_boundaries=boundaries_latitude)(latitude_input)
longitude_bucketized = tf.keras.layers.Discretization(bin_boundaries=boundaries_longitude)(longitude_input)

# Discretization emits integer bucket ids. Rescaling casts them to the float
# compute dtype (so they can be concatenated with the time feature) and maps
# the ids 0..N-1 into [0, 1).
latitude_bucket_feature = tf.keras.layers.Rescaling(1.0 / num_lat_buckets)(latitude_bucketized)
longitude_bucket_feature = tf.keras.layers.Rescaling(1.0 / num_lon_buckets)(longitude_bucketized)

# Concatenate the inputs
concatenated_inputs = tf.keras.layers.Concatenate()(
    [time_normalized, latitude_bucket_feature, longitude_bucket_feature]
)

# Create a model using the Functional API
x = Dense(128, activation='relu')(concatenated_inputs)
x = Dropout(0.3)(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(32, activation='relu')(x)
x = Dropout(0.3)(x)

# Output layer for predicting latitude and longitude
output = Dense(2, activation='linear', name='predicted_coordinates')(x)

def scale_output(x):
    """Affinely map network outputs onto the track's bounding box.

    An output of -1 lands on the box minimum and +1 on the box maximum. The
    output layer is linear, so values are NOT clamped to [-1, 1] and the
    resulting coordinates may fall outside the box.

    Args:
        x: Tensor whose columns 0 and 1 are the latitude and longitude outputs.

    Returns:
        Tensor with the scaled (lat, lon) stacked along axis 1.
    """
    # Plain Python floats take on the tensor's dtype in the arithmetic below.
    min_lat, min_lon = (float(v) for v in bottom_left)
    max_lat, max_lon = (float(v) for v in top_right)
    lat = (x[:, 0] + 1) / 2 * (max_lat - min_lat) + min_lat
    lon = (x[:, 1] + 1) / 2 * (max_lon - min_lon) + min_lon
    return tf.stack([lat, lon], axis=1)

# Apply custom scaling layer
scaled_output = tf.keras.layers.Lambda(scale_output)(output)

# Define the model
model = Model(inputs=[time_input, latitude_input, longitude_input], outputs=scaled_output)

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Print the model summary
model.summary()

In [None]:
# Supervised pairs: the features are the point recorded at step t and the
# target is the point recorded at step t+1. Using a row's own lat/lon as both
# input and target would only teach the network to echo its input, which makes
# the step-by-step rollout further down stand still.
# The last row has no successor, so it is dropped from both frames.
X = df[['time', 'lat', 'lon']].iloc[:-1]
y = df[['lat', 'lon']].shift(-1).iloc[:-1]

# Split into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


def _to_model_inputs(features):
    """Turn a feature frame into the dict of arrays keyed by the model's input names."""
    return {
        'latitude': features['lat'].values,
        'longitude': features['lon'].values,
        'time': features['time'].values,
    }


X_train_split = _to_model_inputs(X_train)
X_val_split = _to_model_inputs(X_val)

# Targets as (n_samples, 2) arrays ordered (lat, lon), matching the model output
y_train_combined = np.column_stack((y_train['lat'].values, y_train['lon'].values))
y_val_combined = np.column_stack((y_val['lat'].values, y_val['lon'].values))

# Fit the model; training stops once the monitored metric has not improved
# for 10 epochs, and the best weights seen are restored.
model.fit(
    x=X_train_split,
    y=y_train_combined,
    validation_data=(X_val_split, y_val_combined),
    epochs=100,
    batch_size=32,
    callbacks=[EarlyStopping(patience=10, restore_best_weights=True)]
)

In [None]:
def haversine_distance(lat1, lon1, lat2, lon2):
    """Distance in metres between two (lat, lon) points.

    Despite the name, this delegates to geopy's `geodesic` rather than
    evaluating the haversine formula.
    """
    start = (lat1, lon1)
    end = (lat2, lon2)
    return geodesic(start, end).meters

def predict_over_distance(model, initial_point, max_distance=1000.0, time_step=60, max_steps=1000):
    """Roll the model forward from `initial_point` until the path is long enough.

    Each step feeds the current [time, lat, lon] to the model, takes the first
    output row as the next (lat, lon), adds the distance between the two points
    to a running total and advances the time value by `time_step`.

    Args:
        model: Object whose `predict(x=..., verbose=0)` accepts a dict with
            'latitude', 'longitude' and 'time' arrays of shape (1, 1) and
            returns rows of (lat, lon).
        initial_point: Sequence [time, lat, lon] to start from.
        max_distance: Cumulative path length, in metres, at which to stop.
        time_step: Amount added to the time value at every step.
        max_steps: Upper bound on the number of predictions. Without it the
            loop never ends when the predicted steps cover (almost) no
            distance.

    Returns:
        List of [time, lat, lon] lists, one per predicted step.
    """
    predictions = []
    cumulative_distance = 0.0
    current_time, current_lat, current_lon = initial_point

    while cumulative_distance < max_distance and len(predictions) < max_steps:
        pred_lat, pred_lon = model.predict(x={
            'latitude': np.array([[current_lat]]),
            'longitude': np.array([[current_lon]]),
            'time': np.array([[current_time]]),
        }, verbose=0)[0]

        # Length of this step, measured between the current and predicted points
        dist = haversine_distance(current_lat, current_lon, pred_lat, pred_lon)
        cumulative_distance += dist

        print(f"Current: ({current_lat:.6f}, {current_lon:.6f}) | "
              f"Predicted: ({pred_lat:.6f}, {pred_lon:.6f}) | "
              f"Distance : {dist:.2f} m")

        # Advance time in its original scale and record the step
        current_time = current_time + time_step
        predictions.append([current_time, pred_lat, pred_lon])

        # The prediction becomes the starting point of the next step
        current_lat, current_lon = pred_lat, pred_lon

    if cumulative_distance < max_distance:
        # Only reachable when the step cap ended the loop.
        print(f"Stopped after {len(predictions)} steps at "
              f"{cumulative_distance:.2f} m (target {max_distance:.2f} m).")

    return predictions

# Seed the rollout with the last recorded point of the track.
initial_time, initial_lat, initial_lon = (
    df[column].iloc[-1] for column in ('time', 'lat', 'lon')
)
initial_point = [initial_time, initial_lat, initial_lon]

predicted_path = predict_over_distance(model, initial_point)

# Tabulate the rollout, one row per predicted step.
predicted_df = pd.DataFrame(data=predicted_path, columns=['time', 'lat', 'lon'])
print(predicted_df)