This notebook is for calculating distances, elevations and timings between Dovetrek checkpoints. It then produces a routecard for a set route.

### Contents:
* [Imports](#Imports)
* [Start SparkSession](#Start-SparkSession)
* [Set Parameters](#Set-Parameters)
* [Get Secrets](#Get-Secrets)
* [Fetch Checkpoint Information](#Fetch-Checkpoint-information)
* [Fetch Distances from Filestore](#List-Filestore-contents)
* [Calculate leg timings for each Checkpoint Combination](#Calculate-leg-timings-for-each-Checkpoint-Combination)
* [Pick a route](#Pick-a-Route)
* [Calculate route timings](#Calculate-route-timings)
* [Calculate checkpoint arrival and departure times for route](#Calculate-checkpoint-arrival-and-departure-times-for-route)

### Imports

In [22]:
from pyspark.sql.functions import col, expr, first
from pyspark.sql import functions as F, DataFrame, Window, SparkSession
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, FloatType, ArrayType, TimestampType
from datetime import datetime, time, timedelta
import requests
import time
import ipywidgets
import base64
import getpass
import builtins
import json
from io import BytesIO, StringIO
import itertools
from functools import reduce
import numpy as np

### Start SparkSession

In [23]:
# getOrCreate() returns the already-active session when there is one and only
# builds a new session otherwise, so `spark` is always bound after this line.
# (The previous try/except left `spark` undefined whenever a session was
# already active, and its bare `except:` hid any real start-up error.)
spark = (
    SparkSession.builder
    .appName("BinderPySpark")
    .config("spark.sql.execution.arrow.pyspark.enabled", "true")
    .getOrCreate()
)

# Smoke test: build and show a tiny DataFrame to confirm Spark is working
df = spark.createDataFrame([(1, "Alice"), (2, "Bob")], ["id", "name"])
df.show()
df.unpersist()

+---+-----+
| id| name|
+---+-----+
|  1|Alice|
|  2|  Bob|
+---+-----+



### Get available competition years

In [24]:
# List the files under CheckpointData/ on the main branch via the GitHub contents API
url = "https://api.github.com/repos/liamj-f/Dovetrek/contents/CheckpointData"
params = {"ref": "main"}
response = requests.get(url, params=params)
# Stop here with the HTTP error (e.g. rate limiting) instead of failing later
# on an error payload that is not a list of files
response.raise_for_status()

# Keep the 'name' field of every listed item; names without an underscore
# cannot carry a year and are skipped
file_names = [
    item["name"]
    for item in response.json()
    if "name" in item and "_" in item["name"]
]

# The year is the text between the first underscore and the following dot,
# e.g. Openings_2025.csv -> 2025
Year_List = [filename.split('_')[1].split('.')[0] for filename in file_names]

### Set Parameters

In [28]:
API_Service_List = ["Bing Maps","Google Maps", "OpenRouteService","Azure Maps & OpenTopoData"]

# Dropdown raises if `value` is not one of `options`, so only preselect 2025
# when it is actually available; otherwise fall back to the last listed year.
if '2025' in Year_List:
    Default_Year = '2025'
else:
    Default_Year = Year_List[-1] if Year_List else None

# The descriptions are longer than the default label width; 'initial' lets
# each label take the space it needs instead of being truncated.
Wide_Description = {'description_width': 'initial'}

Competition_Year_Picker = ipywidgets.Dropdown(options=Year_List, value = Default_Year, description = 'Pick a competition year:', style = Wide_Description)
API_Service_Picker = ipywidgets.Dropdown(options=API_Service_List, value = "Google Maps", description = 'Pick an API Service for distance & elevations:', style = Wide_Description)
# Minutes spent stopped at each checkpoint
StopTimePicker = ipywidgets.IntSlider(value = 7
                                            , min = 0
                                            , max= 20
                                            , description = 'Pick a time in minutes to stop at checkpoints'
                                            , readout = True
                                            , step = 1
                                            , style = Wide_Description)
# Extra minutes added per 100m of elevation gain
NaismithPicker = ipywidgets.IntSlider(value = 10
                                            , min = 0
                                            , max= 20
                                            , description = 'Add minutes per 100m elevation gain'
                                            , readout = True
                                            , step = 1
                                            , style = Wide_Description)
# Hiking speed (printed as km/h by the parameter check cell)
SpeedPicker = ipywidgets.FloatSlider(
                                          value=5.3,
                                          min=0,
                                          max=10.0,
                                          step=0.1,
                                          description='Hiking speed:',
                                          disabled=False,
                                          continuous_update=False,
                                          orientation='horizontal',
                                          readout=True,
                                          readout_format='.1f',
                                          style=Wide_Description,
                                      )
# Free-text start time; it is parsed as HH:MM:SS by the parameter check cell
StartTimePicker = ipywidgets.Text(
                                              description='Type a StartTime in the format HH:MM:SS',
                                              value = '10:00:00',
                                              disabled=False,
                                              style=Wide_Description
                                          )
display(Competition_Year_Picker)
display(API_Service_Picker)
display(StopTimePicker)
display(NaismithPicker)
display(StartTimePicker)
display(SpeedPicker)
print("Make sure all of the parameters are set correctly before entering your GitHub Secrets repository Personal Access Token below")
# getpass keeps the token out of the notebook source and output
GitHubPAT = getpass.getpass("Token:")

Dropdown(description='Pick a competition year:', index=4, options=('2017', '2018', '2019', '2024', '2025'), va…

Dropdown(description='Pick an API Service for distance & elevations:', index=1, options=('Bing Maps', 'Google …

IntSlider(value=7, description='Pick a time in minutes to stop at checkpoints', max=20)

IntSlider(value=10, description='Add minutes per 100m elevation gain', max=20)

Text(value='10:00:00', description='Type a StartTime in the format HH:MM:SS')

FloatSlider(value=5.3, continuous_update=False, description='Hiking speed:', max=10.0, readout_format='.1f')

Make sure all of the parameters are set correctly before entering your GitHub Secrets repository Personal Access Token below
Token:··········


### Check parameters correctly set

In [29]:
# Snapshot the current widget selections into plain variables for the cells below
Competition_Year, API_Service = Competition_Year_Picker.value, API_Service_Picker.value
StopTimeAtCheckPoints, AddMinutesPer100mHeight = StopTimePicker.value, NaismithPicker.value
Speed = SpeedPicker.value

# The text box holds HH:MM:SS only, so strptime fills in its default date (1900-01-01)
StartTime = datetime.strptime(StartTimePicker.value, '%H:%M:%S')
# Finish is taken as seven hours after the start
FinishTime = timedelta(hours=7) + StartTime

# Echo the chosen values so they can be checked before running the rest
print(f"Competition Year: {Competition_Year} \nAPI Service: {API_Service} \nStopTimeAtCheckPoints: {StopTimeAtCheckPoints} minutes \nAddMinutesPer100mHeight: {AddMinutesPer100mHeight} minutes \nSpeed: {Speed} km/h \nStartTime: {StartTime}")

Competition Year: 2018 
API Service: Bing Maps 
StopTimeAtCheckPoints: 14 minutes 
AddMinutesPer100mHeight: 15 minutes 
Speed: 2.3 km/h 
StartTime: 1900-01-01 10:15:00


### Get Secrets

In [None]:
# GitHub contents API URL of the secrets file
url = "https://api.github.com/repos/liamj-f/Secrets/contents/Dovetrek_Secrets.json"

# Authenticate with the personal access token typed into the parameters cell
headers = {"Authorization": f"token {GitHubPAT}"}

# Fetch the secrets file
response = requests.get(url, headers=headers)

if response.status_code == 200:
    # The file body arrives base64-encoded in the "content" field
    content = response.json()["content"]
    decoded_content = base64.b64decode(content).decode("utf-8")

    # Load JSON into a dictionary
    secrets = json.loads(decoded_content)

    # Assign values to variables; a missing key yields an empty string.
    # The Bing key was previously looked up only as "BingMApsAPIKey" (capital A),
    # unlike every other key here; accept the regular spelling first and keep
    # the old spelling as a fallback so an existing secrets file still works.
    BingMapsAPIKey = secrets.get("BingMapsAPIKey") or secrets.get("BingMApsAPIKey", "")
    AzureMapsAPIKey = secrets.get("AzureMapsAPIKey", "")
    OrdnanceSurveyAPIKey = secrets.get("OrdnanceSurveyAPIKey", "")
    OpenRouteServiceAPIKey = secrets.get("OpenRouteServiceAPIKey", "")
    GoogleMapsAPIKey = secrets.get("GoogleMapsAPIKey", "")
    DovetrekRepoPAT = secrets.get("DovetrekRepoPAT", "")

    print("✅ Secrets loaded successfully!")

else:
    # Report the failure; none of the key variables above are defined in this case
    print(f"❌ Error: {response.status_code} - {response.text}")


### Fetch Checkpoint information

In [None]:
# GitHub contents API URL of the openings file for the chosen competition year
url = f"https://api.github.com/repos/liamj-f/Dovetrek/contents/CheckpointData/Openings_{Competition_Year}.csv"

# Fetch file content; surface HTTP errors (e.g. no file for that year) here
# rather than as a KeyError on the missing "content" field
response = requests.get(url)
response.raise_for_status()
content = response.json()["content"]
decoded_content = base64.b64decode(content).decode("utf-8")
# splitlines() copes with both \n and \r\n line endings; blank lines (such as
# the one after a trailing newline) are dropped so they cannot become rows
csv_lines = [line for line in decoded_content.splitlines() if line.strip()]
# Create an RDD from the list
rdd = spark.sparkContext.parallelize(csv_lines)
# Convert RDD to DataFrame, taking column names from the first line
openings_df = spark.read.csv(rdd, header=True, inferSchema=True)

### List Filestore contents

In [None]:
# Ask the GitHub contents API for the DataFrames/ directory listing on the
# FileStore branch; `response` is read again by the next cell.
url = "https://api.github.com/repos/liamj-f/Dovetrek/contents/DataFrames"
params = dict(ref="FileStore")
response = requests.get(url, params=params)

# Show the raw listing as the cell output
response.json()



### Get the Latest File

In [None]:
import fnmatch
import urllib.parse  # import the submodule explicitly; `import urllib` alone does not guarantee it is loaded

# Define the pattern with wildcard. The year comes from the competition year
# picked above (it was previously fixed to 2025 regardless of the selection).
pattern = f"Distances_DF_{Competition_Year}_{API_Service}_*.csv"

# Filter files matching the pattern (`response` is the FileStore listing from the previous cell)
matched_files = [file for file in response.json() if fnmatch.fnmatch(file["name"], pattern)]

# Stop with a clear message instead of failing later on a missing file
if not matched_files:
    raise FileNotFoundError(f"No matching files found for pattern {pattern!r}.")

# Sort by date in filename (YYYY-MM-DD at the end), newest first
matched_files.sort(key=lambda x: x["name"].split("_")[-1], reverse=True)

# Get the latest matching file
latest_entry = matched_files[0]
print("Latest matching file:", latest_entry["name"])
print("Download URL:", latest_entry["download_url"])

# Percent-encode the name (API service names contain spaces and '&') for use in a URL
latest_file = urllib.parse.quote(latest_entry["name"])


### Fetch Distances_DF csv from GitHub filestore

In [None]:

# GitHub contents API URL of the newest distances file (name already URL-encoded)
url = f"https://api.github.com/repos/liamj-f/Dovetrek/contents/DataFrames/{latest_file}"

params = {"ref": "FileStore"}
# Fetch the file from the FileStore branch; surface HTTP errors here rather
# than as a KeyError on the missing "content" field
response = requests.get(url, params=params)
response.raise_for_status()
content = response.json()["content"]
decoded_content = base64.b64decode(content).decode("utf-8")
# splitlines() copes with both \n and \r\n line endings; blank lines (such as
# the one after a trailing newline) are dropped so they cannot become rows
csv_lines = [line for line in decoded_content.splitlines() if line.strip()]
# Create an RDD from the list
rdd = spark.sparkContext.parallelize(csv_lines)
# Convert RDD to DataFrame, taking column names from the first line
Distances_DF = spark.read.csv(rdd, header=True, inferSchema=True)

display(Distances_DF)

### Calculate leg timings for each Checkpoint Combination

In [None]:
# Leg time in minutes = walking time + climbing allowance.
#   walking:  Distance / Speed hours, converted to minutes
#   climbing: AddMinutesPer100mHeight minutes for every 100 of Height_Gain
# The allowance has to scale with the setting; dividing the height gain by it
# (as before) is only right when the setting is exactly 10 and yields nulls at 0.
# NOTE(review): assumes Distance is in km and Height_Gain in metres - confirm against the distances file.
Distances_DF = Distances_DF.withColumn(
    "TimeInMinutes",
    (col("Distance") / Speed) * 60 + (col("Height_Gain") / 100) * AddMinutesPer100mHeight
)
display(Distances_DF)

### Pick a Route

In [None]:
# Ordered list of points to visit, from "Start" through to "Finish"
route = [
    "Start",
    "CP3", "E7U", "CP4", "E8", "E6", "CP5",
    "E5", "E4R", "CP6", "E9U", "CP2", "E2R",
    "E10U", "E1", "CP1", "CP7", "E3",
    "Finish",
]

### Calculate route timings

In [None]:
# One row per leg: pair every point on the route with the one after it and
# record the leg's position in the sequence
legs = [(here, there, position) for position, (here, there) in enumerate(zip(route, route[1:]))]
route_df = spark.createDataFrame(legs, ["StartCP", "FinishCP", "RouteOrder"])

# Look up each leg in Distances_DF. A left join keeps every leg of the route,
# so a leg that is absent from Distances_DF comes through with null timings.
legs_with_times = route_df.join(Distances_DF, on=["StartCP", "FinishCP"], how="left")
timing_results_df = legs_with_times.orderBy("RouteOrder")

# Running total of TimeInMinutes from the first leg up to and including the current one
window_spec = Window.orderBy("RouteOrder").rowsBetween(Window.unboundedPreceding, Window.currentRow)
running_minutes = F.sum(F.col("TimeInMinutes")).over(window_spec)
timing_results_df = timing_results_df.withColumn("CumulativeTime", running_minutes)

# Show results
display(timing_results_df)

### Calculate checkpoint arrival and departure times for route

In [None]:
# Elapsed minutes on arrival at FinishCP: the walking time of every leg so far
# plus one stop at each checkpoint already visited. RouteOrder is 0 for the
# first leg, so no stop has been counted yet when the first checkpoint is reached.
timing_results_df = timing_results_df.withColumn(
    "CumulativeTimeWithDwell",
    F.col("CumulativeTime") + (F.col("RouteOrder") * F.lit(StopTimeAtCheckPoints))
)

# LeavingTime: when FinishCP is left = StartTime + elapsed minutes on arrival
# + the stop at FinishCP itself. The stop at FinishCP was previously left out,
# which put ArrivalTime one stop earlier than the point could be reached.
# On the last row this still adds a stop at the final point of the route.
# NOTE(review): the int -> "interval minute" cast is kept as written; confirm
# it is supported by the Spark version in use.
timing_results_df = timing_results_df.withColumn(
    "LeavingTime",
    F.to_timestamp(F.lit(StartTime))
    + F.col("CumulativeTimeWithDwell").cast("int").cast("interval minute")
    + F.expr(f"INTERVAL {StopTimeAtCheckPoints} MINUTE")
)

# ArrivalTime: when FinishCP is reached (LeavingTime - StopTimeAtCheckPoints)
timing_results_df = timing_results_df.withColumn(
    "ArrivalTime",
    F.col("LeavingTime") - F.expr(f"INTERVAL {StopTimeAtCheckPoints} MINUTE")
)

# Show the results
display(timing_results_df)
