In [310]:
import geopandas
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [311]:
# Load sample flights
# First, two well behaved flights from a sending center to a receiving center
well_behaved_a = "/Users/JO/PhD/hemspy/data/sample-data/hems-examples/DFL5710_35200a2b.csv"
well_behaved_b = "/Users/JO/PhD/hemspy/data/sample-data/hems-examples/SWV5960_3526909c.csv"
# Next, a bunch of trickier flights
tricky_a =  "/Users/JO/PhD/hemspy/data/sample-data/hems-examples/SWV5850_345f09a8.csv"
tricky_b_part_1 =  "/Users/JO/PhD/hemspy/data/sample-data/hems-examples/SWV5850_34bac707.csv"
tricky_b_part_2 = "/Users/JO/PhD/hemspy/data/sample-data/hems-examples/SWV5850_34bad6ce.csv"
tricky_c_part_1 = "/Users/JO/PhD/hemspy/data/sample-data/hems-examples/DFL5940_34f7c0ea.csv"
tricky_c_part_2 = "/Users/JO/PhD/hemspy/data/sample-data/hems-examples/DFL5940_34f82b91.csv"

# Define location of some helipads at sending and receving hospitals
airports_dict = {
"hudiksvall": (61.7304, 17.0996), ## sending hospital
"akademiska": (59.8472, 17.6402), ## this is a receiving center
"arna": (59.8920, 17.6066), ## this is an aiport
"karolinska": (59.3493, 18.0319), ## this is a receiving center
"torsby": (60.1367, 12.9974), ## sending hospital
"orebro": (59.2748, 15.2288), ## this is a receiving center
"orebro airport": (59.22781316906504, 15.03996851019997),
"sunderbyn": (65.67241724908286, 21.935790886203712), ## sending hospital
"kalix": (65.85566778285997, 23.171565979697636), ## sending hospital
"umea": (63.81735866549869, 20.298453547845288), ## sending hospital
"falun": (60.60763426546648, 15.645371300139645), ## sending hospital
"gallivare": (67.13107231727535, 20.682936085413385), ## sending hospital
"solleftea": (63.17472146034687, 17.2314090459214), ## sending hospital
"ostersund": (63.191907852275854, 14.63084350106227),
"karlstad sjukhus": (59.37630930990766, 13.47608275695035),
"karlstad flp": (59.445175903584, 13.337658312889811),
"västerås lasarett": (59.61580394969142, 16.582052775226977)
}

airports = pd.DataFrame(airports_dict).T.reset_index().rename(columns={"index": "airport", 0: "latitude", 1: "longitude"})

In [312]:
## make geometries
airports_gdf = geopandas.GeoDataFrame(airports, geometry=geopandas.points_from_xy(airports.longitude, airports.latitude), crs="EPSG:4326")
airports_gdf = airports_gdf.to_crs("EPSG:32634") #to metric coords
airports_gdf.geometry = airports_gdf.geometry.buffer(distance=1100)
airports_gdf = airports_gdf.to_crs("EPSG:4326") #back to conventional

In [313]:
def create_flight_df(csv_path, airports):
    flight = pd.read_csv(csv_path)
    flight[['latitude', 'longitude']] = flight['Position'].str.split(',', expand=True)
    flight = geopandas.GeoDataFrame(flight, geometry=geopandas.points_from_xy(flight.longitude, flight.latitude), crs="EPSG:4326")
    flight = geopandas.sjoin(flight, airports, how="left", predicate="within")
    flight.drop(['Position', 'index_right', 'latitude_left', 'latitude_right', 'latitude_right', 'longitude_right', 'longitude_left'], axis=1, inplace=True)
    return flight

In [314]:
def plot_flight_df(flight_df, airports):
    m = flight_df.explore()
    return airports.explore(m=m, color='maroon', style_kwds={'fillOpacity': 0.1, 'weight':2})

### A well behaved flight

In [315]:
df1 = create_flight_df(well_behaved_a, airports_gdf)

In [316]:
plot_flight_df(df1, airports_gdf)

### A trickier flight

In [317]:
df2 = create_flight_df(tricky_a, airports_gdf)

In [318]:
plot_flight_df(df2, airports_gdf)

### Working with merged flights and preprocessing

- Merge flights
- Plot
- Remove rows "between" airports
- Remove as many row as possible "at" airports
- Remove "fly-overs"
- Calculate times

First, import and merge df:

In [319]:
import os
# Define the folder path containing the CSV files
folder_path = '/Users/JO/PhD/hemspy/data/sample-data/karlstad-examples'

# Get a list of all CSV files in the folder and read them into DataFrames
data_frames = [pd.read_csv(os.path.join(folder_path, f)) for f in os.listdir(folder_path) if f.endswith('.csv')]

# Concatenate all DataFrames into a single DataFrame
combined_df = pd.concat(data_frames, ignore_index=True)

Create and plot flight:

In [320]:
def create_flight_df(df, airports):
    flight = df.copy()
    flight[['latitude', 'longitude']] = flight['Position'].str.split(',', expand=True)
    flight = geopandas.GeoDataFrame(flight, geometry=geopandas.points_from_xy(flight.longitude, flight.latitude), crs="EPSG:4326")
    flight = geopandas.sjoin(flight, airports, how="left", predicate="within")
    flight.drop(['Position', 'index_right', 'latitude_left', 'latitude_right', 'latitude_right', 'longitude_right', 'longitude_left'], axis=1, inplace=True)
    return flight

In [321]:
k = create_flight_df(combined_df, airports_gdf)

In [322]:
plot_flight_df(k, airports_gdf)

As you can see, there is a fly-over (aircraft flying over a "landing zone" at Örebro. Even if the landing zone had been smallar (100-200 m) it would still be within it.

Let's remove all rows "between" landing zones:

In [323]:
k.sort_values(by="Timestamp", inplace=True)
k_airport_only = k.dropna(subset="airport")

Next, keep only the first and last row when an aircraft is within a landing zone. This will be done by shifting columns up and down an compare values in column airport. This allows for sorting rows without a for loop.

In [324]:
# Sample data

# Shift the column to create a comparison for finding sequence boundaries
shifted = k_airport_only['airport'].shift()

# Identify the start of each sequence
start_sequence = (k_airport_only['airport'] != shifted)

# Identify the end of each sequence
end_sequence = (k_airport_only['airport'] != k_airport_only['airport'].shift(-1))

# Combine the indices of the start and end of each sequence
sequence_indices = start_sequence | end_sequence

# Use the combined indices to filter the DataFrame
new_k = k_airport_only[sequence_indices].reset_index(drop=True)

new_k

Unnamed: 0,Timestamp,UTC,Callsign,Altitude,Speed,Direction,geometry,airport
0,1714390457,2024-04-29T11:34:17Z,SWV5960,400,65,276,POINT (13.33136 59.44318),karlstad flp
1,1714390485,2024-04-29T11:34:45Z,SWV5960,775,76,348,POINT (13.32414 59.45121),karlstad flp
2,1714391666,2024-04-29T11:54:26Z,SWV5960,650,83,346,POINT (13.00722 60.12868),torsby
3,1714394667,2024-04-29T12:44:27Z,SWV5960,725,68,156,POINT (13.00369 60.12790),torsby
4,1714398356,2024-04-29T13:45:56Z,SWV5960,575,61,127,POINT (17.63800 59.85654),akademiska
5,1714401289,2024-04-29T14:34:49Z,SWV5960,475,75,192,POINT (17.64304 59.83841),akademiska
6,1714404013,2024-04-29T15:20:13Z,SWV5960,1800,106,244,POINT (15.24362 59.27774),orebro
7,1714404049,2024-04-29T15:20:49Z,SWV5960,1800,105,244,POINT (15.21228 59.27012),orebro
8,1714404260,2024-04-29T15:24:20Z,SWV5960,150,58,192,POINT (15.04349 59.23727),orebro airport
9,1714406151,2024-04-29T15:55:51Z,SWV5960,250,60,278,POINT (15.03024 59.21943),orebro airport


Next, we will define if each row is "associated" with the aircraft moving in to the zone or "dwelling" (but potentially moving) in it. This is to mark the rows for "entering" or "leaving a zone" (could be better names for the columns actually...)

In [325]:
new_k['moved'] = new_k['airport'] != new_k['airport'].shift(1)
new_k['dwelled'] = ~ new_k['moved']
new_k

Unnamed: 0,Timestamp,UTC,Callsign,Altitude,Speed,Direction,geometry,airport,moved,dwelled
0,1714390457,2024-04-29T11:34:17Z,SWV5960,400,65,276,POINT (13.33136 59.44318),karlstad flp,True,False
1,1714390485,2024-04-29T11:34:45Z,SWV5960,775,76,348,POINT (13.32414 59.45121),karlstad flp,False,True
2,1714391666,2024-04-29T11:54:26Z,SWV5960,650,83,346,POINT (13.00722 60.12868),torsby,True,False
3,1714394667,2024-04-29T12:44:27Z,SWV5960,725,68,156,POINT (13.00369 60.12790),torsby,False,True
4,1714398356,2024-04-29T13:45:56Z,SWV5960,575,61,127,POINT (17.63800 59.85654),akademiska,True,False
5,1714401289,2024-04-29T14:34:49Z,SWV5960,475,75,192,POINT (17.64304 59.83841),akademiska,False,True
6,1714404013,2024-04-29T15:20:13Z,SWV5960,1800,106,244,POINT (15.24362 59.27774),orebro,True,False
7,1714404049,2024-04-29T15:20:49Z,SWV5960,1800,105,244,POINT (15.21228 59.27012),orebro,False,True
8,1714404260,2024-04-29T15:24:20Z,SWV5960,150,58,192,POINT (15.04349 59.23727),orebro airport,True,False
9,1714406151,2024-04-29T15:55:51Z,SWV5960,250,60,278,POINT (15.03024 59.21943),orebro airport,False,True


Calculate the time differences between the row and the preceeding row. In case it is a "moved" row, this would be "flight time". In case it is a "dwelled" row, this would be the time spent in the zone.

In [326]:
# calc timediff
new_k["timediff_minutes"] = new_k.Timestamp - new_k.Timestamp.shift()
new_k["timediff_minutes"] = np.round(new_k.timediff_minutes/60, 0)

In [327]:
new_k

Unnamed: 0,Timestamp,UTC,Callsign,Altitude,Speed,Direction,geometry,airport,moved,dwelled,timediff_minutes
0,1714390457,2024-04-29T11:34:17Z,SWV5960,400,65,276,POINT (13.33136 59.44318),karlstad flp,True,False,
1,1714390485,2024-04-29T11:34:45Z,SWV5960,775,76,348,POINT (13.32414 59.45121),karlstad flp,False,True,0.0
2,1714391666,2024-04-29T11:54:26Z,SWV5960,650,83,346,POINT (13.00722 60.12868),torsby,True,False,20.0
3,1714394667,2024-04-29T12:44:27Z,SWV5960,725,68,156,POINT (13.00369 60.12790),torsby,False,True,50.0
4,1714398356,2024-04-29T13:45:56Z,SWV5960,575,61,127,POINT (17.63800 59.85654),akademiska,True,False,61.0
5,1714401289,2024-04-29T14:34:49Z,SWV5960,475,75,192,POINT (17.64304 59.83841),akademiska,False,True,49.0
6,1714404013,2024-04-29T15:20:13Z,SWV5960,1800,106,244,POINT (15.24362 59.27774),orebro,True,False,45.0
7,1714404049,2024-04-29T15:20:49Z,SWV5960,1800,105,244,POINT (15.21228 59.27012),orebro,False,True,1.0
8,1714404260,2024-04-29T15:24:20Z,SWV5960,150,58,192,POINT (15.04349 59.23727),orebro airport,True,False,4.0
9,1714406151,2024-04-29T15:55:51Z,SWV5960,250,60,278,POINT (15.03024 59.21943),orebro airport,False,True,32.0


We can create new columns with "move time" and "dwell time" as well:

In [328]:
new_k['move_time'] = new_k['timediff_minutes'] * new_k['moved']
new_k['move_time'] = np.where(new_k['move_time'] == 0, np.nan, new_k['move_time'])
new_k['dwell_time'] = new_k['timediff_minutes'] * new_k['dwelled']
new_k['dwell_time'] = np.where(new_k['dwell_time'] == 0, np.nan, new_k['dwell_time'])

new_k

Unnamed: 0,Timestamp,UTC,Callsign,Altitude,Speed,Direction,geometry,airport,moved,dwelled,timediff_minutes,move_time,dwell_time
0,1714390457,2024-04-29T11:34:17Z,SWV5960,400,65,276,POINT (13.33136 59.44318),karlstad flp,True,False,,,
1,1714390485,2024-04-29T11:34:45Z,SWV5960,775,76,348,POINT (13.32414 59.45121),karlstad flp,False,True,0.0,,
2,1714391666,2024-04-29T11:54:26Z,SWV5960,650,83,346,POINT (13.00722 60.12868),torsby,True,False,20.0,20.0,
3,1714394667,2024-04-29T12:44:27Z,SWV5960,725,68,156,POINT (13.00369 60.12790),torsby,False,True,50.0,,50.0
4,1714398356,2024-04-29T13:45:56Z,SWV5960,575,61,127,POINT (17.63800 59.85654),akademiska,True,False,61.0,61.0,
5,1714401289,2024-04-29T14:34:49Z,SWV5960,475,75,192,POINT (17.64304 59.83841),akademiska,False,True,49.0,,49.0
6,1714404013,2024-04-29T15:20:13Z,SWV5960,1800,106,244,POINT (15.24362 59.27774),orebro,True,False,45.0,45.0,
7,1714404049,2024-04-29T15:20:49Z,SWV5960,1800,105,244,POINT (15.21228 59.27012),orebro,False,True,1.0,,1.0
8,1714404260,2024-04-29T15:24:20Z,SWV5960,150,58,192,POINT (15.04349 59.23727),orebro airport,True,False,4.0,4.0,
9,1714406151,2024-04-29T15:55:51Z,SWV5960,250,60,278,POINT (15.03024 59.21943),orebro airport,False,True,32.0,,32.0


Next, let's filter out visits to landing zones that are probably fly-overs. Even a slow moving aircraft at 150 km/h is travelling at 41.6 m/s, meaning that it would take <1 minute to travel 2 km. In the case of the fly-over at Örebro, the time is rounded down to 0 minutes. Allowing for some wiggle room, let's remove all visits < 5 minutes. Of course, the Örebro thing dissapears.

In [329]:
new_k['dwell_time_flyover_removed'] = np.where(new_k['dwell_time'] > 5, new_k['dwell_time'], np.nan)

In [330]:
new_k

Unnamed: 0,Timestamp,UTC,Callsign,Altitude,Speed,Direction,geometry,airport,moved,dwelled,timediff_minutes,move_time,dwell_time,dwell_time_flyover_removed
0,1714390457,2024-04-29T11:34:17Z,SWV5960,400,65,276,POINT (13.33136 59.44318),karlstad flp,True,False,,,,
1,1714390485,2024-04-29T11:34:45Z,SWV5960,775,76,348,POINT (13.32414 59.45121),karlstad flp,False,True,0.0,,,
2,1714391666,2024-04-29T11:54:26Z,SWV5960,650,83,346,POINT (13.00722 60.12868),torsby,True,False,20.0,20.0,,
3,1714394667,2024-04-29T12:44:27Z,SWV5960,725,68,156,POINT (13.00369 60.12790),torsby,False,True,50.0,,50.0,50.0
4,1714398356,2024-04-29T13:45:56Z,SWV5960,575,61,127,POINT (17.63800 59.85654),akademiska,True,False,61.0,61.0,,
5,1714401289,2024-04-29T14:34:49Z,SWV5960,475,75,192,POINT (17.64304 59.83841),akademiska,False,True,49.0,,49.0,49.0
6,1714404013,2024-04-29T15:20:13Z,SWV5960,1800,106,244,POINT (15.24362 59.27774),orebro,True,False,45.0,45.0,,
7,1714404049,2024-04-29T15:20:49Z,SWV5960,1800,105,244,POINT (15.21228 59.27012),orebro,False,True,1.0,,1.0,
8,1714404260,2024-04-29T15:24:20Z,SWV5960,150,58,192,POINT (15.04349 59.23727),orebro airport,True,False,4.0,4.0,,
9,1714406151,2024-04-29T15:55:51Z,SWV5960,250,60,278,POINT (15.03024 59.21943),orebro airport,False,True,32.0,,32.0,32.0


Next, let's assign id_numbers to the "pairs" defining the landing zone visits. This will allow for grouping by this ID and removing those with dwell times < 5 minutes>

In [331]:
new_k['pair_id'] = np.arange(len(new_k)) // 2
new_k = new_k.groupby(['airport', 'pair_id']).filter(
    lambda x: x['dwell_time_flyover_removed'].sum() > 5).drop(columns='pair_id')


In [332]:
new_k

Unnamed: 0,Timestamp,UTC,Callsign,Altitude,Speed,Direction,geometry,airport,moved,dwelled,timediff_minutes,move_time,dwell_time,dwell_time_flyover_removed
2,1714391666,2024-04-29T11:54:26Z,SWV5960,650,83,346,POINT (13.00722 60.12868),torsby,True,False,20.0,20.0,,
3,1714394667,2024-04-29T12:44:27Z,SWV5960,725,68,156,POINT (13.00369 60.12790),torsby,False,True,50.0,,50.0,50.0
4,1714398356,2024-04-29T13:45:56Z,SWV5960,575,61,127,POINT (17.63800 59.85654),akademiska,True,False,61.0,61.0,,
5,1714401289,2024-04-29T14:34:49Z,SWV5960,475,75,192,POINT (17.64304 59.83841),akademiska,False,True,49.0,,49.0,49.0
8,1714404260,2024-04-29T15:24:20Z,SWV5960,150,58,192,POINT (15.04349 59.23727),orebro airport,True,False,4.0,4.0,,
9,1714406151,2024-04-29T15:55:51Z,SWV5960,250,60,278,POINT (15.03024 59.21943),orebro airport,False,True,32.0,,32.0,32.0
10,1714408012,2024-04-29T16:26:52Z,SWV5960,200,64,203,POINT (13.34498 59.45284),karlstad flp,True,False,31.0,31.0,,
11,1714556048,2024-05-01T09:34:08Z,SWV5960,50,40,204,POINT (13.34280 59.45073),karlstad flp,False,True,2467.0,,2467.0,2467.0


After the removal of rows, we need to recalculate travel times:

In [333]:
new_k['timediff_minutes_new'] = new_k['Timestamp'] - new_k.shift(1)['Timestamp']
new_k["timediff_minutes_new"] = np.round(new_k.timediff_minutes_new/60, 0)

new_k['move_time_new'] = new_k['timediff_minutes_new'] * new_k['moved']
new_k['move_time'] = np.where(new_k['move_time_new'] == 0, np.nan, new_k['move_time_new'])
new_k['dwell_time_new'] = new_k['timediff_minutes_new'] * new_k['dwelled']
new_k['dwell_time'] = np.where(new_k['dwell_time_new'] == 0, np.nan, new_k['dwell_time_new'])

In [334]:
new_k.drop(columns=['timediff_minutes', 'move_time_new', 'dwell_time_new', 'dwell_time_flyover_removed', 'timediff_minutes_new'], inplace=True)

And finally:

In [335]:
new_k

Unnamed: 0,Timestamp,UTC,Callsign,Altitude,Speed,Direction,geometry,airport,moved,dwelled,move_time,dwell_time
2,1714391666,2024-04-29T11:54:26Z,SWV5960,650,83,346,POINT (13.00722 60.12868),torsby,True,False,,
3,1714394667,2024-04-29T12:44:27Z,SWV5960,725,68,156,POINT (13.00369 60.12790),torsby,False,True,,50.0
4,1714398356,2024-04-29T13:45:56Z,SWV5960,575,61,127,POINT (17.63800 59.85654),akademiska,True,False,61.0,
5,1714401289,2024-04-29T14:34:49Z,SWV5960,475,75,192,POINT (17.64304 59.83841),akademiska,False,True,,49.0
8,1714404260,2024-04-29T15:24:20Z,SWV5960,150,58,192,POINT (15.04349 59.23727),orebro airport,True,False,50.0,
9,1714406151,2024-04-29T15:55:51Z,SWV5960,250,60,278,POINT (15.03024 59.21943),orebro airport,False,True,,32.0
10,1714408012,2024-04-29T16:26:52Z,SWV5960,200,64,203,POINT (13.34498 59.45284),karlstad flp,True,False,31.0,
11,1714556048,2024-05-01T09:34:08Z,SWV5960,50,40,204,POINT (13.34280 59.45073),karlstad flp,False,True,,2467.0


## Ideas/insigths:

* This describes the actually events in a reasonable manner.

* Idea: check relevant helipads for ads-b coverage, select different radiuses based on coverage (smaller if better coverage). This could potentially be done by plotting all flights and guesstimate a reasonable radius.

* Insight: in matching flights to patients, it could be reasonable to add a mechanism making sure that a flight should note take more than 0.0.5-3 h (depending on distance) in order to protect against A->(C)->B flights.

* a less involved approach would be to just calculate inferred times and describe the variance. Hopefully there is little variation. Throw away all +/- x sd? 

* 
