In [13]:
import geopandas
import pandas as pd
import numpy as np
import os
import re

In [14]:
airports = pd.read_csv("/Users/JO/PhD/hemspy/data/helipad-data/raw-data/helipad-coordinates.csv", sep=";")

airports_gdf = geopandas.GeoDataFrame(airports, geometry=geopandas.points_from_xy(airports.longitude, airports.latitude), crs="EPSG:4326")
airports_gdf = airports_gdf.to_crs("EPSG:32634") #to metric coords
airports_gdf.geometry = airports_gdf.geometry.buffer(distance=airports.radius)
airports_gdf = airports_gdf.to_crs("EPSG:4326") #back to conventional

In [15]:
flights_path = '/Users/JO/PhD/hemspy/data/fr24-data/raw-data-unzipped-rearranged/flights'
flights_files = [f for f in os.listdir(flights_path) if f.endswith('.csv')]

flights_df_list, positions_df_list = [], []
for file in flights_files:
    flights_df_list.append(pd.read_csv(flights_path+'/'+file))

flights_df = pd.concat(flights_df_list, ignore_index=True)

flights_df['flight_id'] = flights_df['flight_id'].astype(int)

positions_path = '/Users/JO/PhD/hemspy/data/fr24-data/raw-data-unzipped-rearranged/positions'
positions_files = [os.path.join(positions_path, file) for file in os.listdir(positions_path) if file.endswith('.csv')]

# List to store DataFrames
positions_df_list = []

regex_pattern = r'_(.*?)\.'
regex = re.compile(regex_pattern)

# Iterate over each file
for file in positions_files:
    # Extract the flight_id from the file name
    flight_id_match = regex.search(os.path.basename(file))
    if flight_id_match:
        flight_id = flight_id_match.group(1)
        
        # Read the CSV file into a DataFrame
        df = pd.read_csv(file, usecols=['snapshot_id', 'altitude', 'latitude', 'longitude', 'speed'])
        
        # Add a column for flight_id
        df['flight_id'] = flight_id
        
        # Append the DataFrame to the list
        positions_df_list.append(df)

# Combine all DataFrames into a single DataFrame
positions_df = pd.concat(positions_df_list, ignore_index=True)

positions_df['flight_id'] = positions_df['flight_id'].astype(int)

d = pd.merge(positions_df, flights_df, on='flight_id', how='left')

d['UTC'] = pd.to_datetime(d['snapshot_id'], unit='s', utc=True)
d['date'] = d['UTC'].dt.date
d['year'] = d['UTC'].dt.year

## Decrease size of dataset
- Remove Stockholm
- Remove Sjöfartsverket
- Remove LN-XXX () and reg missing (subject to discussion)

In [16]:
d = d[d['equip'].isin(['A169', 'AS65', 'S76', 'EC45', 'A139'])]
d = d[~(d['reg'].isin(['SEJSR', 'SEJSP', 'SEJRH', 'SEJRI', 'SERJR', 'SEJRK', 'SEJRL', 'SEJRM', 'SEJRN']))]
d = d[~(d['reg'].str.startswith('LN') | pd.isna(d['reg']))]
d.head(3)

Unnamed: 0,snapshot_id,altitude,latitude,longitude,speed,flight_id,aircraft_id,reg,equip,callsign,flight,schd_from,schd_to,real_to,reserved,UTC,date,year
133,1486647941,175,59.32914,18.03285,106,208208779,4893263,SEJRO,EC45,,,BMA,,,,2017-02-09 13:45:41+00:00,2017-02-09,2017
134,1486647948,175,59.3244,18.02804,119,208208779,4893263,SEJRO,EC45,,,BMA,,,,2017-02-09 13:45:48+00:00,2017-02-09,2017
135,1486647976,174,59.32018,18.02427,123,208208779,4893263,SEJRO,EC45,,,BMA,,,,2017-02-09 13:46:16+00:00,2017-02-09,2017


In [17]:
d = geopandas.GeoDataFrame(d, geometry=geopandas.points_from_xy(d.longitude, d.latitude), crs="EPSG:4326")

In [18]:
d = geopandas.sjoin(d, airports_gdf, how="left", predicate="within")

In [19]:
d.drop(['index_right', 'ambulance_meetup', 'latitude_right', 'reserved', 'flight', 'callsign', 'longitude_right', 'radius', 'real_to', 'schd_from', 'schd_to', 'primary'], axis=1, inplace=True)
d

Unnamed: 0,snapshot_id,altitude,latitude_left,longitude_left,speed,flight_id,aircraft_id,reg,equip,UTC,date,year,geometry,hospital_name,icao,helipad_location,other_name
133,1486647941,175,59.32914,18.03285,106,208208779,4893263,SEJRO,EC45,2017-02-09 13:45:41+00:00,2017-02-09,2017,POINT (18.03285 59.32914),,,,
134,1486647948,175,59.32440,18.02804,119,208208779,4893263,SEJRO,EC45,2017-02-09 13:45:48+00:00,2017-02-09,2017,POINT (18.02804 59.32440),,,,
135,1486647976,174,59.32018,18.02427,123,208208779,4893263,SEJRO,EC45,2017-02-09 13:46:16+00:00,2017-02-09,2017,POINT (18.02427 59.32018),,,,
136,1486648031,175,59.30886,18.01559,123,208208779,4893263,SEJRO,EC45,2017-02-09 13:47:11+00:00,2017-02-09,2017,POINT (18.01559 59.30886),,,,
137,1486648045,175,59.27356,17.97905,141,208208779,4893263,SEJRO,EC45,2017-02-09 13:47:25+00:00,2017-02-09,2017,POINT (17.97905 59.27356),,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43409707,1587302348,4700,58.60624,18.25411,150,610337411,4893263,SEJRO,EC45,2020-04-19 13:19:08+00:00,2020-04-19,2020,POINT (18.25411 58.60624),,,,
43409708,1587302363,4725,58.60280,18.25499,150,610337411,4893263,SEJRO,EC45,2020-04-19 13:19:23+00:00,2020-04-19,2020,POINT (18.25499 58.60280),,,,
43409709,1587302379,4725,58.59201,18.25795,150,610337411,4893263,SEJRO,EC45,2020-04-19 13:19:39+00:00,2020-04-19,2020,POINT (18.25795 58.59201),,,,
43409710,1587302400,4700,58.58066,18.26129,150,610337411,4893263,SEJRO,EC45,2020-04-19 13:20:00+00:00,2020-04-19,2020,POINT (18.26129 58.58066),,,,


In [37]:
d_airport_only = d.dropna(subset="hospital_name")

In [38]:
d_airport_only

Unnamed: 0,snapshot_id,altitude,latitude_left,longitude_left,speed,flight_id,aircraft_id,reg,equip,UTC,date,year,geometry,hospital_name,icao,helipad_location,other_name
264,1532272145,800,59.44423,13.35251,68,490283653,4893443,SEJXC,EC45,2018-07-22 15:09:05+00:00,2018-07-22,2018,POINT (13.35251 59.44423),Karlstads sjukhus,ESOK,airport,Karlstad flygplats
265,1532272154,725,59.44395,13.34744,51,490283653,4893443,SEJXC,EC45,2018-07-22 15:09:14+00:00,2018-07-22,2018,POINT (13.34744 59.44395),Karlstads sjukhus,ESOK,airport,Karlstad flygplats
266,1532272169,600,59.44331,13.34189,31,490283653,4893443,SEJXC,EC45,2018-07-22 15:09:29+00:00,2018-07-22,2018,POINT (13.34189 59.44331),Karlstads sjukhus,ESOK,airport,Karlstad flygplats
747,1638187315,800,59.84322,17.63800,36,704972899,4893442,SEJXB,EC45,2021-11-29 12:01:55+00:00,2021-11-29,2021,POINT (17.63800 59.84322),Akademiska sjukhuset,ESHU,hospital,Akademiska sjukhuset
748,1638187317,800,59.84346,17.63817,33,704972899,4893442,SEJXB,EC45,2021-11-29 12:01:57+00:00,2021-11-29,2021,POINT (17.63817 59.84346),Akademiska sjukhuset,ESHU,hospital,Akademiska sjukhuset
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43409505,1532755117,0,57.78199,11.86633,1,491457644,4893290,SEJSJ,A169,2018-07-28 05:18:37+00:00,2018-07-28,2018,POINT (11.86633 57.78199),Sahlgrenska universitetssjukhuset,ESGP,airport,Säve flygplats
43409506,1532755138,50,57.78222,11.86707,20,491457644,4893290,SEJSJ,A169,2018-07-28 05:18:58+00:00,2018-07-28,2018,POINT (11.86707 57.78222),Sahlgrenska universitetssjukhuset,ESGP,airport,Säve flygplats
43409507,1532755149,225,57.78380,11.87109,50,491457644,4893290,SEJSJ,A169,2018-07-28 05:19:09+00:00,2018-07-28,2018,POINT (11.87109 57.78380),Sahlgrenska universitetssjukhuset,ESGP,airport,Säve flygplats
43409508,1532755157,275,57.78557,11.87485,69,491457644,4893290,SEJSJ,A169,2018-07-28 05:19:17+00:00,2018-07-28,2018,POINT (11.87485 57.78557),Sahlgrenska universitetssjukhuset,ESGP,airport,Säve flygplats


Next, sort by time

In [39]:
d_airport_only = d_airport_only.groupby('aircraft_id').apply(lambda x: x.sort_values('UTC'), include_groups=False)


In [40]:
d_airport_only.query("reg == 'SEJSN'").hospital_name.value_counts()

hospital_name
Karolinska universitetssjukhuset, Solna    18644
Visby lasarett                               672
Akademiska sjukhuset                         559
Länssjukhuset Ryhov                          505
Länssjukhuset Kalmar                         447
Sahlgrenska universitetssjukhuset            435
Universitetssjukhuset i Linköping            422
Kristianstads sjukhus                        390
Nyköpings lasarett                           290
Vrinnevisjukhuset                            282
Sundsvalls sjukhus                           201
Mälarsjukhuset                               199
Karlstads sjukhus                            190
Universitetssjukhuset Örebro                 166
Bollnäs sjukhus                              150
Hudiksvalls sjukhus                          143
Hallands sjukhus Halmstad                    143
Västerås lasarett                            129
Falu lasarett                                125
Huddinge sjukhus                             125
Örnskö

In [87]:
def process(x):
    # Keep only first and last row at a place
    shifted = x['other_name'].shift()
    start_seq = (x['other_name'] != shifted)
    end_seq = (x['other_name'] != x['other_name'].shift(-1))
    seq_indices = start_seq | end_seq
    new_x = x[seq_indices].reset_index(drop=True)
    # Check if moved
    new_x['moved'] = new_x['other_name'] != new_x['other_name'].shift(1)
    new_x['dwelled'] = ~ new_x['moved']
    # timetiff
    new_x["timediff"] = new_x.UTC - new_x.UTC.shift()
    new_x["timediff"] = new_x["timediff"].dt.total_seconds() / 60
    new_x['move_time'] = new_x['timediff'] * new_x['moved']
    new_x['move_time'] = np.where(new_x['move_time'] == 0, np.nan, new_x['move_time'])
    new_x['dwell_time'] = new_x['timediff'] * new_x['dwelled']
    new_x['dwell_time'] = np.where(new_x['dwell_time'] == 0, np.nan, new_x['dwell_time'])
    ## remove flyovers
    #new_x['dwell_time_flyover_removed'] = np.where(new_x['dwell_time'] > 5, new_x['dwell_time'], np.nan)
   # new_x['pair_id'] = np.arange(len(new_x)) // 2
   # new_x = new_x.groupby(['hospital_name', 'pair_id']).filter(
   # lambda x: x['dwell_time_flyover_removed'].sum() > 5).drop(columns='pair_id')
    ## clean up
   # new_x['timediff_new'] = new_x['UTC'] - new_x.shift(1)['UTC']
   # new_x["timediff_new"] = new_x["timediff_new"].dt.total_seconds() / 60

   # new_x['move_time_new'] = new_x['timediff_new'] * new_x['moved']
   # new_x['move_time'] = np.where(new_x['move_time_new'] == 0, np.nan, new_x['move_time_new'])
  #  new_x['dwell_time_new'] = new_x['timediff_new'] * new_x['dwelled']
  #  new_x['dwell_time'] = np.where(new_x['dwell_time_new'] == 0, np.nan, new_x['dwell_time_new'])

  #  new_x.drop(columns=['timediff', 'move_time_new', 'dwell_time_new', 'dwell_time_flyover_removed', 'timediff_new'], inplace=True)
    return new_x

In [88]:
df_processed = d_airport_only.groupby('aircraft_id').apply(process)

# Reset index if needed
df_processed = df_processed.reset_index(drop=True)

In [89]:
df_processed.query("reg == 'SEJSK'").tail(40)

Unnamed: 0,snapshot_id,altitude,latitude_left,longitude_left,speed,flight_id,reg,equip,UTC,date,...,geometry,hospital_name,icao,helipad_location,other_name,moved,dwelled,timediff,move_time,dwell_time
30362,1715074631,1150,57.77284,11.88854,87,890888102,SEJSK,A169,2024-05-07 09:37:11+00:00,2024-05-07,...,POINT (11.88854 57.77284),Sahlgrenska universitetssjukhuset,ESGP,airport,Säve flygplats,True,False,3.683333,3.683333,
30363,1715076043,325,57.78937,11.87268,77,890891588,SEJSK,A169,2024-05-07 10:00:43+00:00,2024-05-07,...,POINT (11.87268 57.78937),Sahlgrenska universitetssjukhuset,ESGP,airport,Säve flygplats,False,True,23.533333,,23.533333
30364,1715081288,850,60.43373,15.4822,64,890891588,SEJSK,A169,2024-05-07 11:28:08+00:00,2024-05-07,...,POINT (15.48220 60.43373),Falu lasarett,ESSD,airport,Borlänge flygplats,True,False,87.416667,87.416667,
30365,1715082467,1375,60.44051,15.49432,87,890909870,SEJSK,A169,2024-05-07 11:47:47+00:00,2024-05-07,...,POINT (15.49432 60.44051),Falu lasarett,ESSD,airport,Borlänge flygplats,False,True,19.65,,19.65
30366,1715084950,175,61.73414,17.09513,33,890909870,SEJSK,A169,2024-05-07 12:29:10+00:00,2024-05-07,...,POINT (17.09513 61.73414),Hudiksvalls sjukhus,ESHX,hospital,Hudiksvalls sjukhus,True,False,41.383333,41.383333,
30367,1715087116,225,61.72734,17.10581,66,890926570,SEJSK,A169,2024-05-07 13:05:16+00:00,2024-05-07,...,POINT (17.10581 61.72734),Hudiksvalls sjukhus,ESHX,hospital,Hudiksvalls sjukhus,False,True,36.1,,36.1
30368,1715090042,225,59.89574,17.62235,69,890926570,SEJSK,A169,2024-05-07 13:54:02+00:00,2024-05-07,...,POINT (17.62235 59.89574),Akademiska sjukhuset,ESCM,airport,Ärna flygplats,True,False,48.766667,48.766667,
30369,1715158899,100,59.88647,17.61601,102,891145155,SEJSK,A169,2024-05-08 09:01:39+00:00,2024-05-08,...,POINT (17.61601 59.88647),Akademiska sjukhuset,ESCM,airport,Ärna flygplats,False,True,1147.616667,,1147.616667
30370,1715159609,0,59.63188,17.08592,87,891145155,SEJSK,A169,2024-05-08 09:13:29+00:00,2024-05-08,...,POINT (17.08592 59.63188),Enköpings lasarett,,hospital,Enköpings lasarett,True,False,11.833333,11.833333,
30371,1715162091,150,59.62871,17.08649,84,891153337,SEJSK,A169,2024-05-08 09:54:51+00:00,2024-05-08,...,POINT (17.08649 59.62871),Enköpings lasarett,,hospital,Enköpings lasarett,False,True,41.366667,,41.366667


In [95]:
def plot_flight(idx, dataset):
    fl = dataset[dataset["flight_id"] == idx]
    return fl.explore()

In [99]:
d.query("flight_id == 891718517").explore()

TypeError: Object of type Timestamp is not JSON serializable

<folium.folium.Map at 0xae8fa4b50>