In [1]:
import pandas as pd
import geopandas
import matplotlib.pyplot as plt
import numpy as np
 
SEED = 42
rng = np.random.default_rng(SEED)

In [2]:
import sys
import os

# Adjust the path to the 'preprocess' folder relative to your notebook
preprocess_path = os.path.abspath(os.path.join('..', '..', 'preprocess'))
sys.path.append(preprocess_path)
# Import the necessary functions
try:
    from plot_transfer import plot_transfer
    from load_flight_data import load_flight_data
    from find_transfers import find_transfers
    print("Modules imported successfully")
except ModuleNotFoundError as e:
    print(f"Error importing modules: {e}")

Modules imported successfully


In [3]:
flight_data = load_flight_data()

In [4]:
transfer_flight_data = find_transfers(flight_data, min_dwell_time=15, max_transit_time=3, remove_outliers=False, outlier_factor=2, outlier_offset=5)

## Test inferred potential transfer flights

### Non-outliers (based on transit time x 2 of expected + 5 minute offset)
The offset is to account for very short travel times.

In [16]:
from IPython.display import display, clear_output
import matplotlib.pyplot as plt

# Initialize DataFrame to store classifications
classification_results_non_outliers = pd.DataFrame(columns=['transfer_id', 'correctly_classified'])

# Counter to keep track of classified maps
map_count = 0

# Escape flag
escape = False

while not escape:
    # Select a random transfer ID from non-outliers
    transfer_id = rng.choice(transfer_flight_data.query('transit_time_outlier == False').transfer_id)
    
    # Print the relevant row of transfer_flight_data
    print(f"Number of maps classified: {map_count}")

    display(transfer_flight_data[transfer_flight_data['transfer_id'] == transfer_id][['transfer_id', 'hospital_name_sending', 'hospital_name_receiving', 'time_in_zone_sending', 'expected_transit_time', 'transit_time', 'transit_time_ratio']])
    
    # Plot the selected transfer using the plot_transfer function
    m = plot_transfer(flight_data, transfer_flight_data, transfer_id)
    display(m)
    
    # Prompt for classification
    classification = input(f'Classify Transfer ID {transfer_id} (y/n): ').strip().lower()
    
    # Validate input
    while classification not in ['y', 'n', 'exit']:
        print("Invalid input. Please enter 'y' or 'n'. To exit, type 'exit'.")
        classification = input(f'Classify Transfer ID {transfer_id} (y/n): ').strip().lower()
    
    # Check if user wants to exit
    if classification == 'exit':
        escape = True
        print("Exiting...")
        break
    
    # Save classification in DataFrame
    classification_results_non_outliers = pd.concat([classification_results_non_outliers, pd.DataFrame({'transfer_id': [transfer_id], 'correctly_classified': [classification]}).astype({'transfer_id': 'int', 'correctly_classified': 'str'})], ignore_index=True)
    
    # Increment map counter
    map_count += 1
        
    # Clear the output to remove the previous plot
    clear_output(wait=True)

Number of maps classified: 124


Unnamed: 0,transfer_id,hospital_name_sending,hospital_name_receiving,time_in_zone_sending,expected_transit_time,transit_time,transit_time_ratio
4493,5143.0,Universitetssjukhuset Örebro,Akademiska sjukhuset,68.933333,36.335046,1.883333,0.051832


ValueError: Location values cannot contain NaNs.

In [17]:
classification_results_non_outliers

Unnamed: 0,transfer_id,correctly_classified
0,2589,y
1,6867,y
2,9268,y
3,2568,y
4,6981,y
...,...,...
119,9356,y
120,2266,y
121,2324,y
122,331,y


In [18]:
classification_results_non_outliers['correctly_classified_bool'] = classification_results_non_outliers['correctly_classified'].map({'y': True, 'n': False})
print("No samples:", len(classification_results_non_outliers))
print("Specificity:", classification_results_non_outliers['correctly_classified_bool'].sum() / len(classification_results_non_outliers))

No samples: 124
Specificity: 0.9838709677419355


In [19]:
# Save classification results to CSV
from datetime import datetime
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
file_name = f'classification_results_non_outliers_{timestamp}.csv'
classification_results_non_outliers.to_csv(file_name, index=False)

## Outliers

In [20]:
# Initialize DataFrame to store classifications
classification_results_outliers = pd.DataFrame(columns=['transfer_id', 'correctly_classified'])

# Counter to keep track of classified maps
map_count = 0

# Escape flag
escape = False

while not escape:
    # Select a random transfer ID from non-outliers
    transfer_id = rng.choice(transfer_flight_data.query('transit_time_outlier == True').transfer_id)
    
    # Print the relevant row of transfer_flight_data
    print(f"Number of maps classified: {map_count}")

    display(transfer_flight_data[transfer_flight_data['transfer_id'] == transfer_id][['transfer_id', 'hospital_name_sending', 'hospital_name_receiving', 'time_in_zone_sending', 'expected_transit_time', 'transit_time', 'transit_time_ratio']])
    
    # Plot the selected transfer using the plot_transfer function
    m = plot_transfer(flight_data, transfer_flight_data, transfer_id)
    display(m)
    
    # Prompt for classification
    classification = input(f'Classify Transfer ID {transfer_id} (y/n): ').strip().lower()
    
    # Validate input
    while classification not in ['y', 'n', 'exit']:
        print("Invalid input. Please enter 'y' or 'n'. To exit, type 'exit'.")
        classification = input(f'Classify Transfer ID {transfer_id} (y/n): ').strip().lower()
    
    # Check if user wants to exit
    if classification == 'exit':
        escape = True
        print("Exiting...")
        break
    
    # Save classification in DataFrame
    classification_results_outliers = pd.concat([classification_results_non_outliers, pd.DataFrame({'transfer_id': [transfer_id], 'correctly_classified': [classification]}).astype({'transfer_id': 'int', 'correctly_classified': 'str'})], ignore_index=True)
    
    # Increment map counter
    map_count += 1
        
    # Clear the output to remove the previous plot
    clear_output(wait=True)

Number of maps classified: 51


Unnamed: 0,transfer_id,hospital_name_sending,hospital_name_receiving,time_in_zone_sending,expected_transit_time,transit_time,transit_time_ratio
7854,9566.0,Lycksele lasarett,Norrlands universitetssjukhus,554.266667,24.794619,119.2,4.807495


Exiting...


In [22]:
classification_results_outliers['correctly_classified_bool'] = classification_results_outliers['correctly_classified'].map({'y': True, 'n': False})
print("Specificity:", classification_results_outliers['correctly_classified_bool'].sum() / len(classification_results_outliers))

Specificity: 0.976


In [23]:
# Save classification results to CSV
from datetime import datetime
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
file_name = f'classification_results_outliers_{timestamp}.csv'
classification_results_outliers.to_csv(file_name, index=False)

Conclusion:
* Of the included flights, almost all are possible transfer flights. Of those exlucuded based on transit time, almost all seem like reasonable exclusions.
* Sofar we can estimate that the specificity > 95%
* What about sensitivity?

## Estimating sensitivity
Per se impossible since we have no gold standard. However, if we map some random flights we can check if relevant ones are caught. This is manual labor...

In [517]:
jsk = flight_data.query("reg == 'SEJRA'")

In [655]:
check_flight = rng.choice(jsk['flight_id'])
check_flight_reg = rng.choice(jsk['reg'])
check_flight_df = jsk.query(f'flight_id == {check_flight}')[['geometry', 'speed', 'altitude', 'UTC_str']]
check_flight_day = jsk.query(f'flight_id == {check_flight}')['date'].values[0]
check_flight_df.explore()

In [656]:
transfer_flight_data[transfer_flight_data.UTC_sending.dt.date == check_flight_day].query('reg_sending == "SEJRA"')

Unnamed: 0,transfer_id,hospital_name_sending,hospital_name_receiving,year_sending,reg_sending,UTC_sending,UTC_out_sending,time_in_zone_sending,UTC_receiving,zone_name_sending,...,radius_receiving,geometry_sending,geometry_receiving,estimated_distance,expected_transit_time,flight_id_receiving,aircraft_id_receiving,transit_time,transit_time_outlier,transit_time_ratio


In [660]:
#flight_data.query(f'flight_id == {check_flight}')
flight_data[flight_data.UTC.dt.date == check_flight_day][['UTC_str', 'geometry']]

Unnamed: 0_level_0,Unnamed: 1_level_0,UTC_str,geometry
aircraft_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4687591,22578229,2019-06-23 16:33:19+00:00,POINT (10.79662 60.44710)
4687591,22578230,2019-06-23 16:33:25+00:00,POINT (10.80093 60.44342)
4687591,22578231,2019-06-23 16:33:35+00:00,POINT (10.80751 60.43793)
4687591,22578232,2019-06-23 16:35:17+00:00,POINT (10.87902 60.37616)
4687591,22578233,2019-06-23 16:35:31+00:00,POINT (10.88716 60.36727)
...,...,...,...
4893443,43329311,2019-06-23 23:59:39+00:00,POINT (14.13675 60.96268)
4893443,43329313,2019-06-23 23:59:45+00:00,POINT (14.14313 60.96258)
4893443,43329312,2019-06-23 23:59:45+00:00,POINT (14.14313 60.96258)
4893443,43329314,2019-06-23 23:59:56+00:00,POINT (14.15667 60.96244)


In [672]:
from find_transfers import extract_entries_and_exits
daybefore = date(2019,6,22)
extract_entries_and_exits(min_dwell_time=1, d=flight_data[flight_data.UTC.dt.date == daybefore].query("reg=='SEJRA'"))

Unnamed: 0_level_0,Unnamed: 1_level_0,snapshot_id,altitude,latitude_left,longitude_left,speed,flight_id,reg,equip,UTC,UTC_str,...,radius_conservative,zone_name,is_primary_hospital,fixed_wing_option,in_helipad_zone,zone_change,entry,exit,UTC_out,time_in_zone
aircraft_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
4893249,38207038,1561200863,1100,63.19378,14.62745,50,553533437,SEJRA,A169,2019-06-22 10:54:23+00:00,2019-06-22 10:54:23+00:00,...,3000.0,Göviken,True,1.0,True,True,True,False,2019-06-22 10:56:42+00:00,2.316667
4893249,38207329,1561203408,1275,61.79269,17.03901,127,553533437,SEJRA,A169,2019-06-22 11:36:48+00:00,2019-06-22 11:36:48+00:00,...,3000.0,Hudiksvalls sjukhus,True,0.0,True,True,True,False,2019-06-22 13:01:43+00:00,84.916667
4893249,13641805,1561211221,825,59.90854,17.64487,108,553556203,SEJRA,A169,2019-06-22 13:47:01+00:00,2019-06-22 13:47:01+00:00,...,2000.0,Akademiska sjukhuset,False,,True,True,True,False,2019-06-22 15:36:44+00:00,109.716667
4893249,42959729,1561221844,1125,62.35812,17.34298,133,553589426,SEJRA,A169,2019-06-22 16:44:04+00:00,2019-06-22 16:44:04+00:00,...,3000.0,Sundsvalls sjukhus,True,0.0,True,True,True,False,2019-06-22 17:44:09+00:00,60.083333
4893249,36482392,1561229206,925,59.90886,17.63342,137,553612518,SEJRA,A169,2019-06-22 18:46:46+00:00,2019-06-22 18:46:46+00:00,...,2000.0,Akademiska sjukhuset,False,,True,True,True,False,2019-06-22 20:37:57+00:00,111.183333
4893249,41058921,1561238231,4900,61.18941,16.50755,133,553638718,SEJRA,A169,2019-06-22 21:17:11+00:00,2019-06-22 21:17:11+00:00,...,6000.0,Bollnäs sjukhus,True,0.0,True,True,True,False,2019-06-22 21:26:49+00:00,9.633333


In [675]:
flight_data[flight_data.UTC.dt.date == daybefore].query("reg=='SEJRA'")

Unnamed: 0_level_0,Unnamed: 1_level_0,snapshot_id,altitude,latitude_left,longitude_left,speed,flight_id,reg,equip,UTC,UTC_str,...,year,geometry,hospital_name,ambulance_meetup,helipad_location,radius,radius_conservative,zone_name,is_primary_hospital,fixed_wing_option
aircraft_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
4893249,38207038,1561200863,1100,63.19378,14.62745,50,553533437,SEJRA,A169,2019-06-22 10:54:23+00:00,2019-06-22 10:54:23+00:00,...,2019,POINT (14.62745 63.19378),Östersunds sjukhus,1.0,airport,6000.0,3000.0,Göviken,1.0,1.0
4893249,38207039,1561200869,1175,63.19397,14.62428,55,553533437,SEJRA,A169,2019-06-22 10:54:29+00:00,2019-06-22 10:54:29+00:00,...,2019,POINT (14.62428 63.19397),Östersunds sjukhus,1.0,airport,6000.0,3000.0,Göviken,1.0,1.0
4893249,38207040,1561200875,1250,63.19299,14.62101,67,553533437,SEJRA,A169,2019-06-22 10:54:35+00:00,2019-06-22 10:54:35+00:00,...,2019,POINT (14.62101 63.19299),Östersunds sjukhus,1.0,airport,6000.0,3000.0,Göviken,1.0,1.0
4893249,38207041,1561200881,1325,63.19103,14.61894,79,553533437,SEJRA,A169,2019-06-22 10:54:41+00:00,2019-06-22 10:54:41+00:00,...,2019,POINT (14.61894 63.19103),Östersunds sjukhus,1.0,airport,6000.0,3000.0,Göviken,1.0,1.0
4893249,38207042,1561200887,1375,63.18875,14.61890,85,553533437,SEJRA,A169,2019-06-22 10:54:47+00:00,2019-06-22 10:54:47+00:00,...,2019,POINT (14.61890 63.18875),Östersunds sjukhus,1.0,airport,6000.0,3000.0,Göviken,1.0,1.0
4893249,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4893249,41059362,1561241851,1400,63.18237,14.63216,44,553638718,SEJRA,A169,2019-06-22 22:17:31+00:00,2019-06-22 22:17:31+00:00,...,2019,POINT (14.63216 63.18237),Östersunds sjukhus,1.0,airport,6000.0,3000.0,Göviken,1.0,1.0
4893249,41059363,1561241861,1350,63.18419,14.63294,41,553638718,SEJRA,A169,2019-06-22 22:17:41+00:00,2019-06-22 22:17:41+00:00,...,2019,POINT (14.63294 63.18419),Östersunds sjukhus,1.0,airport,6000.0,3000.0,Göviken,1.0,1.0
4893249,41059364,1561241868,1325,63.18544,14.63358,38,553638718,SEJRA,A169,2019-06-22 22:17:48+00:00,2019-06-22 22:17:48+00:00,...,2019,POINT (14.63358 63.18544),Östersunds sjukhus,1.0,airport,6000.0,3000.0,Göviken,1.0,1.0
4893249,41059365,1561241874,1300,63.18654,14.63379,38,553638718,SEJRA,A169,2019-06-22 22:17:54+00:00,2019-06-22 22:17:54+00:00,...,2019,POINT (14.63379 63.18654),Östersunds sjukhus,1.0,airport,6000.0,3000.0,Göviken,1.0,1.0


In [216]:
from datetime import date
may27 = date(2023,5,27)

In [217]:
transfer_flight_data[transfer_flight_data.UTC_sending.dt.date == may27].query('reg_sending == "SEJSN"')

Unnamed: 0,transfer_id,hospital_name_sending,hospital_name_receiving,year_sending,reg_sending,UTC_sending,UTC_out_sending,time_in_zone_sending,UTC_receiving,zone_name_sending,...,radius_receiving,geometry_sending,geometry_receiving,estimated_distance,expected_transit_time,flight_id_receiving,aircraft_id_receiving,transit_time,transit_time_outlier,transit_time_ratio
6281,7111.0,Vrinnevisjukhuset,"Karolinska universitetssjukhuset, Solna",2023,SEJSN,2023-05-27 21:58:28+00:00,2023-05-28 00:05:02+00:00,126.566667,2023-05-28 00:29:19+00:00,Norrköping flygplats,...,2000.0,POINT (16.33740 58.58730),POINT (18.00423 59.34484),127.757601,30.661824,813216892,4893294,24.283333,False,0.791973


SE-JSK relevant flights:
Found: 25
Not found: 0

SE-JSN relevant flights:
Found: 24
Not found: 1

SE-JXA relevant flights:
Found: 10
Not found: 0

SE-JSJ: 10/0
SE-JSG: 10/0
SE-JRA: 3/2
SE-JXD:
SE-JID: