In [1]:
import pandas as pd
import numpy as np
import requests

In [2]:
# Column labels for the airports file, listed in the order the fields appear.
# NOTE(review): OpenFlights reportedly marks missing values with \N; confirm
# whether `na_values` should be passed so they load as NaN instead of text.
df_airports_columns = [
    'Airport ID', 'Name', 'City', 'Country', 'IATA', 'ICAO',
    'Latitude', 'Longitude', 'Altitude', 'Timezone',
    'Daylight Savings Time', 'Tz database time zone', 'Type', 'Source',
]

# header=None: the first line is read as data, so labels come from `names`.
df_airports = pd.read_csv(
    'https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat',
    names=df_airports_columns,
    header=None,
)
df_airports.head()

Unnamed: 0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,Daylight Savings Time,Tz database time zone,Type,Source
0,1,Goroka Airport,Goroka,Papua New Guinea,GKA,AYGA,-6.08169,145.391998,5282,10,U,Pacific/Port_Moresby,airport,OurAirports
1,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.20708,145.789001,20,10,U,Pacific/Port_Moresby,airport,OurAirports
2,3,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,AYMH,-5.82679,144.296005,5388,10,U,Pacific/Port_Moresby,airport,OurAirports
3,4,Nadzab Airport,Nadzab,Papua New Guinea,LAE,AYNZ,-6.569803,146.725977,239,10,U,Pacific/Port_Moresby,airport,OurAirports
4,5,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,AYPY,-9.44338,147.220001,146,10,U,Pacific/Port_Moresby,airport,OurAirports


In [3]:
# Column labels for the routes file, listed in the order the fields appear.
# NOTE(review): OpenFlights reportedly marks missing values with \N; confirm
# whether `na_values` should be passed so they load as NaN instead of text.
df_routes_columns = [
    'Airline', 'Airline ID',
    'Source airport', 'Source airport ID',
    'Destination airport', 'Destination airport ID',
    'Codeshare', 'Stops', 'Equipment',
]

# header=None: the first line is read as data, so labels come from `names`.
df_routes = pd.read_csv(
    'https://raw.githubusercontent.com/jpatokal/openflights/master/data/routes.dat',
    names=df_routes_columns,
    header=None,
)
df_routes.head()

Unnamed: 0,Airline,Airline ID,Source airport,Source airport ID,Destination airport,Destination airport ID,Codeshare,Stops,Equipment
0,2B,410,AER,2965,KZN,2990,,0,CR2
1,2B,410,ASF,2966,KZN,2990,,0,CR2
2,2B,410,ASF,2966,MRV,2962,,0,CR2
3,2B,410,CEK,2968,KZN,2990,,0,CR2
4,2B,410,CEK,2968,OVB,4078,,0,CR2


In [4]:
# Disabled cell: would download the Eurocontrol airport-traffic workbook and
# load its 'DATA' sheet into df_airport_traffic.
# NOTE(review): no cell shown here uses df_airport_traffic. If re-enabled,
# confirm that pd.read_excel accepts raw bytes in the installed pandas
# (wrapping data.content in io.BytesIO is the portable form).
# data = requests.get('https://www.eurocontrol.int/performance/data/download/xls/Airport_Traffic.xlsx')

# df_airport_traffic = pd.read_excel(data.content, sheet_name='DATA')
# df_airport_traffic.head()

In [5]:
# Attach airport coordinates to each route, once for the source airport and
# once for the destination, matching the route's airport code against IATA.
df_merged = (
    df_routes
    # Left joins keep every route; endpoints with no IATA match get NaN.
    .merge(
        df_airports[['IATA', 'Latitude', 'Longitude']].add_prefix('Source '),
        how='left',
        left_on='Source airport',
        right_on='Source IATA',
    )
    .merge(
        df_airports[['IATA', 'Latitude', 'Longitude']].add_prefix('Destination '),
        how='left',
        left_on='Destination airport',
        right_on='Destination IATA',
    )
    # The IATA join keys only repeat the route's own airport-code columns.
    .drop(columns=['Source IATA', 'Destination IATA'])
    # Discard routes that lack a coordinate for either endpoint.
    .dropna(subset=[
        'Source Latitude',
        'Source Longitude',
        'Destination Latitude',
        'Destination Longitude',
    ])
)

df_merged

Unnamed: 0,Airline,Airline ID,Source airport,Source airport ID,Destination airport,Destination airport ID,Codeshare,Stops,Equipment,Source Latitude,Source Longitude,Destination Latitude,Destination Longitude
0,2B,410,AER,2965,KZN,2990,,0,CR2,43.449902,39.956600,55.606201,49.278702
1,2B,410,ASF,2966,KZN,2990,,0,CR2,46.283298,48.006302,55.606201,49.278702
2,2B,410,ASF,2966,MRV,2962,,0,CR2,46.283298,48.006302,44.225101,43.081902
3,2B,410,CEK,2968,KZN,2990,,0,CR2,55.305801,61.503300,55.606201,49.278702
4,2B,410,CEK,2968,OVB,4078,,0,CR2,55.305801,61.503300,55.012600,82.650703
...,...,...,...,...,...,...,...,...,...,...,...,...,...
67658,ZL,4178,WYA,6334,ADL,3341,,0,SF3,-33.058899,137.514008,-34.945000,138.531006
67659,ZM,19016,DME,4029,FRU,2912,,0,734,55.408798,37.906300,43.061298,74.477600
67660,ZM,19016,FRU,2912,DME,4029,,0,734,43.061298,74.477600,55.408798,37.906300
67661,ZM,19016,FRU,2912,OSS,2913,,0,734,43.061298,74.477600,40.609001,72.793297


In [6]:
from math import sin, cos, sqrt, atan2, radians

def haversine(lat1, lon1, lat2, lon2, radius_km=6371):
    """Return the great-circle distance between two points, in kilometres.

    Applies the haversine formula on a sphere of radius ``radius_km``.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.
    radius_km : float, optional
        Radius of the sphere in kilometres. Defaults to 6371, the value
        this notebook uses for the Earth.

    Returns
    -------
    float
        Distance along the sphere's surface, in kilometres. NaN inputs
        propagate to a NaN result.
    """
    # The trigonometric functions work in radians, not degrees.
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2

    # For (near-)antipodal points rounding can leave `a` a few ulps above 1,
    # which would make sqrt(1 - a) raise "math domain error". An explicit
    # comparison (rather than min()) leaves NaN untouched so it propagates.
    if a > 1.0:
        a = 1.0

    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return radius_km * c

# Haversine distance for every route, from the endpoint coordinates carried on
# each row. The four columns are walked in lockstep, so values stay in row order.
df_merged['Great-circle distance'] = [
    haversine(src_lat, src_lon, dst_lat, dst_lon)
    for src_lat, src_lon, dst_lat, dst_lon in zip(
        df_merged['Source Latitude'],
        df_merged['Source Longitude'],
        df_merged['Destination Latitude'],
        df_merged['Destination Longitude'],
    )
]


In [7]:
from geopy.distance import geodesic

def calculate_geodesic_distance(lat1, lon1, lat2, lon2):
    """Return the geodesic distance in kilometres between two points.

    Each point is passed to geopy's ``geodesic`` as a ``(latitude, longitude)``
    tuple, and the result's ``.km`` attribute is returned.
    """
    return geodesic((lat1, lon1), (lat2, lon2)).km

# Geodesic distance for every route; each tuple holds one row's
# (source lat, source lon, destination lat, destination lon).
df_merged['Geodesic Distance WGS-84'] = [
    calculate_geodesic_distance(*endpoints)
    for endpoints in zip(
        df_merged['Source Latitude'],
        df_merged['Source Longitude'],
        df_merged['Destination Latitude'],
        df_merged['Destination Longitude'],
    )
]

In [10]:
# Largest absolute disagreement, in km, between the two distance columns.
# Series.abs().max() is vectorized and skips NaN; the builtin max() iterates
# element by element and gives order-dependent results if a NaN is present.
print(
    "Diferença em km entre diferentes métodos de cálculo da menor distância entre 2 pontos na superfície terrestre: ", 
    (df_merged['Geodesic Distance WGS-84'] - df_merged['Great-circle distance']).abs().max()
    )

Diferença em km entre diferentes métodos de cálculo da menor distância entre 2 pontos na superfície terrestre:  35.18291019776552


In [11]:
# Show the routes table, which now carries both computed distance columns.
df_merged

Unnamed: 0,Airline,Airline ID,Source airport,Source airport ID,Destination airport,Destination airport ID,Codeshare,Stops,Equipment,Source Latitude,Source Longitude,Destination Latitude,Destination Longitude,Great-circle distance,Geodesic Distance WGS-84
0,2B,410,AER,2965,KZN,2990,,0,CR2,43.449902,39.956600,55.606201,49.278702,1506.825604,1507.989680
1,2B,410,ASF,2966,KZN,2990,,0,CR2,46.283298,48.006302,55.606201,49.278702,1040.438320,1040.943207
2,2B,410,ASF,2966,MRV,2962,,0,CR2,46.283298,48.006302,44.225101,43.081902,448.164909,449.036664
3,2B,410,CEK,2968,KZN,2990,,0,CR2,55.305801,61.503300,55.606201,49.278702,770.508500,773.126239
4,2B,410,CEK,2968,OVB,4078,,0,CR2,55.305801,61.503300,55.012600,82.650703,1338.631467,1343.161122
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67658,ZL,4178,WYA,6334,ADL,3341,,0,SF3,-33.058899,137.514008,-34.945000,138.531006,229.720619,229.334786
67659,ZM,19016,DME,4029,FRU,2912,,0,734,55.408798,37.906300,43.061298,74.477600,2942.819259,2949.906099
67660,ZM,19016,FRU,2912,DME,4029,,0,734,43.061298,74.477600,55.408798,37.906300,2942.819259,2949.906099
67661,ZM,19016,FRU,2912,OSS,2913,,0,734,43.061298,74.477600,40.609001,72.793297,306.295375,306.189294
