In [1]:
import pandas as pd

# Leer los datos directamente desde GitHub
url_airports = "https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat"
url_airlines = "https://raw.githubusercontent.com/jpatokal/openflights/master/data/airlines.dat"
url_routes = "https://raw.githubusercontent.com/jpatokal/openflights/master/data/routes.dat"

# Columnas para cada archivo (documentadas en OpenFlights)
columns_airports = [
    "Airport ID", "Name", "City", "Country", "IATA", "ICAO",
    "Latitude", "Longitude", "Altitude", "Timezone", "DST",
    "Tz database time zone", "Type", "Source"
]

columns_airlines = [
    "Airline ID", "Name", "Alias", "IATA", "ICAO", "Callsign",
    "Country", "Active"
]

columns_routes = [
    "Airline", "Airline ID", "Source airport", "Source airport ID",
    "Destination airport", "Destination airport ID",
    "Codeshare", "Stops", "Equipment"
]

# Cargar los DataFrames
df_airports = pd.read_csv(url_airports, header=None, names=columns_airports)
df_airlines = pd.read_csv(url_airlines, header=None, names=columns_airlines)
df_routes = pd.read_csv(url_routes, header=None, names=columns_routes)




In [2]:
df_airports.head()

Unnamed: 0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database time zone,Type,Source
0,1,Goroka Airport,Goroka,Papua New Guinea,GKA,AYGA,-6.08169,145.391998,5282,10,U,Pacific/Port_Moresby,airport,OurAirports
1,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.20708,145.789001,20,10,U,Pacific/Port_Moresby,airport,OurAirports
2,3,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,AYMH,-5.82679,144.296005,5388,10,U,Pacific/Port_Moresby,airport,OurAirports
3,4,Nadzab Airport,Nadzab,Papua New Guinea,LAE,AYNZ,-6.569803,146.725977,239,10,U,Pacific/Port_Moresby,airport,OurAirports
4,5,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,AYPY,-9.44338,147.220001,146,10,U,Pacific/Port_Moresby,airport,OurAirports


In [3]:
df_routes.head()

Unnamed: 0,Airline,Airline ID,Source airport,Source airport ID,Destination airport,Destination airport ID,Codeshare,Stops,Equipment
0,2B,410,AER,2965,KZN,2990,,0,CR2
1,2B,410,ASF,2966,KZN,2990,,0,CR2
2,2B,410,ASF,2966,MRV,2962,,0,CR2
3,2B,410,CEK,2968,KZN,2990,,0,CR2
4,2B,410,CEK,2968,OVB,4078,,0,CR2


# Rutas 
Primero voy a modificar el dataframe que tenemos de las rutas para que podamos representarlo en un mapa en `Streamlit` usando la latidud y longitud de estos aeropuertos y graficarlo.

In [4]:
import itertools

combinaciones = list(itertools.product(["Latitude", "Longitude"], ["Source", "Destination"]))


In [5]:
for eje, posicion in combinaciones:
    df_routes = df_routes.merge(
        df_airports[["IATA", eje]],
        left_on=f"{posicion} airport",
        right_on="IATA",
        how="left"
    )

    df_routes.rename(columns={eje: f"{posicion} Latitude"}, inplace=True)
    df_routes.drop(columns=["IATA"], inplace=True)

In [9]:
df_routes["Journeys"] = df_routes["Source airport"]+"-"+df_routes["Destination airport"]

In [15]:
df_routes.head()

Unnamed: 0,Airline,Airline ID,Source airport,Source airport ID,Destination airport,Destination airport ID,Codeshare,Stops,Equipment,Source Latitude,Destination Latitude,Source Latitude.1,Destination Latitude.1,Journeys
0,2B,410,AER,2965,KZN,2990,,0,CR2,43.449902,55.606201,39.9566,49.278702,AER-KZN
1,2B,410,ASF,2966,KZN,2990,,0,CR2,46.283298,55.606201,48.006302,49.278702,ASF-KZN
2,2B,410,ASF,2966,MRV,2962,,0,CR2,46.283298,44.225101,48.006302,43.081902,ASF-MRV
3,2B,410,CEK,2968,KZN,2990,,0,CR2,55.305801,55.606201,61.5033,49.278702,CEK-KZN
4,2B,410,CEK,2968,OVB,4078,,0,CR2,55.305801,55.0126,61.5033,82.650703,CEK-OVB


In [20]:
df_routes.shape[0]

67663

In [21]:
contador_viajes = dict()

for elem in df_routes["Journeys"]:
    if elem in contador_viajes:
        contador_viajes[elem]+=1

    else:
        elem_split = elem.split("-")
        elem_reverse = elem_split[1]+"-"+elem_split[0]

        if elem_reverse in contador_viajes:
            contador_viajes[elem_reverse]+=1

        else:
            contador_viajes[elem]=1

contador_viajes


{'AER-KZN': 2,
 'ASF-KZN': 2,
 'ASF-MRV': 2,
 'CEK-KZN': 2,
 'CEK-OVB': 4,
 'DME-KZN': 8,
 'DME-NBC': 2,
 'DME-TGK': 2,
 'DME-UUA': 2,
 'EGO-KGD': 2,
 'EGO-KZN': 2,
 'GYD-NBC': 2,
 'KZN-LED': 6,
 'KZN-SVX': 2,
 'LED-NBC': 2,
 'LED-UUA': 2,
 'NBC-SVX': 2,
 'NJC-SVX': 4,
 'NJC-UUA': 4,
 'NUX-SVX': 4,
 'OVB-SVX': 10,
 'BTK-IKT': 2,
 'BTK-OVB': 2,
 'HTA-IKT': 4,
 'IKT-KCK': 2,
 'IKT-ODO': 4,
 'IKT-OVB': 6,
 'IKT-UKX': 4,
 'IKT-ULK': 6,
 'IKT-YKS': 6,
 'MJZ-OVB': 3,
 'AYP-LIM': 4,
 'CUZ-LIM': 9,
 'CUZ-PEM': 4,
 'HUU-LIM': 4,
 'IQT-PCL': 4,
 'IQT-TPP': 4,
 'LIM-PCL': 6,
 'LIM-TPP': 6,
 'PCL-TPP': 2,
 'ABJ-BOY': 2,
 'ABJ-OUA': 14,
 'ACC-OUA': 2,
 'BKO-ABJ': 5,
 'BKO-DKR': 10,
 'BKO-OUA': 6,
 'BOY-OUA': 2,
 'COO-LFW': 5,
 'COO-OUA': 2,
 'DKR-OUA': 2,
 'LFW-OUA': 4,
 'NIM-OUA': 15,
 'BOG-GYE': 4,
 'BOG-UIO': 8,
 'CLO-GYE': 4,
 'GYE-SCY': 8,
 'GYE-UIO': 13,
 'OCC-UIO': 4,
 'BDS-ZRH': 6,
 'BOD-ZRH': 2,
 'BRS-ZRH': 2,
 'GVA-LPA': 1,
 'LCA-ZRH': 6,
 'LPA-ZRH': 6,
 'RMF-ZRH': 6,
 'TFS-GVA': 3,
 'AJR

In [6]:
df_journeys = pd.DataFrame()