In [1]:
import pandas as pd

# Leer los datos directamente desde GitHub
url_airports = "https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat"
url_airlines = "https://raw.githubusercontent.com/jpatokal/openflights/master/data/airlines.dat"
url_routes = "https://raw.githubusercontent.com/jpatokal/openflights/master/data/routes.dat"

# Columnas para cada archivo (documentadas en OpenFlights)
columns_airports = [
    "Airport ID", "Name", "City", "Country", "IATA", "ICAO",
    "Latitude", "Longitude", "Altitude", "Timezone", "DST",
    "Tz database time zone", "Type", "Source"
]

columns_airlines = [
    "Airline ID", "Name", "Alias", "IATA", "ICAO", "Callsign",
    "Country", "Active"
]

columns_routes = [
    "Airline", "Airline ID", "Source airport", "Source airport ID",
    "Destination airport", "Destination airport ID",
    "Codeshare", "Stops", "Equipment"
]

# Cargar los DataFrames
df_airports = pd.read_csv(url_airports, header=None, names=columns_airports)
df_airlines = pd.read_csv(url_airlines, header=None, names=columns_airlines)
df_routes = pd.read_csv(url_routes, header=None, names=columns_routes)




In [2]:
df_airports.head()

Unnamed: 0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database time zone,Type,Source
0,1,Goroka Airport,Goroka,Papua New Guinea,GKA,AYGA,-6.08169,145.391998,5282,10,U,Pacific/Port_Moresby,airport,OurAirports
1,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.20708,145.789001,20,10,U,Pacific/Port_Moresby,airport,OurAirports
2,3,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,AYMH,-5.82679,144.296005,5388,10,U,Pacific/Port_Moresby,airport,OurAirports
3,4,Nadzab Airport,Nadzab,Papua New Guinea,LAE,AYNZ,-6.569803,146.725977,239,10,U,Pacific/Port_Moresby,airport,OurAirports
4,5,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,AYPY,-9.44338,147.220001,146,10,U,Pacific/Port_Moresby,airport,OurAirports


In [3]:
df_routes.head()

Unnamed: 0,Airline,Airline ID,Source airport,Source airport ID,Destination airport,Destination airport ID,Codeshare,Stops,Equipment
0,2B,410,AER,2965,KZN,2990,,0,CR2
1,2B,410,ASF,2966,KZN,2990,,0,CR2
2,2B,410,ASF,2966,MRV,2962,,0,CR2
3,2B,410,CEK,2968,KZN,2990,,0,CR2
4,2B,410,CEK,2968,OVB,4078,,0,CR2


# Rutas 
Primero voy a modificar el dataframe que tenemos de las rutas para que podamos representarlo en un mapa en `Streamlit` usando la latidud y longitud de estos aeropuertos y graficarlo.

In [4]:
import itertools

combinaciones = list(itertools.product(["Latitude", "Longitude"], ["Source", "Destination"]))


In [36]:
def unir_pos_geografica(df, columns=["Source airport", "Destination airport"]):
    combinaciones = list(itertools.product(["Latitude", "Longitude"], columns))
    for eje, posicion in combinaciones:
        df = df.merge(
            df_airports[["IATA", eje]],
            left_on=posicion,
            right_on="IATA",
            how="left"
        )

        df.rename(columns={eje: f"{posicion.split()[0]} Latitude"}, inplace=True)
        df.drop(columns=["IATA"], inplace=True)
    return df 

In [15]:
df_routes.head()

Unnamed: 0,Airline,Airline ID,Source airport,Source airport ID,Destination airport,Destination airport ID,Codeshare,Stops,Equipment,Source Latitude,Destination Latitude,Source Latitude.1,Destination Latitude.1,Journeys
0,2B,410,AER,2965,KZN,2990,,0,CR2,43.449902,55.606201,39.9566,49.278702,AER-KZN
1,2B,410,ASF,2966,KZN,2990,,0,CR2,46.283298,55.606201,48.006302,49.278702,ASF-KZN
2,2B,410,ASF,2966,MRV,2962,,0,CR2,46.283298,44.225101,48.006302,43.081902,ASF-MRV
3,2B,410,CEK,2968,KZN,2990,,0,CR2,55.305801,55.606201,61.5033,49.278702,CEK-KZN
4,2B,410,CEK,2968,OVB,4078,,0,CR2,55.305801,55.0126,61.5033,82.650703,CEK-OVB


In [20]:
df_routes.shape[0]

67663

In [25]:
df_trips = df_routes.groupby(["Source airport", "Destination airport"]).size().reset_index(name="vuelos")
df_trips["Journeys"] = df_trips["Source airport"] + "-" + df_trips["Destination airport"]

In [38]:
def num_vuelos_ida_vuelta(df):
    contador_viajes = dict()

    for elem in df["Journeys"]:
        if elem in contador_viajes:
            contador_viajes[elem]+=1

        else:
            elem_split = elem.split("-")
            elem_reverse = elem_split[1]+"-"+elem_split[0]

            if elem_reverse in contador_viajes:
                contador_viajes[elem_reverse]+=1

            else:
                contador_viajes[elem]=1

    df_result = pd.DataFrame([{"Journeys": k, "Num vuelos": v} for k, v in contador_viajes.items()])
    df_result = pd.merge(df_result, df[["Journeys", "Source airport", "Destination airport"]], on="Journeys", how="left")
    return df_result
df_ida_vuelta = num_vuelos_ida_vuelta(df_trips)
df_ida_vuelta = unir_pos_geografica(df_ida_vuelta)
df_ida_vuelta

Unnamed: 0,Journeys,Num vuelos,Source airport,Destination airport,Source Latitude,Destination Latitude,Source Latitude.1,Destination Latitude.1
0,AAE-ALG,2,AAE,ALG,36.822201,36.691002,7.809174,3.215410
1,AAE-CDG,2,AAE,CDG,36.822201,49.012798,7.809174,2.550000
2,AAE-IST,2,AAE,IST,36.822201,41.275278,7.809174,28.751944
3,AAE-LYS,2,AAE,LYS,36.822201,45.725556,7.809174,5.081111
4,AAE-MRS,2,AAE,MRS,36.822201,43.439272,7.809174,5.221424
...,...,...,...,...,...,...,...,...
19252,ZRH-ICN,1,ZRH,ICN,47.464699,37.469101,8.549170,126.450996
19253,ZSA-PLS,1,ZSA,PLS,24.063299,21.773600,-74.524002,-72.265900
19254,ZSJ-KEW,1,ZSJ,KEW,53.064201,52.991100,-93.344398,-92.836403
19255,ZSJ-YPM,1,ZSJ,YPM,53.064201,51.819698,-93.344398,-93.973297


In [6]:
df_journeys = pd.DataFrame()