In [49]:
import pandas as pd
import gzip

# Path to the downloaded flight-list archive; change it to match your local file.
file = "flightlist_20200301_20200331.csv.gz"

# Let pandas decompress the archive itself instead of opening it by hand with
# gzip in text mode: the hand-rolled version decodes with the platform's locale
# encoding, whereas read_csv decodes as UTF-8 on every platform.
df = pd.read_csv(file, compression="gzip")



In [None]:
# Build the airport reference table: one row per airport with its type,
# continent, country code/name, ICAO/IATA codes and coordinates.

# keep_default_na=False matters here: by default pandas parses the literal
# string "NA" as a missing value, which blanks out the continent code for
# North America and the ISO country code for Namibia. With these options only
# truly empty fields are treated as missing.
url_continents = "https://datahub.io/core/airport-codes/r/airport-codes.csv"
df_continents = pd.read_csv(url_continents, keep_default_na=False, na_values=[""])

# Drop the columns that are not needed downstream (reassign instead of
# mutating in place so the step reads as a plain transformation).
df_continents = df_continents.drop(
    columns=["ident", "name", "elevation_ft", "iso_region", "municipality", "gps_code", "local_code"]
)

# Fallback kept from the original notebook: a continent that is still blank
# after parsing is treated as the "NA" (North America) code.
# NOTE(review): this also labels genuinely missing continents as North America;
# confirm whether the upstream file ever leaves this field empty.
df_continents["continent"] = df_continents["continent"].fillna("NA")

# Keep only rows that have both an ICAO and an IATA code and that are real
# airports (heliports and seaplane bases are excluded).
has_icao = df_continents["icao_code"].notna()
has_iata = df_continents["iata_code"].notna()
is_airport = ~df_continents["type"].isin(["heliport", "seaplane_base"])
df_continents = df_continents[has_icao & has_iata & is_airport]

# Support table used to translate ISO country codes into country names.
# It is read with the same NA handling so that Namibia's code "NA" survives and
# the merge matches on real codes rather than on NaN == NaN.
url_iso = "https://datahub.io/core/country-list/r/data.csv"
df_iso = pd.read_csv(url_iso, keep_default_na=False, na_values=[""])

df_continents = df_continents.merge(df_iso, left_on='iso_country', right_on='Code', how='left')
df_continents = df_continents.rename(columns={'Name': 'country_name'}).drop(columns=['Code'])

# Replace the two-letter continent codes with human-friendly names.
continent_names = {
    'AF': 'Africa',
    'AN': 'Antarctica',
    'AS': 'Asia',
    'EU': 'Europe',
    'NA': 'North America',
    'OC': 'Oceania',
    'SA': 'South America'
}
df_continents['continent'] = df_continents['continent'].replace(continent_names)


      ident           type                              name  elevation_ft  \
39161  KLAS  large_airport  Harry Reid International Airport        2181.0   

      continent iso_country iso_region municipality icao_code iata_code  \
39161       NaN          US      US-NV    Las Vegas      KLAS       LAS   

      gps_code local_code             coordinates  
39161     KLAS        LAS  36.083361, -115.151817  


In [60]:
# Spot-check a single airport (Las Vegas, ICAO code KLAS) in the cleaned table.
df_continents.loc[df_continents["icao_code"].eq("KLAS")]

Unnamed: 0,type,continent,iso_country,icao_code,iata_code,coordinates,country_name
2774,large_airport,,US,KLAS,LAS,"36.083361, -115.151817",United States


In [64]:
# Show every airport whose resolved country name is "United States".
df_continents.loc[df_continents["country_name"].eq("United States")]

Unnamed: 0,type,continent,iso_country,icao_code,iata_code,coordinates,country_name
2024,small_airport,North America,US,KAAF,AAF,"29.727501, -85.027496",United States
2025,medium_airport,North America,US,KABE,ABE,"40.651773, -75.442797",United States
2026,medium_airport,North America,US,KABI,ABI,"32.4113006592, -99.68190002440001",United States
2027,large_airport,North America,US,KABQ,ABQ,"35.039976, -106.608925",United States
2028,medium_airport,North America,US,KABR,ABR,"45.449100494384766, -98.42179870605469",United States
...,...,...,...,...,...,...,...
4911,small_airport,Oceania,US,PHPA,PAK,"21.8969, -159.602997",United States
4912,small_airport,Oceania,US,PHSF,BSF,"19.760099, -155.554001",United States
4913,medium_airport,Oceania,US,PHTO,ITO,"19.721399, -155.048004",United States
4914,small_airport,Oceania,US,PHUP,UPP,"20.265301, -155.860001",United States


In [65]:
# List the distinct continent labels present after renaming.
df_continents.loc[:, "continent"].unique()

array(['Oceania', 'North America', 'Europe', 'Asia', 'Africa',
       'South America', 'Antarctica'], dtype=object)