**For article "Which airports have the most direct international connections"**

In [1]:
# Import airport and flight-route data from http://openflights.org/data.html
import pandas as pd
# Column names are supplied explicitly for both files, so pandas reads the first
# line of each file as data rather than as a header row.
route_columns = [
    'Airline', 'Airline ID',
    'Source airport', 'Source airport ID',
    'Destination airport', 'Destination airport ID',
    'Codeshare', 'Stops', 'Equipment',
]
airport_columns = [
    'id', 'Name', 'City', 'Country',
    'IATA', 'ICAO',
    'Latitude', 'Longitude', 'Altitude',
    'Timezone', 'DST', 'Tz database time zone',
]

# One row per airline route, and one row per airport.
routes = pd.read_csv('routes.dat', names=route_columns)
airports = pd.read_csv('airports.dat', names=airport_columns)

In [34]:
# Build one table of routes with the city and country of both endpoints attached.
# Airports without an IATA code and routes without an airport code on the joined
# side are left out before each join.
airport_places = airports.loc[airports['IATA'].notnull(), ['IATA', 'City', 'Country']]

# Two views of the same lookup table, pre-labelled for the endpoint they describe,
# so each join adds exactly the two new columns and no helper key column.
destination_places = airport_places.rename(columns={
    'IATA': 'Destination airport',
    'City': 'Destination city',
    'Country': 'Destination country',
})
source_places = airport_places.rename(columns={
    'IATA': 'Source airport',
    'City': 'Source city',
    'Country': 'Source country',
})

# Inner joins: a route survives only if both of its airports are found in the lookup.
routes_with_destination = (
    routes[routes['Destination airport'].notnull()]
    .merge(destination_places, on='Destination airport')
)
routes_large = (
    routes_with_destination[routes_with_destination['Source airport'].notnull()]
    .merge(source_places, on='Source airport')
)
routes_large.head()

Unnamed: 0,Airline,Airline ID,Source airport,Source airport ID,Destination airport,Destination airport ID,Codeshare,Stops,Equipment,Destination city,Destination country,Source city,Source country
0,2B,410,AER,2965,KZN,2990,,0,CR2,Kazan,Russia,Sochi,Russia
1,S7,4329,AER,2965,DME,4029,,0,319 320 738,Moscow,Russia,Sochi,Russia
2,U6,5234,AER,2965,DME,4029,,0,320,Moscow,Russia,Sochi,Russia
3,UN,5067,AER,2965,DME,4029,,0,734 735,Moscow,Russia,Sochi,Russia
4,Y7,13088,AER,2965,DME,4029,,0,738,Moscow,Russia,Sochi,Russia


In [52]:
# Find which airports have the most direct international connections (top international airports).
# For every source airport, count the distinct destination countries it has a route to,
# then rank airports by that count, largest first.
# NOTE(review): nothing here excludes routes whose destination country equals the
# source country, so an airport with domestic routes also counts its own country.
int_airports = (
    routes_large[['Source airport', 'Destination country']]
    .drop_duplicates()                   # one row per (airport, country) pair
    .groupby('Source airport')
    .size()
    .sort_values(ascending=False)
    .rename_axis('IATA')
    .reset_index(name='Num_int_flights')
)  # all international airports

top_int_airports = int_airports.head(15)  # only top international airports
# The export cell reads `top_int_airports`; the singular spelling used previously
# is kept as an alias so anything still referring to it continues to work.
top_int_airport = top_int_airports
top_int_airports

Unnamed: 0,IATA,Num_int_flights
0,CDG,105
1,FRA,93
2,IST,91
3,DXB,86
4,AMS,79
5,LHR,77
6,JFK,71
7,FCO,70
8,LGW,63
9,DOH,62


In [49]:
# Add fields to top international airports, and save results to excel.
# `top_int_airport` is the name the ranking cell assigns; reading the plural
# spelling here raised a NameError on a fresh kernel.
# The join key is spelled out: 'IATA' is the only column the two tables share.
top_int_airports_large = top_int_airport.merge(airports, on='IATA')

# NOTE(review): writing the legacy .xls format needs an engine that recent pandas
# releases may no longer ship - confirm, and consider switching to .xlsx.
top_int_airports_large.to_excel('top_int_airports_large.xls')
top_int_airports_large

Unnamed: 0,IATA,Num_int_flights,id,Name,City,Country,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database time zone
0,CDG,105,1382,Charles De Gaulle,Paris,France,LFPG,49.012779,2.55,392,1.0,E,Europe/Paris
1,FRA,93,340,Frankfurt Main,Frankfurt,Germany,EDDF,50.026421,8.543125,364,1.0,E,Europe/Berlin
2,IST,91,1701,Ataturk,Istanbul,Turkey,LTBA,40.976922,28.814606,163,2.0,E,Europe/Istanbul
3,DXB,86,2188,Dubai Intl,Dubai,United Arab Emirates,OMDB,25.252778,55.364444,62,4.0,U,Asia/Dubai
4,AMS,79,580,Schiphol,Amsterdam,Netherlands,EHAM,52.308613,4.763889,-11,1.0,E,Europe/Amsterdam
5,LHR,77,507,Heathrow,London,United Kingdom,EGLL,51.4775,-0.461389,83,0.0,E,Europe/London
6,JFK,71,3797,John F Kennedy Intl,New York,United States,KJFK,40.639751,-73.778925,13,-5.0,A,America/New_York
7,FCO,70,1555,Fiumicino,Rome,Italy,LIRF,41.804475,12.250797,15,1.0,E,Europe/Rome
8,LGW,63,502,Gatwick,London,United Kingdom,EGKK,51.148056,-0.190278,202,0.0,E,Europe/London
9,DOH,62,2241,Doha Intl,Doha,Qatar,OTBD,25.261125,51.565056,35,3.0,U,Asia/Qatar


**Further analysis**

In [119]:
# Let pandas print as many as 200 rows before it starts truncating the display
pd.set_option('display.max_rows', 200)

In [120]:
# Countries with at least one direct route out of BUD (Budapest - not in top 15).
# Keep only the BUD rows first, then collapse repeated destination countries.
departs_from_bud = routes_large['Source airport'] == 'BUD'
bud_countries = routes_large.loc[departs_from_bud, 'Destination country'].drop_duplicates()
print('There are %s countries reachable from BUD.' % len(bud_countries))
bud_countries.sort_values()

There are 33 countries reachable from BUD.


18376                 Austria
18326              Azerbaijan
18412                 Belarus
18344                 Belgium
18379                 Croatia
18330                  Cyprus
18371          Czech Republic
18395                 Denmark
18351                   Egypt
18391                 Finland
18382                  France
18338                 Germany
18355                  Greece
18358                 Ireland
18349                  Israel
18342                   Italy
18398                  Latvia
18408                   Malta
18347             Netherlands
18332                  Norway
18377                  Poland
18367                Portugal
18403                   Qatar
18402                 Romania
18352                  Russia
18389                  Serbia
18334                   Spain
18331                  Sweden
18327             Switzerland
18345                  Turkey
18404                 Ukraine
18333    United Arab Emirates
18329          United Kingdom
Name: Dest

In [121]:
# Distinct destination city names with at least one direct route out of BUD.
# Keep only the BUD rows first, then collapse repeated city names.
departs_from_bud = routes_large['Source airport'] == 'BUD'
bud_cities = routes_large.loc[departs_from_bud, 'Destination city'].drop_duplicates()
print('There are %s cities reachable from BUD.' % len(bud_cities))
bud_cities.sort_values()

There are 68 cities reachable from BUD.


18347        Amsterdam
18355           Athens
18326             Baku
18334        Barcelona
18356             Bari
18353            Basel
18401         Beauvais
18389         Belgrade
18343          Bergamo
18341           Berlin
18410          Billund
18329          Bristol
18344         Brussels
18402        Bucharest
18351            Cairo
18357          Catania
18416        Charleroi
18348          Cologne
18395       Copenhagen
18403             Doha
18384         Dortmund
18333            Dubai
18358           Dublin
18339      Duesseldorf
18414    East Midlands
18360        Edinburgh
18418        Eindhoven
18385        Frankfurt
18328           Geneva
18415       Gothenborg
18419             Hahn
18340          Hamburg
18391         Helsinki
18345         Istanbul
18404             Kiev
18397        Krasnodar
18330          Larnaca
18406            Leeds
18367           Lisbon
18336           London
18380           Madrid
18388           Malaga
18411           Malmoe
18408      

In [122]:
# Distinct destination airports with at least one direct route out of BUD.
# Keep only the BUD rows first, then collapse repeated airport codes.
departs_from_bud = routes_large['Source airport'] == 'BUD'
bud_airports = routes_large.loc[departs_from_bud, 'Destination airport'].drop_duplicates()
print('There are %s airports reachable from BUD.' % len(bud_airports))
bud_airports.sort_values()

There are 77 airports reachable from BUD.


18388    AGP
18347    AMS
18331    ARN
18355    ATH
18334    BCN
18389    BEG
18343    BGY
18410    BLL
18356    BRI
18329    BRS
18344    BRU
18353    BSL
18401    BVA
18351    CAI
18382    CDG
18348    CGN
18422    CIA
18395    CPH
18416    CRL
18357    CTA
18403    DOH
18384    DTM
18358    DUB
18339    DUS
18333    DWC
18360    EDI
18418    EIN
18414    EMA
18361    FCO
18385    FRA
18415    GSE
18328    GVA
18326    GYD
18340    HAM
18391    HEL
18419    HHN
18404    IEV
18399    IST
18397    KRR
18406    LBA
18330    LCA
18336    LGW
18364    LHR
18367    LIS
18420    LTN
18380    MAD
18368    MAN
18408    MLA
18411    MMX
18412    MSQ
18338    MUC
18370    MXP
18342    NAP
18423    NYO
18405    ORY
18332    OSL
18402    OTP
18371    PRG
18373    PSA
18398    RIX
18400    ROV
18407    RTM
18345    SAW
18374    SKG
18375    STN
18354    STR
18393    SVO
18394    SXF
18349    TLV
18413    TMP
18346    TSF
18341    TXL
18376    VIE
18352    VKO
18377    WAW
18379    ZAG
18327    ZRH

In [123]:
# Alphabetical list of the countries served directly from CDG (Charles de Gaulle, Paris).
# Select the destination country of every CDG departure, keep each country once, then sort.
(
    routes_large
    .loc[routes_large['Source airport'] == 'CDG', 'Destination country']
    .drop_duplicates()
    .sort_values()
)

15861                     Algeria
16104                      Angola
15814                   Argentina
15984                     Armenia
15925                     Austria
15648                  Azerbaijan
16022                     Bahrain
16149                     Belarus
15764                     Belgium
15659                       Benin
15680                      Brazil
16046                    Bulgaria
15652                Burkina Faso
15702                    Cameroon
15706                      Canada
16144                  Cape Verde
16115    Central African Republic
16004                        Chad
15972                       Chile
15790                       China
15663                    Colombia
16118         Congo (Brazzaville)
16106            Congo (Kinshasa)
15653               Cote d'Ivoire
15878                     Croatia
15850                        Cuba
15672                      Cyprus
15910              Czech Republic
15980                     Denmark
16124         