## Example: Analyzing Airport Operations

Download `airports.csv`, `airport-frequencies.csv`, `countries.csv`, `regions.csv` from [OurAirports.com](https://ourairports.com/data/)

In [1]:
import numpy as np
import pandas as pd

In [4]:
# Load the datasets
airports = pd.read_csv("https://ourairports.com/data/airports.csv", sep=",")
airports.head()

Data types:
 id                     int64
ident                 object
type                  object
name                  object
latitude_deg         float64
longitude_deg        float64
elevation_ft         float64
continent             object
iso_country           object
iso_region            object
municipality          object
scheduled_service     object
gps_code              object
iata_code             object
local_code            object
home_link             object
wikipedia_link        object
keywords              object
dtype: object


In [5]:
print("Size:\n", airports.shape)
print("Data types:\n", airports.dtypes)

Size:
 (58596, 18)
Data types:
 id                     int64
ident                 object
type                  object
name                  object
latitude_deg         float64
longitude_deg        float64
elevation_ft         float64
continent             object
iso_country           object
iso_region            object
municipality          object
scheduled_service     object
gps_code              object
iata_code             object
local_code            object
home_link             object
wikipedia_link        object
keywords              object
dtype: object


In [6]:
airports.head()

Unnamed: 0,id,ident,type,name,latitude_deg,longitude_deg,elevation_ft,continent,iso_country,iso_region,municipality,scheduled_service,gps_code,iata_code,local_code,home_link,wikipedia_link,keywords
0,6523,00A,heliport,Total Rf Heliport,40.070801,-74.933601,11.0,,US,US-PA,Bensalem,no,00A,,00A,,,
1,323361,00AA,small_airport,Aero B Ranch Airport,38.704022,-101.473911,3435.0,,US,US-KS,Leoti,no,00AA,,00AA,,,
2,6524,00AK,small_airport,Lowell Field,59.9492,-151.695999,450.0,,US,US-AK,Anchor Point,no,00AK,,00AK,,,
3,6525,00AL,small_airport,Epps Airpark,34.864799,-86.770302,820.0,,US,US-AL,Harvest,no,00AL,,00AL,,,
4,6526,00AR,closed,Newport Hospital & Clinic Heliport,35.6087,-91.254898,237.0,,US,US-AR,Newport,no,,,,,,00AR


In [7]:
airports.isnull().sum()

id                       0
ident                    0
type                     0
name                     0
latitude_deg             0
longitude_deg            0
elevation_ft          8486
continent            29010
iso_country            246
iso_region               0
municipality          5873
scheduled_service        0
gps_code             16849
iata_code            49361
local_code           28045
home_link            55433
wikipedia_link       48479
keywords             47729
dtype: int64

In [17]:
# Load other csv files
airport_freq = pd.read_csv("https://ourairports.com/data/airport-frequencies.csv", sep=',')
countries = pd.read_csv("https://ourairports.com/data/countries.csv", sep=',')
regions = pd.read_csv("https://ourairports.com/data/regions.csv", sep=',')

In [13]:
def basic_info(df):
    print("Size:")
    print(df.shape)
    print("="*20)
    print("Data types:")
    print(df.dtypes)
    print("="*20)
    print("Missing Values:")
    print(df.isnull().sum())
    print(df.head())

In [14]:
basic_info(airport_freq)

Size:
(28920, 6)
Data types:
id                 int64
airport_ref        int64
airport_ident     object
type              object
description       object
frequency_mhz    float64
dtype: object
Missing Values:
id                 0
airport_ref        0
airport_ident      0
type               0
description      874
frequency_mhz      0
dtype: int64
       id  airport_ref airport_ident   type          description  \
0   70518         6528          00CA   CTAF                 CTAF   
1  307581         6589          01FL  ARCAL                  NaN   
2   75239         6589          01FL   CTAF  CEDAR KNOLL TRAFFIC   
3   60191         6756          04CA   CTAF                 CTAF   
4   59287         6779          04MS   UNIC               UNICOM   

   frequency_mhz  
0          122.9  
1          122.9  
2          122.8  
3          122.9  
4          122.8  


In [15]:
basic_info(countries)

Size:
(247, 6)
Data types:
id                 int64
code              object
name              object
continent         object
wikipedia_link    object
keywords          object
dtype: object
Missing Values:
id                  0
code                1
name                0
continent          41
wikipedia_link      0
keywords          155
dtype: int64
       id code                  name continent  \
0  302672   AD               Andorra        EU   
1  302618   AE  United Arab Emirates        AS   
2  302619   AF           Afghanistan        AS   
3  302722   AG   Antigua and Barbuda       NaN   
4  302723   AI              Anguilla       NaN   

                                      wikipedia_link  \
0              https://en.wikipedia.org/wiki/Andorra   
1  https://en.wikipedia.org/wiki/United_Arab_Emir...   
2          https://en.wikipedia.org/wiki/Afghanistan   
3  https://en.wikipedia.org/wiki/Antigua_and_Barbuda   
4             https://en.wikipedia.org/wiki/Anguilla   

          

In [18]:
basic_info(regions)

Size:
(3963, 8)
Data types:
id                 int64
code              object
local_code        object
name              object
continent         object
iso_country       object
wikipedia_link    object
keywords          object
dtype: object
Missing Values:
id                   0
code                 0
local_code           6
name                 0
continent          419
iso_country         15
wikipedia_link     251
keywords          3503
dtype: int64
       id   code local_code                        name continent iso_country  \
0  302811  AD-02         02              Canillo Parish        EU          AD   
1  302812  AD-03         03               Encamp Parish        EU          AD   
2  302813  AD-04         04           La Massana Parish        EU          AD   
3  302814  AD-05         05               Ordino Parish        EU          AD   
4  302815  AD-06         06  Sant Julià de Lòria Parish        EU          AD   

                                      wikipedia_link keywo

#### 1. Select data with multiple conditions

In [24]:
# Find the region code for New York from region data frame.

regions.head()

Unnamed: 0,id,code,local_code,name,continent,iso_country,wikipedia_link,keywords
0,302811,AD-02,2,Canillo Parish,EU,AD,https://en.wikipedia.org/wiki/Canillo,
1,302812,AD-03,3,Encamp Parish,EU,AD,https://en.wikipedia.org/wiki/Encamp,
2,302813,AD-04,4,La Massana Parish,EU,AD,https://en.wikipedia.org/wiki/La_Massana,
3,302814,AD-05,5,Ordino Parish,EU,AD,https://en.wikipedia.org/wiki/Ordino,
4,302815,AD-06,6,Sant Julià de Lòria Parish,EU,AD,https://en.wikipedia.org/wiki/Sant_Julià_de_Lòria,


In [25]:
countries[countries['name'] == 'United States'] # This tells us that the country code for United States is US

Unnamed: 0,id,code,name,continent,wikipedia_link,keywords
228,302755,US,United States,,https://en.wikipedia.org/wiki/United_States,America


In [28]:
regions[(regions['iso_country'] == "US") & (regions['local_code'] == "NY")]

Unnamed: 0,id,code,local_code,name,continent,iso_country,wikipedia_link,keywords
3730,306110,US-NY,NY,New York,,US,https://en.wikipedia.org/wiki/New_York,


In [29]:
regions[(regions['iso_country'] == "US") & (regions['name'] == "New York")]

Unnamed: 0,id,code,local_code,name,continent,iso_country,wikipedia_link,keywords
3730,306110,US-NY,NY,New York,,US,https://en.wikipedia.org/wiki/New_York,


Result: The region code for New York is US-NY.

In [30]:
# Extract all large airports in New York state from airoprts data frame

airports.head()

Unnamed: 0,id,ident,type,name,latitude_deg,longitude_deg,elevation_ft,continent,iso_country,iso_region,municipality,scheduled_service,gps_code,iata_code,local_code,home_link,wikipedia_link,keywords
0,6523,00A,heliport,Total Rf Heliport,40.070801,-74.933601,11.0,,US,US-PA,Bensalem,no,00A,,00A,,,
1,323361,00AA,small_airport,Aero B Ranch Airport,38.704022,-101.473911,3435.0,,US,US-KS,Leoti,no,00AA,,00AA,,,
2,6524,00AK,small_airport,Lowell Field,59.9492,-151.695999,450.0,,US,US-AK,Anchor Point,no,00AK,,00AK,,,
3,6525,00AL,small_airport,Epps Airpark,34.864799,-86.770302,820.0,,US,US-AL,Harvest,no,00AL,,00AL,,,
4,6526,00AR,closed,Newport Hospital & Clinic Heliport,35.6087,-91.254898,237.0,,US,US-AR,Newport,no,,,,,,00AR


In [32]:
set(airports['type'].values)

{'balloonport',
 'closed',
 'heliport',
 'large_airport',
 'medium_airport',
 'seaplane_base',
 'small_airport'}

In [33]:
airports['type'].unique()

array(['heliport', 'small_airport', 'closed', 'seaplane_base',
       'balloonport', 'medium_airport', 'large_airport'], dtype=object)

In [37]:
airports_NY_large = airports[(airports['iso_region'] == "US-NY") & (airports['type'] == 'large_airport')]

In [40]:
# Extract the name, identification code, and municipality of 
# all airports with ISO region "US-NY" and type "large_airport"

airports_NY_large[["name", "ident", "municipality"]].reset_index(drop=True) # reset index to remove original line numbers

Unnamed: 0,name,ident,municipality
0,Buffalo Niagara International Airport,KBUF,Buffalo
1,John F Kennedy International Airport,KJFK,New York
2,La Guardia Airport,KLGA,New York
3,Greater Rochester International Airport,KROC,Rochester
4,Syracuse Hancock International Airport,KSYR,Syracuse


#### 2. Sorting

In [41]:
# From airport_freq, extract all communication frequencies for KJFK,
# with frequencies sorted in ascending order

airport_freq.head()

Unnamed: 0,id,airport_ref,airport_ident,type,description,frequency_mhz
0,70518,6528,00CA,CTAF,CTAF,122.9
1,307581,6589,01FL,ARCAL,,122.9
2,75239,6589,01FL,CTAF,CEDAR KNOLL TRAFFIC,122.8
3,60191,6756,04CA,CTAF,CTAF,122.9
4,59287,6779,04MS,UNIC,UNICOM,122.8


In [43]:
KJFK_freq = airport_freq[airport_freq['airport_ident'] == 'KJFK']

In [45]:
KJFK_freq.sort_values(by="frequency_mhz")

Unnamed: 0,id,airport_ref,airport_ident,type,description,frequency_mhz
11656,69294,3622,KJFK,ATIS,ATIS,115.1
11661,69298,3622,KJFK,RDO,NEW YORK RDO,115.9
11662,69299,3622,KJFK,TWR,KENNEDY TWR,119.1
11659,332895,3622,KJFK,GND,GND ALT,121.65
11660,69297,3622,KJFK,GND,GND,121.9
11664,69300,3622,KJFK,UNIC,UNICOM,122.95
11663,332894,3622,KJFK,TWR,TWR ALT,123.9
11653,69293,3622,KJFK,APP,NEW YORK APP (ROBER),125.7
11654,301312,3622,KJFK,APP,NEW YORK APPROACH (CAMRN),127.4
11655,301313,3622,KJFK,APP,NEW YORK APPROACH (FINAL),132.4


In [46]:
# From airport_freq, extract all communication frequencies for KJFK,
# with frequencies sorted in descending order

KJFK_freq.sort_values(by="frequency_mhz", ascending=False)

Unnamed: 0,id,airport_ref,airport_ident,type,description,frequency_mhz
11658,69296,3622,KJFK,DEP,NEW YORK DEP,135.9
11657,69295,3622,KJFK,CLD,CLNC DEL,135.05
11655,301313,3622,KJFK,APP,NEW YORK APPROACH (FINAL),132.4
11654,301312,3622,KJFK,APP,NEW YORK APPROACH (CAMRN),127.4
11653,69293,3622,KJFK,APP,NEW YORK APP (ROBER),125.7
11663,332894,3622,KJFK,TWR,TWR ALT,123.9
11664,69300,3622,KJFK,UNIC,UNICOM,122.95
11660,69297,3622,KJFK,GND,GND,121.9
11659,332895,3622,KJFK,GND,GND ALT,121.65
11662,69299,3622,KJFK,TWR,KENNEDY TWR,119.1


In [47]:
# Find the five rows with larget frequency value from the previous data frame

KJFK_freq.head() # by default head() returns the first 5 rows

Unnamed: 0,id,airport_ref,airport_ident,type,description,frequency_mhz
11653,69293,3622,KJFK,APP,NEW YORK APP (ROBER),125.7
11654,301312,3622,KJFK,APP,NEW YORK APPROACH (CAMRN),127.4
11655,301313,3622,KJFK,APP,NEW YORK APPROACH (FINAL),132.4
11656,69294,3622,KJFK,ATIS,ATIS,115.1
11657,69295,3622,KJFK,CLD,CLNC DEL,135.05


#### 3. Filter on a list of values

In [57]:
# Extract all communication frequencies used for a large airport in New York state

freq_NY_airports = pd.DataFrame()

for ident in airports_NY_large['ident']:
#     print(ident) # verify that the identification codes are extracted correctly
    freq_airport = airport_freq[airport_freq['airport_ident'] == ident]
#     print(freq_airport) # verify that the frequencies are extracted correctly
    freq_NY_airports = pd.concat([freq_NY_airports, freq_airport])
    
freq_NY_airports

Unnamed: 0,id,airport_ref,airport_ident,type,description,frequency_mhz
7785,69857,3431,KBUF,A/D,Buffalo APP/DEP,126.15
7786,69858,3431,KBUF,ATIS,ATIS,135.35
7787,69859,3431,KBUF,CLD,CLNC DEL,124.7
7788,69860,3431,KBUF,GND,GND,133.2
7789,69861,3431,KBUF,RDO,RDO,122.6
7790,69862,3431,KBUF,TWR,TWR,120.5
11653,69293,3622,KJFK,APP,NEW YORK APP (ROBER),125.7
11654,301312,3622,KJFK,APP,NEW YORK APPROACH (CAMRN),127.4
11655,301313,3622,KJFK,APP,NEW YORK APPROACH (FINAL),132.4
11656,69294,3622,KJFK,ATIS,ATIS,115.1


In [58]:
idents = airports_NY_large['ident']
filter1 = airport_freq['airport_ident'].isin(idents)
airport_freq[filter1]

Unnamed: 0,id,airport_ref,airport_ident,type,description,frequency_mhz
7785,69857,3431,KBUF,A/D,Buffalo APP/DEP,126.15
7786,69858,3431,KBUF,ATIS,ATIS,135.35
7787,69859,3431,KBUF,CLD,CLNC DEL,124.7
7788,69860,3431,KBUF,GND,GND,133.2
7789,69861,3431,KBUF,RDO,RDO,122.6
7790,69862,3431,KBUF,TWR,TWR,120.5
11653,69293,3622,KJFK,APP,NEW YORK APP (ROBER),125.7
11654,301312,3622,KJFK,APP,NEW YORK APPROACH (CAMRN),127.4
11655,301313,3622,KJFK,APP,NEW YORK APPROACH (FINAL),132.4
11656,69294,3622,KJFK,ATIS,ATIS,115.1


In [60]:
airport_freq[airport_freq['airport_ident'].isin(airports_NY_large['ident'])]

Unnamed: 0,id,airport_ref,airport_ident,type,description,frequency_mhz
7785,69857,3431,KBUF,A/D,Buffalo APP/DEP,126.15
7786,69858,3431,KBUF,ATIS,ATIS,135.35
7787,69859,3431,KBUF,CLD,CLNC DEL,124.7
7788,69860,3431,KBUF,GND,GND,133.2
7789,69861,3431,KBUF,RDO,RDO,122.6
7790,69862,3431,KBUF,TWR,TWR,120.5
11653,69293,3622,KJFK,APP,NEW YORK APP (ROBER),125.7
11654,301312,3622,KJFK,APP,NEW YORK APPROACH (CAMRN),127.4
11655,301313,3622,KJFK,APP,NEW YORK APPROACH (FINAL),132.4
11656,69294,3622,KJFK,ATIS,ATIS,115.1


#### 4. Grouping

In [63]:
countries.head()

Unnamed: 0,id,code,name,continent,wikipedia_link,keywords
0,302672,AD,Andorra,EU,https://en.wikipedia.org/wiki/Andorra,
1,302618,AE,United Arab Emirates,AS,https://en.wikipedia.org/wiki/United_Arab_Emir...,"UAE,مطارات في الإمارات العربية المتحدة"
2,302619,AF,Afghanistan,AS,https://en.wikipedia.org/wiki/Afghanistan,
3,302722,AG,Antigua and Barbuda,,https://en.wikipedia.org/wiki/Antigua_and_Barbuda,
4,302723,AI,Anguilla,,https://en.wikipedia.org/wiki/Anguilla,


In [64]:
airports.head()

Unnamed: 0,id,ident,type,name,latitude_deg,longitude_deg,elevation_ft,continent,iso_country,iso_region,municipality,scheduled_service,gps_code,iata_code,local_code,home_link,wikipedia_link,keywords
0,6523,00A,heliport,Total Rf Heliport,40.070801,-74.933601,11.0,,US,US-PA,Bensalem,no,00A,,00A,,,
1,323361,00AA,small_airport,Aero B Ranch Airport,38.704022,-101.473911,3435.0,,US,US-KS,Leoti,no,00AA,,00AA,,,
2,6524,00AK,small_airport,Lowell Field,59.9492,-151.695999,450.0,,US,US-AK,Anchor Point,no,00AK,,00AK,,,
3,6525,00AL,small_airport,Epps Airpark,34.864799,-86.770302,820.0,,US,US-AL,Harvest,no,00AL,,00AL,,,
4,6526,00AR,closed,Newport Hospital & Clinic Heliport,35.6087,-91.254898,237.0,,US,US-AR,Newport,no,,,,,,00AR


In [67]:
# Calculate the number of large airports for each country

airports_by_country = pd.DataFrame()

for country_code in countries['code']:
    # extract the large airports from that country
    large_airports_country = airports[(airports['iso_country'] == country_code) & (airports['type'] == "large_airport")] 
#     print(large_airports_country)
    # count the number of large airports
    num_large_airport = large_airports_country.shape[0]
    
    # add a row in airports_by_country with the country name and the number of large airports
    airports_by_country.loc[country_code, 'Number of Large Airports'] = num_large_airport

airports_by_country

Unnamed: 0,Number of Large Airports
AD,0.0
AE,4.0
AF,0.0
AG,0.0
AI,0.0
...,...
YT,0.0
ZA,4.0
ZM,1.0
ZW,1.0


In [11]:
# Find the top 5 countries having the largest amount of large airports



#### 5. Merging

In [12]:
# Merge the above result with countries data frame to find the name of the countries



In [13]:
# Append full country name and region name to airports.

