## Example: Analyzing Airport Operations

Download `airports.csv`, `airport-frequencies.csv`, `countries.csv`, and `regions.csv` from [OurAirports.com](https://ourairports.com/data/), or let the cells below read them directly from the site.

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the datasets
# OurAirports uses the literal code "NA" for North America (continent) and
# Namibia (iso_country). pandas parses "NA" as a missing value by default,
# which would silently blank those codes, so switch off the default NA
# markers and treat only empty fields as missing.
airports = pd.read_csv(
    "https://ourairports.com/data/airports.csv",
    sep=",",
    keep_default_na=False,
    na_values=[""],
)
airports.head()

In [None]:
# Report the table's dimensions and the dtype pandas assigned to each column
print("Size:\n", airports.shape)
print("Data types:\n", airports.dtypes)

In [None]:
# Preview the first rows of the airports table
airports.head()

In [None]:
# Count the missing (null) values in each column
airports.isnull().sum()

In [None]:
# Load other csv files
# The country/continent code "NA" (Namibia / North America) must stay a
# string: pandas would parse it as a missing value by default, so disable the
# default NA markers and treat only empty fields as missing.
csv_options = {"sep": ",", "keep_default_na": False, "na_values": [""]}
airport_freq = pd.read_csv("https://ourairports.com/data/airport-frequencies.csv", **csv_options)
countries = pd.read_csv("https://ourairports.com/data/countries.csv", **csv_options)
regions = pd.read_csv("https://ourairports.com/data/regions.csv", **csv_options)

In [None]:
def basic_info(df):
    """Print a quick overview of a data frame.

    The report lists, in order: the shape, the per-column dtypes, the number
    of null values per column, and the first rows as returned by ``head()``.
    The first three sections are labelled and separated by a row of 20 ``=``
    characters; the head is printed directly after the null counts.

    Args:
        df: The pandas DataFrame to summarize.

    Returns:
        None. The overview is written to standard output.
    """
    divider = "=" * 20
    report = (
        "Size:",
        df.shape,
        divider,
        "Data types:",
        df.dtypes,
        divider,
        "Missing Values:",
        df.isnull().sum(),
        df.head(),
    )
    # One print call with a newline separator emits each entry on its own
    # line(s), exactly like printing the entries one after another.
    print(*report, sep="\n")

In [None]:
# Print shape, dtypes, missing-value counts and the first rows of airport_freq
basic_info(airport_freq)

In [None]:
# Print shape, dtypes, missing-value counts and the first rows of countries
basic_info(countries)

In [None]:
# Print shape, dtypes, missing-value counts and the first rows of regions
basic_info(regions)

#### 1. Select data with multiple conditions

In [None]:
# Goal: find the region code for New York in the regions data frame.
# Start by previewing the table to see which columns are available.

regions.head()

In [None]:
# Look up the United States row; this tells us that its country code is US
countries[countries['name'] == 'United States']

In [None]:
# Combine two conditions with &: rows for country US whose local code is NY
regions[(regions['iso_country'] == "US") & (regions['local_code'] == "NY")]

In [None]:
# Alternative lookup: match on the region's full name instead of its local code
regions[(regions['iso_country'] == "US") & (regions['name'] == "New York")]

In [None]:
# Extract all large airports in New York state from the airports data frame.
# Preview the table first to find the relevant columns.

airports.head()

In [None]:
# Distinct airport types, collected into a Python set
set(airports['type'].values)

In [None]:
# Distinct airport types again, this time using pandas' unique()
airports['type'].unique()

In [None]:
# Keep only the rows that are both in ISO region "US-NY" and of type "large_airport"
in_new_york = airports['iso_region'] == "US-NY"
is_large_airport = airports['type'] == 'large_airport'
airports_NY_large = airports[in_new_york & is_large_airport]

In [None]:
# Extract the name, identification code, and municipality of
# all airports with ISO region "US-NY" and type "large_airport"

airports_NY_large[["name", "ident", "municipality"]].reset_index(drop=True) # drop=True discards the original row labels so the result is renumbered from 0

#### 2. Sorting

In [None]:
# From airport_freq, extract all communication frequencies for KJFK,
# with frequencies sorted in ascending order.
# Preview the frequency table first to see its columns.

airport_freq.head()

In [None]:
# Select every frequency row whose airport identifier is KJFK
is_kjfk = airport_freq['airport_ident'].eq('KJFK')
KJFK_freq = airport_freq.loc[is_kjfk]

In [None]:
# Show the KJFK frequencies ordered by frequency_mhz (ascending is the default order)
KJFK_freq.sort_values(by="frequency_mhz")

In [None]:
# From airport_freq, extract all communication frequencies for KJFK,
# with frequencies sorted in descending order (ascending=False flips the order)

KJFK_freq.sort_values(by="frequency_mhz", ascending=False)

In [None]:
# Find the five rows with the largest frequency values for KJFK.
# KJFK_freq itself is still unsorted (the sorted views above were only
# displayed, never stored), so sort in descending order here before taking
# the first rows.

KJFK_freq.sort_values(by="frequency_mhz", ascending=False).head() # by default head() returns the first 5 rows

#### 3. Filter on a list of values

In [None]:
# Extract all communication frequencies used for a large airport in New York state

# Loop-based version: collect one frame of matching rows per airport and
# concatenate once at the end. Calling pd.concat inside the loop would
# re-copy all previously accumulated rows on every iteration.
freq_per_airport = [
    airport_freq[airport_freq['airport_ident'] == ident]
    for ident in airports_NY_large['ident']
]

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame that keeps airport_freq's columns when no airport was selected.
if freq_per_airport:
    freq_NY_airports = pd.concat(freq_per_airport)
else:
    freq_NY_airports = airport_freq.iloc[0:0]

freq_NY_airports

In [None]:
# Vectorized alternative to looping over identifiers: build a boolean mask
# that is True where the row's airport_ident is one of the large New York
# airports' identifiers, then index airport_freq with that mask.
idents = airports_NY_large['ident']
filter1 = airport_freq['airport_ident'].isin(idents)
airport_freq[filter1]

In [None]:
# The isin() filter written as a single expression, without intermediate variables
airport_freq[airport_freq['airport_ident'].isin(airports_NY_large['ident'])]

#### 4. Grouping

In [None]:
# Preview countries (the `code` column holds the country codes used below)
countries.head()

In [None]:
# Preview airports (`iso_country` and `type` are the columns used below)
airports.head()

In [None]:
# Calculate the number of large airports for each country

# Count the large airports per country code in one pass instead of filtering
# the whole airports table once per country.
is_large_airport = airports['type'] == "large_airport"
large_airport_counts = airports.loc[is_large_airport, 'iso_country'].value_counts()

# Align the counts with every code in countries: countries without any large
# airport get 0, and the counts stay integers (assigning row by row with .loc
# on an empty frame produces floats). The index holds the country codes and is
# left unnamed.
airports_by_country = (
    large_airport_counts
    .reindex(countries['code'], fill_value=0)
    .rename_axis(None)
    .to_frame(name='Number of Large Airports')
)

airports_by_country

In [None]:
# Find the top 5 countries with the most large airports



#### 5. Merging

In [None]:
# Merge the above result with countries data frame to find the name of the countries



In [None]:
# Append full country name and region name to airports.

