# XML example and exercise
****
+ study examples of accessing nodes in XML tree structure  
+ work on exercise to be completed and submitted
****
+ reference: https://docs.python.org/2.7/library/xml.etree.elementtree.html
+ data source: http://www.dbis.informatik.uni-goettingen.de/Mondial
****

In [1]:
from xml.etree import ElementTree as ET
import pandas as pd
import numpy as np
%matplotlib inline
import seaborn as sns

## XML example

+ for details about tree traversal and iterators, see https://docs.python.org/2.7/library/xml.etree.elementtree.html

In [2]:
document_tree = ET.parse( './data/mondial_database_less.xml' )

In [3]:
# print names of all countries
for child in document_tree.getroot():
    print child.find('name').text

Albania
Greece
Macedonia
Serbia
Montenegro
Kosovo
Andorra


In [4]:
# print names of all countries and their cities
for element in document_tree.iterfind('country'):
    print '\n ' + element.find('name').text + ':\n',
    capitals_string = ''
    for subelement in element.getiterator('city'):
        capitals_string += subelement.find('name').text  + ' '
    print capitals_string[:-2]


 Albania:
Tirana Shkodër Durrës Vlorë Elbasan Korç

 Greece:
Komotini Kavala Athina Peiraias Peristeri Acharnes Patra Kozani Kerkyra Ioannina Thessaloniki Iraklio Chania Ermoupoli Rhodes Tripoli Lamia Chalkida Larissa Volos Mytilini Karye

 Macedonia:
Skopje Kumanov

 Serbia:
Beograd Novi Sad Ni

 Montenegro:
Podgoric

 Kosovo:
Prishtin

 Andorra:
Andorra la Vell


****
## XML exercise

Using data in 'data/mondial_database.xml', the examples above, and refering to https://docs.python.org/2.7/library/xml.etree.elementtree.html, find

1. 10 countries with the lowest infant mortality rates
2. 10 cities with the largest population
3. 10 ethnic groups with the largest overall populations (sum of best/latest estimates over all countries)
4. name and country of a) longest river, b) largest lake and c) airport at highest elevation

In [5]:
document = ET.parse( './data/mondial_database.xml' )

In [6]:
root = document.getroot()

## Countries Abbrivation

In [7]:
country = []
abrv = []
abrv_df = pd.DataFrame(columns=['country', 'abrv'])
for elm in root.iterfind('country'):
    country.append(elm.find('name').text)
    abrv.append(elm.attrib.get('car_code'))
    
abrv_df.country = country
abrv_df.abrv= abrv
abrv_df.set_index('abrv', inplace=True)
abrv_df.head(10)

Unnamed: 0_level_0,country
abrv,Unnamed: 1_level_1
AL,Albania
GR,Greece
MK,Macedonia
SRB,Serbia
MNE,Montenegro
KOS,Kosovo
AND,Andorra
F,France
E,Spain
A,Austria


***
## Countries populations

In [8]:
country = []
year = []
pop = []
country_pop_df = pd.DataFrame(columns=['country', 'year', 'population'])
for elm in root.iterfind('country'):
    for elm_pop in elm.iterfind('population'):
        country.append(elm.find('name').text)
        pop.append(elm_pop.text)
        year.append(elm_pop.attrib.get('year'))
        
country_pop_df.country = country
country_pop_df.population = pop
country_pop_df.year = year
country_pop_df.population = country_pop_df.population.astype(float)
country_pop_df.head(10)

Unnamed: 0,country,year,population
0,Albania,1950,1214489.0
1,Albania,1960,1618829.0
2,Albania,1970,2138966.0
3,Albania,1980,2734776.0
4,Albania,1990,3446882.0
5,Albania,1997,3249136.0
6,Albania,2000,3304948.0
7,Albania,2001,3069275.0
8,Albania,2011,2800138.0
9,Greece,1861,1096810.0


### Last measure population of Countries

In [40]:
country_recent_pop_df = country_pop_df.copy()
country_recent_pop_df = country_recent_pop_df.\
                        ix[country_recent_pop_df.groupby(['country']).year.idxmax()].set_index('country')
country_recent_pop_df.head(10)

Unnamed: 0_level_0,year,population
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,2013,26023100.0
Albania,2011,2800138.0
Algeria,2010,37062820.0
American Samoa,2010,55519.0
Andorra,2011,78115.0
Angola,2014,24383301.0
Anguilla,2011,13037.0
Antigua and Barbuda,2011,81799.0
Argentina,2014,42669500.0
Armenia,2013,3026879.0


In [41]:
country_recent_pop_df = country_recent_pop_df.sort_values(by='population', ascending=False)
country_recent_pop_df.head(10)

Unnamed: 0_level_0,year,population
country,Unnamed: 1_level_1,Unnamed: 2_level_1
China,2013,1360720000.0
India,2011,1210855000.0
United States,2014,318857100.0
Indonesia,2014,252124500.0
Brazil,2014,202768600.0
Pakistan,2010,173149300.0
Nigeria,2011,164294500.0
Bangladesh,2011,149772400.0
Russia,2014,143666900.0
Japan,2013,127298000.0


In [11]:
print 'World Population: ' + `country_recent_pop_df.population.sum()`

World Population: 6971894285.0


### There is ap problem here, the populations are measured in different years
***

***
# Q1: 10 countries with the lowest infant mortality rates

***
### make a table of infant mortality per each country

In [42]:
country = []
mortality = []
mortality_df = pd.DataFrame(columns=['country', 'infant_mortality'])
for elm in root.getiterator('country'):
    country.append(elm.find('name').text)
    if elm.find('infant_mortality') is not None:
        mortality.append(elm.find('infant_mortality').text)
    else:
        mortality.append(np.nan)

mortality_df['country'] = country
mortality_df['infant_mortality'] = mortality
mortality_df['infant_mortality'] = mortality_df['infant_mortality'].astype(float)
mortality_df.set_index('country', inplace=True)
mortality_df.head(10)

Unnamed: 0_level_0,infant_mortality
country,Unnamed: 1_level_1
Albania,13.19
Greece,4.78
Macedonia,7.9
Serbia,6.16
Montenegro,
Kosovo,
Andorra,3.69
France,3.31
Spain,3.33
Austria,4.16


### CLeaning missing data

#### There is some countries some with missing values:

In [13]:
mortality_df[mortality_df.infant_mortality.isnull()]

Unnamed: 0_level_0,infant_mortality
country,Unnamed: 1_level_1
Montenegro,
Kosovo,
Holy See,
Ceuta,
Melilla,
Svalbard,
Christmas Island,
Cocos Islands,
Curacao,
Saint Martin,


#### dropping out these missing data

In [43]:
mortality_df = mortality_df.dropna()
mortality_df.head(10)

Unnamed: 0_level_0,infant_mortality
country,Unnamed: 1_level_1
Albania,13.19
Greece,4.78
Macedonia,7.9
Serbia,6.16
Andorra,3.69
France,3.31
Spain,3.33
Austria,4.16
Czech Republic,2.63
Germany,3.46


***
### Top 10 countries with Lowest infant mortality 

In [15]:
mortality_df.sort_values(by='infant_mortality').head(10)

Unnamed: 0_level_0,infant_mortality
country,Unnamed: 1_level_1
Monaco,1.81
Japan,2.13
Bermuda,2.48
Norway,2.48
Singapore,2.53
Sweden,2.6
Czech Republic,2.63
Hong Kong,2.73
Macao,3.13
Iceland,3.15


### 10 Worst countries in infant mortality

In [16]:
mortality_df.sort_values(by='infant_mortality', ascending=False).head(10)

Unnamed: 0_level_0,infant_mortality
country,Unnamed: 1_level_1
Western Sahara,145.82
Afghanistan,117.23
Mali,104.34
Somalia,100.14
Central African Republic,92.86
Guinea-Bissau,90.92
Chad,90.3
Niger,86.27
Angola,79.99
Burkina Faso,76.8


***
# Q2: 10 cities with the largest population

***
### Finding each city population in different years

In [17]:
country = []
city = []
city_pop = []
pop_year = []

city_pop_df = pd.DataFrame(columns=['country', 'city', 'population', 'year'])
for elm_country in root.iterfind('country'):
    for elm_city in elm_country.getiterator('city'):
        for elm_pop in elm_city.iterfind('population'):
            country.append(elm_country.find('name').text)
            city.append(elm_city.find('name').text)
            city_pop.append(elm_pop.text)
            pop_year.append(elm_pop.attrib.get('year'))

city_pop_df.country = country
city_pop_df.city = city
city_pop_df.population = city_pop
city_pop_df.year = pop_year
city_pop_df.population = city_pop_df.population.astype(float)
city_pop_df.set_index(['country', 'city', 'year']).head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,population
country,city,year,Unnamed: 3_level_1
Albania,Tirana,1987,192000.0
Albania,Tirana,1990,244153.0
Albania,Tirana,2011,418495.0
Albania,Shkodër,1987,62000.0
Albania,Shkodër,2011,77075.0
Albania,Durrës,1987,60000.0
Albania,Durrës,2011,113249.0
Albania,Vlorë,1987,56000.0
Albania,Vlorë,2011,79513.0
Albania,Elbasan,1987,53000.0


***
### missing data:

In [18]:
city_pop_df = city_pop_df.dropna()

***
### Recnt measured population of each city

In [19]:
city_pop_df.ix[city_pop_df.groupby(['country', 'city']).year.idxmax(),:].head(20)

Unnamed: 0,country,city,population,year
3322,Afghanistan,Herat,335200.0,2004
3321,Afghanistan,Kabul,2435400.0,2004
3323,Afghanistan,Kandahar,311800.0,2004
3324,Afghanistan,Mazar-i-Sharif,288700.0,2004
6,Albania,Durrës,113249.0,2011
10,Albania,Elbasan,78703.0,2011
12,Albania,Korçë,51152.0,2011
4,Albania,Shkodër,77075.0,2011
2,Albania,Tirana,418495.0,2011
8,Albania,Vlorë,79513.0,2011


***
### Top 10 cities with Largest Last measure population

In [20]:
city_pop_df.ix[city_pop_df.groupby(['country', 'city']).year.idxmax(),:].\
            sort_values(by='population', ascending=False).head(10)

Unnamed: 0,country,city,population,year
3750,China,Shanghai,22315474.0,2010
2607,Turkey,Istanbul,13710512.0,2012
4303,India,Mumbai,12442373.0,2011
1546,Russia,Moskva,11979529.0,2013
3746,China,Beijing,11716620.0,2010
8208,Brazil,São Paulo,11152344.0,2010
3754,China,Tianjin,11090314.0,2010
3364,China,Guangzhou,11071424.0,2010
4399,India,Delhi,11034555.0,2011
3371,China,Shenzhen,10358381.0,2010


***
### Top 10 cities with Lowest Last measure population

In [21]:
city_pop_df.ix[city_pop_df.groupby(['country', 'city']).year.idxmax(),:].\
            sort_values(by='population', ascending=True).head(10)

Unnamed: 0,country,city,population,year
1365,Ukraine,Prypjat,0.0,2001
7282,Montserrat,Plymouth,0.0,2009
67,Greece,Karyes,233.0,2014
7515,Palau,Melekeok,391.0,2009
7520,Tokelau,Fakaofo,490.0,2011
2905,Norway,Hermannsverk,706.0,1987
3015,Holy See,Vatican City,842.0,2014
3013,Monaco,Monaco,975.0,2008
7284,Sint Maarten,Philipsburg,1338.0,2006
7286,Saint Barthelemy,Gustavia,2299.0,2006


***
### It's Odd, the '0' population of cities 'Plymouth' and 'Prypjat', However, it's almost True, as I searched in wikipedia: 
#### "Eruptions destroyed Montserrat's Georgian era capital city of Plymouth. Between 1995 and 2000, two-thirds of the island's population was forced to flee, primarily to the United Kingdom, leaving fewer than 1,200 people on the island as of 1997 (rising to nearly 5,000 by 2016)."
### and also there is another history for 'Prypjat' I found it in "infoplease.com":
#### Thirty years after the worst nuclear disaster in history, Chernobyl, a former cultural center, has become an abandoned city in northern Ukraine. Pripyat, the city founded in 1970 to house the workers for the nearby Chernobyl Nuclear Power Plant, is also now described as a dead town. 
***

In [22]:
city_pop_df[city_pop_df['city']=='Prypjat'].sort_values(by='year')

Unnamed: 0,country,city,population,year
1362,Ukraine,Prypjat,0.0,1970
1363,Ukraine,Prypjat,49360.0,1986
1364,Ukraine,Prypjat,0.0,1990
1365,Ukraine,Prypjat,0.0,2001


In [23]:
city_pop_df[city_pop_df['city']=='Plymouth'].sort_values(by='year')

Unnamed: 0,country,city,population,year
7281,Montserrat,Plymouth,3500.0,1995
7282,Montserrat,Plymouth,0.0,2009
3277,United Kingdom,Plymouth,234982.0,2011


***
# Q3: 10 ethnic groups with the largest overall populations (sum of best/latest estimates over all countries)

In [24]:
ethnic = []
ethnic_pcg = []
country = []
country_pop = []
ethnic_pop_df = pd.DataFrame(columns=['country', 'ethnic', 'ethnic percentage'])
for elm in root.getiterator('country'):
    for subelm in elm.getiterator('ethnicgroup'):
        country.append(elm.find('name').text)
        ethnic.append(subelm.text)
        ethnic_pcg.append(subelm.attrib['percentage'])
        
ethnic_pop_df.country = country
ethnic_pop_df.ethnic = ethnic
ethnic_pop_df['ethnic percentage'] = ethnic_pcg
ethnic_pop_df['ethnic percentage'] = ethnic_pop_df['ethnic percentage'].astype('float')
ethnic_pop_df.head(10)

Unnamed: 0,country,ethnic,ethnic percentage
0,Albania,Albanian,95.0
1,Albania,Greek,3.0
2,Greece,Greek,93.0
3,Macedonia,Macedonian,64.2
4,Macedonia,Albanian,25.2
5,Macedonia,Turkish,3.9
6,Macedonia,Gypsy,2.7
7,Macedonia,Serb,1.8
8,Serbia,Serb,82.9
9,Serbia,Montenegrin,0.9


***
### adding populations of each country

In [25]:
ethnic_pop_df = pd.merge(ethnic_pop_df, country_recent_pop_df, left_on='country', right_on='country', right_index=True)
ethnic_pop_df = ethnic_pop_df.rename(columns={'population':'country population'})
ethnic_pop_df.head(10)

Unnamed: 0,country,ethnic,ethnic percentage,year,country population
0,Albania,Albanian,95.0,2011,2800138.0
1,Albania,Greek,3.0,2011,2800138.0
2,Greece,Greek,93.0,2011,10816286.0
3,Macedonia,Macedonian,64.2,2011,2059794.0
4,Macedonia,Albanian,25.2,2011,2059794.0
5,Macedonia,Turkish,3.9,2011,2059794.0
6,Macedonia,Gypsy,2.7,2011,2059794.0
7,Macedonia,Serb,1.8,2011,2059794.0
8,Serbia,Serb,82.9,2011,7120666.0
9,Serbia,Montenegrin,0.9,2011,7120666.0


***
### computing ethnic groups populations

In [26]:
ethnic_pop_df['ethnic population'] = ethnic_pop_df['ethnic percentage']/100.0 * ethnic_pop_df['country population']
ethnic_pop_df.head(10)

Unnamed: 0,country,ethnic,ethnic percentage,year,country population,ethnic population
0,Albania,Albanian,95.0,2011,2800138.0,2660131.0
1,Albania,Greek,3.0,2011,2800138.0,84004.14
2,Greece,Greek,93.0,2011,10816286.0,10059150.0
3,Macedonia,Macedonian,64.2,2011,2059794.0,1322388.0
4,Macedonia,Albanian,25.2,2011,2059794.0,519068.1
5,Macedonia,Turkish,3.9,2011,2059794.0,80331.97
6,Macedonia,Gypsy,2.7,2011,2059794.0,55614.44
7,Macedonia,Serb,1.8,2011,2059794.0,37076.29
8,Serbia,Serb,82.9,2011,7120666.0,5903032.0
9,Serbia,Montenegrin,0.9,2011,7120666.0,64085.99


***
### 10 Largest ethnic groups

In [27]:
ethnic_pop_df.groupby('ethnic')[['ethnic population']].sum().\
              sort_values(by='ethnic population', ascending=False).head(10)

Unnamed: 0_level_0,ethnic population
ethnic,Unnamed: 1_level_1
Han Chinese,1245059000.0
Indo-Aryan,871815600.0
European,494872200.0
African,318325100.0
Dravidian,302713700.0
Mestizo,157734400.0
Bengali,146776900.0
Russian,131857000.0
Japanese,126534200.0
Malay,121993600.0


***
# Q4: name and country of a) longest river, b) largest lake and c) airport at highest elevation

***
## Q4-1: Longest Rivers

In [28]:
river_name = []
river_country_abrv = []
river_length = []
river_df = pd.DataFrame(columns={'river_name', 'country_abrv', 'river_length'})
for elm in root.getiterator('river'):
    river_name.append(elm.find('name').text)
    if elm.find('length') is not None:
        river_length.append(elm.find('length').text)
    else:
        river_length.append(np.nan)
    river_country_abrv.append(elm.find('source').attrib.get('country'))
    
river_df.river_name = river_name
river_df.country_abrv = river_country_abrv
river_df.river_length = river_length
river_df.river_length = river_df.river_length.astype(float)
river_df.head(10)

Unnamed: 0,river_length,country_abrv,river_name
0,230.0,IS,Thjorsa
1,206.0,IS,Joekulsa a Fjoellum
2,604.0,N,Glomma
3,322.0,N,Lagen
4,93.0,S,Goetaaelv
5,460.0,N,Klaraelv
6,470.0,S,Umeaelv
7,520.0,S,Dalaelv
8,320.0,S,Vaesterdalaelv
9,241.0,S,Oesterdalaelv


***
### Adding Country name based its abbreviation

In [29]:
river_df = pd.merge(river_df, abrv_df, left_on='country_abrv', right_on='abrv', right_index=True)
river_df = river_df[['river_name', 'country', 'country_abrv', 'river_length']]
river_df.head(10)

Unnamed: 0,river_name,country,country_abrv,river_length
0,Thjorsa,Iceland,IS,230.0
1,Joekulsa a Fjoellum,Iceland,IS,206.0
2,Glomma,Norway,N,604.0
3,Lagen,Norway,N,322.0
5,Klaraelv,Norway,N,460.0
4,Goetaaelv,Sweden,S,93.0
6,Umeaelv,Sweden,S,470.0
7,Dalaelv,Sweden,S,520.0
8,Vaesterdalaelv,Sweden,S,320.0
9,Oesterdalaelv,Sweden,S,241.0


***
### Longest Rivers

In [30]:
river_df.sort_values(by='river_length', ascending=False).head(10)

Unnamed: 0,river_name,country,country_abrv,river_length
174,Amazonas,Peru,PE,6448.0
137,Jangtse,China,CN,6380.0
136,Hwangho,China,CN,4845.0
123,Lena,Russia,R,4400.0
205,Zaire,Zaire,ZRE,4374.0
138,Mekong,China,CN,4350.0
115,Irtysch,Kazakhstan,KAZ,4248.0
186,Niger,Guinea,RG,4184.0
160,Missouri,United States,USA,4130.0
119,Jenissej,Russia,R,4092.0


### Shortest Rivers

In [31]:
river_df.sort_values(by='river_length', ascending=True).head(10)

Unnamed: 0,river_name,country,country_abrv,river_length
153,Straits of Mackinac,United States,USA,0.1
46,Würm,Germany,D,35.0
81,Limmat,Switzerland,CH,36.3
41,Brigach,Germany,D,43.0
87,Buna,Albania,AL,44.0
40,Breg,Germany,D,45.9
102,Newa,Russia,R,74.0
35,Mincio,Italy,I,75.0
104,Narva,Russia,R,78.0
236,Eucumbene River,Australia,AUS,83.0


***
### Problem: As we know 'Nile' river is the 1st or 2nd longest river, so what happens to it in this table?

### maybe it's length value is missed. So, it's usefull to see the missing data in this dataframe, and also, searching all rivers containing 'Nile' in its name.

***
### 1-Rivers with not mentioned length:

In [32]:
river_df[river_df.river_length.isnull()]

Unnamed: 0,river_name,country,country_abrv,river_length
190,Nile,Sudan,SUD,
193,White Nile,South Sudan,SSD,
198,Bahr el-Djebel/Albert-Nil,Uganda,EAU,
200,Victoria Nile,Uganda,EAU,
222,Lualaba,Zaire,ZRE,



#### As seen, 4 of 5 rivers with no length in the table blongs to 'Nile' River
***

***
### 2-Rivers containing 'Nile' in its name

In [33]:
river_df[river_df.river_name.str.contains('Nile')]

Unnamed: 0,river_name,country,country_abrv,river_length
190,Nile,Sudan,SUD,
192,Blue Nile,Ethiopia,ETH,1783.0
193,White Nile,South Sudan,SSD,
200,Victoria Nile,Uganda,EAU,


#### There is 4 river names, containing 'Nile' in its name, it means as 'Nile' passes through different countries, it takes another attribute in its name. However, unfortunately, only 1 of 4, has length value.
***

***
## Q4-2: Largest Lakes

In [34]:
lake_area = []
lake_name = []
lake_country = []
lake_df = pd.DataFrame(columns={'lake_name', 'country_abrv', 'lake_area'})
for elm in root.getiterator('lake'):
    lake_name.append(elm.find('name').text)
    if elm.find('area') is not None:
        lake_area.append(elm.find('area').text)
    else:
        lake_area.append(np.nan)
    
    if elm.find('located') is not None:
        lake_country.append(elm.find('located').attrib['country'])
    else:
        lake_country.append(np.nan)
    
lake_df.country_abrv = lake_country
lake_df.lake_name = lake_name
lake_df.lake_area = lake_area
lake_df.lake_area = lake_df.lake_area.astype(float)
lake_df.head(10)

Unnamed: 0,country_abrv,lake_name,lake_area
0,SF,Inari,1040.0
1,SF,Oulujaervi,928.0
2,SF,Kallavesi,472.0
3,SF,Saimaa,4370.0
4,SF,Paeijaenne,1118.0
5,N,Mjoesa-See,368.0
6,S,Storuman,173.0
7,S,Siljan,290.0
8,S,Maelaren,1140.0
9,S,Vaenern,5648.0


***
### Adding country name to its country abrviation

In [35]:
lake_df = pd.merge(lake_df, abrv_df, left_on='country_abrv', right_on='abrv', right_index=True)
lake_df = lake_df[['lake_name', 'country', 'country_abrv', 'lake_area']]
lake_df.head(10)

Unnamed: 0,lake_name,country,country_abrv,lake_area
0,Inari,Finland,SF,1040.0
1,Oulujaervi,Finland,SF,928.0
2,Kallavesi,Finland,SF,472.0
3,Saimaa,Finland,SF,4370.0
4,Paeijaenne,Finland,SF,1118.0
5,Mjoesa-See,Norway,N,368.0
6,Storuman,Sweden,S,173.0
7,Siljan,Sweden,S,290.0
8,Maelaren,Sweden,S,1140.0
9,Vaenern,Sweden,S,5648.0


***
### Largest Lakes

In [36]:
lake_df.sort_values(by='lake_area', ascending=False).head(10)

Unnamed: 0,lake_name,country,country_abrv,lake_area
54,Caspian Sea,Russia,R,386400.0
109,Lake Superior,Canada,CDN,82103.0
81,Lake Victoria,Tanzania,EAT,68870.0
106,Lake Huron,Canada,CDN,59600.0
108,Lake Michigan,United States,USA,57800.0
47,Dead Sea,Israel,IL,41650.0
83,Lake Tanganjika,Zaire,ZRE,32893.0
98,Great Bear Lake,Canada,CDN,31792.0
43,Ozero Baikal,Russia,R,31492.0
89,Lake Malawi,Mozambique,MOC,29600.0


***

***
## Q4-3: Highest Located Airports


In [37]:
country = []
airport_name = []
airport_elevation = []
airport_df = pd.DataFrame(columns={'airport_name', 'country_abrv', 'elevation'})
for elm in root.getiterator('airport'):
    airport_name.append(elm.find('name').text)
    airport_elevation.append(elm.find('elevation').text) 
    country.append(elm.attrib.get('country'))
    
airport_df.country_abrv = country
airport_df.airport_name = airport_name
airport_df.elevation = airport_elevation
airport_df.elevation = airport_df.elevation.astype(float)
airport_df.head(10)

Unnamed: 0,elevation,country_abrv,airport_name
0,977.0,AFG,Herat
1,1792.0,AFG,Kabul Intl
2,38.0,AL,Tirana Rinas
3,811.0,DZ,Cheikh Larbi Tebessi
4,822.0,DZ,Batna Airport
5,6.0,DZ,Soummam
6,1377.0,DZ,Tamanrasset
7,88.0,DZ,Biskra
8,691.0,DZ,Mohamed Boudiaf Intl
9,1024.0,DZ,Ain Arnat Airport


***
### Adding Countries name based on its abbriviations

In [38]:
airport_df = pd.merge(airport_df, abrv_df, left_on='country_abrv', right_on='abrv', right_index=True)
airport_df = airport_df[['airport_name', 'country', 'country_abrv', 'elevation']]
airport_df.head(10)

Unnamed: 0,airport_name,country,country_abrv,elevation
0,Herat,Afghanistan,AFG,977.0
1,Kabul Intl,Afghanistan,AFG,1792.0
2,Tirana Rinas,Albania,AL,38.0
3,Cheikh Larbi Tebessi,Algeria,DZ,811.0
4,Batna Airport,Algeria,DZ,822.0
5,Soummam,Algeria,DZ,6.0
6,Tamanrasset,Algeria,DZ,1377.0
7,Biskra,Algeria,DZ,88.0
8,Mohamed Boudiaf Intl,Algeria,DZ,691.0
9,Ain Arnat Airport,Algeria,DZ,1024.0


***
### Top 10 Airports located in Highst Elevations

In [39]:
airport_df.sort_values(by='elevation', ascending=False).head(10)

Unnamed: 0,airport_name,country,country_abrv,elevation
80,El Alto Intl,Bolivia,BOL,4063.0
219,Lhasa-Gonggar,China,CN,4005.0
241,Yushu Batang,China,CN,3963.0
813,Juliaca,Peru,PE,3827.0
815,Teniente Alejandro Velasco Astete Intl,Peru,PE,3311.0
82,Juana Azurduy De Padilla,Bolivia,BOL,2905.0
334,Mariscal Sucre Intl,Ecuador,EC,2813.0
805,Coronel Fap Alfredo Mendivil Duarte,Peru,PE,2719.0
807,Mayor General FAP Armando Revoredo Iglesias Ai...,Peru,PE,2677.0
692,Licenciado Adolfo Lopez Mateos Intl,Mexico,MEX,2581.0
