<a href="https://colab.research.google.com/github/blongho/countries/blob/master/country_info_processor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import requests
import os
import io

In [2]:
# The web url for country info
countryinfo_url = "https://download.geonames.org/export/dump/countryInfo.txt"

In [3]:
# The headers. Check the weburl above
country_data_cols = ['alpha2','alpha3','id','fips','name','capital','area',
'population','continent','tld','currency_code','currency_name','phone',
'Postal_Code_Format','Postal_Code_Regex','languages',
'geonameid','neighbours','EquivalentFipsCode']

country_data_cols

['alpha2',
 'alpha3',
 'id',
 'fips',
 'name',
 'capital',
 'area',
 'population',
 'continent',
 'tld',
 'currency_code',
 'currency_name',
 'phone',
 'Postal_Code_Format',
 'Postal_Code_Regex',
 'languages',
 'geonameid',
 'neighbours',
 'EquivalentFipsCode']

In [4]:
# Get the text data from the url
text = requests.get(countryinfo_url).text

## Read the country data

In [5]:
# read the countries info
# There are some comments at the beginning of the file. After looking at it,
# 50 rows are skipped to avoid the comments cluttering the 'usefull' data
countries = pd.read_csv(io.StringIO(text), skiprows=50, sep='\t', names=country_data_cols)

In [6]:
countries.head()

Unnamed: 0,alpha2,alpha3,id,fips,name,capital,area,population,continent,tld,currency_code,currency_name,phone,Postal_Code_Format,Postal_Code_Regex,languages,geonameid,neighbours,EquivalentFipsCode
0,AD,AND,20,AN,Andorra,Andorra la Vella,468.0,77006,EU,.ad,EUR,Euro,376,AD###,^(?:AD)*(\d{3})$,ca,3041565,"ES,FR",
1,AE,ARE,784,AE,United Arab Emirates,Abu Dhabi,82880.0,9630959,AS,.ae,AED,Dirham,971,,,"ar-AE,fa,en,hi,ur",290557,"SA,OM",
2,AF,AFG,4,AF,Afghanistan,Kabul,647500.0,37172386,AS,.af,AFN,Afghani,93,,,"fa-AF,ps,uz-AF,tk",1149361,"TM,CN,IR,TJ,PK,UZ",
3,AG,ATG,28,AC,Antigua and Barbuda,St. John's,443.0,96286,,.ag,XCD,Dollar,+1-268,,,en-AG,3576396,,
4,AI,AIA,660,AV,Anguilla,The Valley,102.0,13254,,.ai,XCD,Dollar,+1-264,,,en-AI,3573511,,


In [7]:
# Drop non-useful columns
countries = countries.drop(columns=['Postal_Code_Format','Postal_Code_Regex','geonameid','EquivalentFipsCode','fips'])

## Append the Currency symbols to the currency

In [8]:
from bs4 import BeautifulSoup
import json

In [9]:
# currencies page 
currencies_url = 'https://en.wikipedia.org/wiki/List_of_circulating_currencies'

In [10]:
currency_text = requests.get(currencies_url)

In [11]:
soup = BeautifulSoup(currency_text.content, 'html.parser')

In [12]:
trows = rows = soup.tbody.find_all('tr')
len(trows)

267

In [13]:
from collections import namedtuple

Currency = namedtuple('Currency', ['CurrencyCode', 'CurrencyName', 'CurrencySymbol'])

In [14]:
col_data = []
for row in trows:
    cols = row.find_all('td')
    cols = [ele.text.strip() for ele in cols]
    if len(cols) > 0:
        code = str(cols[3]).replace('[','').replace(']','')
        if not code.__contains__('none'):
            col_data.append(Currency(CurrencyCode=code,CurrencyName=cols[1], CurrencySymbol=cols[2]))

In [15]:
currencies = pd.DataFrame(data=col_data, columns=['CurrencyCode','CurrencyName', 'CurrencySymbol'])

In [16]:
cu = currencies[currencies['CurrencyCode'] == 'XOF']
cu

Unnamed: 0,CurrencyCode,CurrencyName,CurrencySymbol
26,XOF,West African CFA franc,Fr
40,XOF,West African CFA franc,Fr
59,XOF,West African CFA franc,Fr
102,XOF,West African CFA franc,Fr
148,XOF,West African CFA franc,Fr
171,XOF,West African CFA franc,Fr
210,XOF,West African CFA franc,Fr
237,XOF,West African CFA franc,Fr


In [17]:
country_list = countries.values.tolist()

In [18]:
country_list[0]

['AD',
 'AND',
 20,
 'Andorra',
 'Andorra la Vella',
 468.0,
 77006,
 'EU',
 '.ad',
 'EUR',
 'Euro',
 '376',
 'ca',
 'ES,FR']

In [19]:
def getCurrencySymbolAndName(currency_code:str, currency_list:list)->str:
    """Get the currency symbol from the currency code

    Args:
        currency_code (str): The currecy code for which the symbol belongs
        currency_list (list): A list of all the currencies

    Returns:
        str: The currency symbol if it exists. Otherwise, None is returned
    """
    for curr in currency_list:
        if curr.CurrencyCode == currency_code:
            return [curr.CurrencySymbol, curr.CurrencyName]
    return None 

In [20]:
for country in country_list:
    currency_code = country[9]
    currency_data = getCurrencySymbolAndName(currency_code=currency_code, currency_list=col_data)
    #print(currency_data)
    if currency_data is not None:
      currency_symbol = currency_data[0]
      currency_name = currency_data[1]
      country[10] = currency_name
    #print(currency_code, currency_symbol)
      country.append(currency_symbol)
     

In [21]:
country_list[45]

['CL',
 'CHL',
 152,
 'Chile',
 'Santiago',
 756950.0,
 18729160,
 'SA',
 '.cl',
 'CLP',
 'Chilean peso',
 '56',
 'es-CL',
 'PE,BO,AR',
 '$']

In [22]:
cols = ['alpha2', 'alpha3', 'id',  'name', 'capital', 'area',
       'population', 'continent', 'tld', 'currency_code', 'currency_name',
       'phone', 'languages', 'neighbours','currency_symbol']

In [23]:
countries = pd.DataFrame(data=country_list, columns=cols)

In [24]:
cols_ordered = ['alpha2', 'alpha3', 'id',  'name', 'capital', 'area',
       'population', 'continent', 'tld', 'currency_code', 'currency_name','currency_symbol',
       'phone', 'languages', 'neighbours']

In [25]:
countries = countries[cols_ordered]

In [26]:
countries.head()

Unnamed: 0,alpha2,alpha3,id,name,capital,area,population,continent,tld,currency_code,currency_name,currency_symbol,phone,languages,neighbours
0,AD,AND,20,Andorra,Andorra la Vella,468.0,77006,EU,.ad,EUR,Euro,€,376,ca,"ES,FR"
1,AE,ARE,784,United Arab Emirates,Abu Dhabi,82880.0,9630959,AS,.ae,AED,United Arab Emirates dirham,د.إ,971,"ar-AE,fa,en,hi,ur","SA,OM"
2,AF,AFG,4,Afghanistan,Kabul,647500.0,37172386,AS,.af,AFN,Afghan afghani,؋,93,"fa-AF,ps,uz-AF,tk","TM,CN,IR,TJ,PK,UZ"
3,AG,ATG,28,Antigua and Barbuda,St. John's,443.0,96286,,.ag,XCD,Eastern Caribbean dollar,$,+1-268,en-AG,
4,AI,AIA,660,Anguilla,The Valley,102.0,13254,,.ai,XCD,Eastern Caribbean dollar,$,+1-264,en-AI,


In [27]:
xaf_countries = countries[countries['currency_code']=='XAF']
xaf_countries.shape

(6, 15)

In [28]:
xaf_countries

Unnamed: 0,alpha2,alpha3,id,name,capital,area,population,continent,tld,currency_code,currency_name,currency_symbol,phone,languages,neighbours
40,CF,CAF,140,Central African Republic,Bangui,622984.0,4666377,AF,.cf,XAF,Central African CFA franc,Fr,236,"fr-CF,sg,ln,kg","TD,SD,CD,SS,CM,CG"
41,CG,COG,178,Republic of the Congo,Brazzaville,342000.0,5244363,AF,.cg,XAF,Central African CFA franc,Fr,242,"fr-CG,kg,ln-CG","CF,GA,CD,CM,AO"
46,CM,CMR,120,Cameroon,Yaounde,475440.0,25216237,AF,.cm,XAF,Central African CFA franc,Fr,237,"en-CM,fr-CM","TD,CF,GA,GQ,CG,NG"
75,GA,GAB,266,Gabon,Libreville,267667.0,2119275,AF,.ga,XAF,Central African CFA franc,Fr,241,fr-GA,"CM,GQ,CG"
87,GQ,GNQ,226,Equatorial Guinea,Malabo,28051.0,1308974,AF,.gq,XAF,Central African CFA franc,Fr,240,"es-GQ,fr,pt","GA,CM"
215,TD,TCD,148,Chad,N'Djamena,1284000.0,15477751,AF,.td,XAF,Central African CFA franc,Fr,235,"fr-TD,ar-TD,sre","NE,LY,CF,SD,CM,NG"


In [29]:
import collections
Country = collections.namedtuple(
    'Country', ['alpha2', 'alpha3', 'id', 'name', 'capital', 'area',
                'population', 'continent', 'tld', 'currency', 'phone', 
                'languages', 'neighbours']
)


In [30]:
country_list = []
for c in countries.itertuples(index=False,name='Country'):
  country_list.append(
      Country(alpha2=c.alpha2,
              alpha3=c.alpha3,
              id=c.id,
              name=c.name,
              capital=c.capital,
              area=c.area,
              population=c.population,
              continent=c.capital,
              tld = c.tld,
              currency=Currency(CurrencyCode=c.currency_code,CurrencyName = c.currency_name, CurrencySymbol=c.currency_symbol),
              phone=c.phone,
              languages = str(c.languages).split(',') is c.languages is not None,
              neighbours= str(c.neighbours).split(',') is c.neighbours is not None
      )
  )

In [31]:
country_list[7]

Country(alpha2='AO', alpha3='AGO', id=24, name='Angola', capital='Luanda', area=1246700.0, population=30809762, continent='Luanda', tld='.ao', currency=Currency(CurrencyCode='AOA', CurrencyName='Angolan kwanza', CurrencySymbol='Kz'), phone='244', languages=False, neighbours=False)

In [35]:
data = countries.to_json(orient='table', index=False, indent=2)

In [36]:
with open('countries.json', 'w') as file:
    file.write(data)