In [1]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd

In [2]:
def get_value(cell):
    
    """ 
        Extract the text directly inside an element and convert it to a number when possible.
        Take a cell tag (e.g. a <td> or <small>); only the tag's own text nodes are read,
            text nested inside child tags is ignored. None is treated as an empty cell.
        Return an int for whole numbers, a float for decimals (spaces inside the number
            are dropped and a leading "-" is accepted), otherwise the cleaned text.
    """
    
    # A missing element (e.g. a cell without a <small> child) has no text.
    if cell is None:
        return ''
    
    # Fixed: the original read from an undefined name `parent` instead of `cell`.
    text = ''.join(cell.find_all(text=True, recursive=False)).strip().replace(u'\xa0', u'')
    
    compact = text.replace(" ", "")
    digits = compact[1:] if compact.startswith("-") else compact
    
    # isdecimal() rather than isnumeric(): characters such as "²" or "½" are
    # "numeric" but rejected by int()/float(), so they must fall through to text.
    if digits.isdecimal():
        return int(compact)
    
    # Exactly one decimal point, so "1.2.3" stays text instead of crashing float().
    if digits.count(".") == 1 and digits.replace(".", "").isdecimal():
        return float(compact)
    
    return text

In [3]:
def country_code(country, mapping_file="data_csv.csv"):
    
    """ 
        Find the two-letter country code for a country name, based on ISO 3166-1.
        Take a country name (matched case-insensitively, surrounding whitespace ignored)
            & optionally the path of the CSV file holding the "Name" and "Code" columns.
        Return the lower-case country code.
        Raise KeyError when the name is not listed in the CSV file.
    """
    
    # keep_default_na=False: by default pandas parses the literal "NA" (Namibia's
    # code) as a missing value, which made the .lower() call below fail.
    table = pd.read_csv(mapping_file, dtype=str, keep_default_na=False)
    
    # Case-folded keys so that names containing lower-case words
    # ("Bosnia and Herzegovina") are found; str.title() would turn the query
    # into "Bosnia And Herzegovina" and miss the entry.
    mapping = {
        name.strip().casefold(): code
        for name, code in zip(table["Name"], table["Code"])
    }
    
    try:
        iso_code = mapping[country.strip().casefold()]
    except KeyError:
        raise KeyError("Unknown country name: {!r}".format(country)) from None
    
    return iso_code.lower()

In [4]:
def general_info(soup, year=2019):
    
    """ 
        Extract General Information from the UNdata database.
        Take a bs4.BeautifulSoup object generated from querying the country's profile page
           & optionally the label to use for the single value column (defaults to 2019).
        Return a dataframe containing the country's General Information, with the field
           names as index and one column named after `year`.
    """
    
    tables = soup.find_all("table")
    
    # The second <table> of the page is the one read for General Information.
    cells_per_row = [r.find_all("td") for r in tables[1].find_all("tr")]
    
    # Rows without any <td> carry no field and would raise IndexError below.
    cells_per_row = [cells for cells in cells_per_row if cells]
    
    # First cell of a row is the field name, last cell is its value.
    labels = [get_value(cells[0]) for cells in cells_per_row]
    values = [get_value(cells[-1]) for cells in cells_per_row]
    
    # The labels are wrapped in an outer list exactly as before, so the index of
    # the returned frame keeps its original shape (pandas reads a list of lists
    # as index levels).
    df = pd.DataFrame({year: values}, index=[labels])
    
    return df

In [5]:
def indicator(soup, indicator):
    
    """ 
        Extract Indicators information from the UNdata database.
        Take a bs4.BeautifulSoup object generated from querying the country's profile page
           & the name of the indicator ('Economic indicators', 'Social indicators' or
           'Environment and infrastructure indicators'; surrounding whitespace is ignored).
        Return a dataframe containing the country's indicators data: a "Unit" column
           followed by one column per header cell of the table, indexed by indicator label.
        Raise ValueError when the indicator name is not one of the three known sections.
    """
    
    # Position of each indicator table among the page's <table> elements.
    table_position = {
        'Economic indicators': 2,
        'Social indicators': 3,
        'Environment and infrastructure indicators': 4,
    }
    
    name = indicator.strip()
    if name not in table_position:
        # Previously an unknown name left `table` unassigned (UnboundLocalError).
        raise ValueError(
            "Unknown indicator {!r}; expected one of {}".format(indicator, sorted(table_position))
        )
    
    table = soup.find_all("table")[table_position[name]]
    
    # Look up the <td> cells once per row instead of once per row and column.
    rows = [r.find_all("td") for r in table.find_all("tr")]
    header, body = rows[0], rows[1:]
    col_num = len(header)
    
    # Header cells after the first one hold the column labels (the years).
    year = [get_value(cell) for cell in header[1:]]
    col_names = ["Unit"] + year
    
    # The first cell's own text is the indicator label; its <small> child is the unit.
    index = [get_value(cells[0]) for cells in body]
    unit = [get_value(cells[0].small) for cells in body]
    
    # Negative positions: the value cells are the last col_num-1 cells of each row.
    value = [[get_value(cells[i].small) for cells in body] for i in range(-col_num + 1, 0)]
    
    data = dict(zip(col_names, [unit] + value))
    
    df = pd.DataFrame(data, index=index)
    
    return df

In [24]:
def get_profile(country, timeout=30):
    
    """ 
        The main process.
        Take a country name & optionally the number of seconds to wait for the
                UNdata server before giving up (defaults to 30).
        Return a list of two dataframes, which contains the country's General Information 
                and Indicators data respectively. 
        Raise requests.HTTPError when the profile page cannot be retrieved.
    """
    
    code = country_code(country)
    
    url="http://data.un.org/en/iso/"+code+".html"
    
    # requests waits indefinitely unless a timeout is given.
    response = requests.get(url, timeout=timeout)
    
    # Stop here on a 4xx/5xx answer instead of parsing an error page as data.
    response.raise_for_status()
    
    soup = BeautifulSoup(response.content,'lxml')
    
    # Text of every <summary> element; all but the first are passed on as
    # indicator section names.
    section = [i.get_text() for i in soup.find_all("summary")]
    
    GI_df = general_info(soup)
    
    Indicators_df = pd.concat([indicator(soup, i) for i in section[1:]], keys=section[1:])
    
    return [GI_df, Indicators_df]

In [31]:
# Display the first dataframe returned by get_profile (General Information) for Italy.
get_profile("italy")[0]

Unnamed: 0,2019
Region,Southern Europe
"Population(000, 2019)",60550
"Pop. density(per km2, 2019)",205.9
Capital city,Rome
"Capital city pop.(000, 2019)",4234
UN membership date,14-Dec-55
Surface area(km2),302073
Sex ratio(m per 100 f),94.8
National currency,Euro (EUR)
Exchange rate(per US$),0.9


In [32]:
# Display the second dataframe returned by get_profile (Indicators) for Italy.
# Note: this calls get_profile again, so the profile page is requested a second time.
get_profile("italy")[1]

Unnamed: 0,Unnamed: 1,Unit,2005,2010,2019
Economic indicators,GDP: Gross domestic product,(million current US$),1852616,2125058,1943835
Economic indicators,GDP growth rate,"(annual %, const. 2010 prices)",0.9,1.7,1.6
Economic indicators,GDP per capita,(current US$),31502.5,35577.9,32746.6
Economic indicators,Economy: Agriculture,(% of Gross Value Added),2.2,2,2.1
Economic indicators,Economy: Industry,(% of Gross Value Added),25.8,24.4,24.1
Economic indicators,Economy: Services and other activity,(% of GVA),71.9,73.7,73.8
Economic indicators,Employment: Agriculture,(% of employed),4.2,3.8,3.8
Economic indicators,Employment: Industry,(% of employed),30.7,28.6,26
Economic indicators,Employment: Services,(% employed),65.1,67.6,70.2
Economic indicators,Unemployment,(% of labour force),7.7,8.4,10.4
