# Mission: Impossible

Le but de cette mission est de webscraper le World Factbook de la CIA et de cartographier les données collectées sur un dashboard cartographique (carte interactive + widget de présentation des données attributaires).  

Voici quelques ressources qui vous permettront de la réaliser :
- https://www.cia.gov/the-world-factbook/
- https://youtu.be/t9Ed5QyO7qY
- https://ipywidgets.readthedocs.io/
- https://public.opendatasoft.com/explore/dataset/world-administrative-boundaries/table/?sort=iso3
- https://www.cia.gov/the-world-factbook/references/country-data-codes/

Cette mission, si vous l'acceptez, se terminera le **17 décembre 2021 à 18h**. À vous de recruter votre équipe (3 personnes max), qui devra comprendre au moins un expert en programmation Python. Comme d'habitude, si vous ou l'un de vos agents étiez capturé ou épuisé, l'Institute of Urban Planning and Alpine Geography nierait avoir eu connaissance de vos agissements.

### Chargement des librairies

In [182]:
from bs4 import BeautifulSoup
import csv
import geopandas
# Librairie de cartographie avec interactions et widgets
from ipyleaflet import GeoJSON, Map, WidgetControl
from ipywidgets import HTML, Layout
import json
import pandas
import urllib.request

### Chargement des données

In [223]:
# Load the table of standardized country codes; it maps each country name
# to several code systems (see the columns in the preview below).
codes_csv = "webscraping/codesxref.csv"
codes = pandas.read_csv(codes_csv)
codes.head(5)

Unnamed: 0,Name,GEC,A3,A2,NUM,STANAG,INTERNET
0,Afghanistan,AF,AF,AFG,004,AFG,.af
1,Akrotiri,AX,-,-,-,-,-
2,Albania,AL,AL,ALB,008,ALB,.al
3,Algeria,AG,DZ,DZA,012,DZA,.dz
4,American Samoa,AQ,AS,ASM,016,ASM,.as


In [231]:
# Load the administrative boundaries of the countries (GeoJSON file,
# one geometry per country).
boundaries_geojson = "webscraping/world-administrative-boundaries.geojson"
pays = geopandas.read_file(boundaries_geojson)
pays.head(5)

Unnamed: 0,french_short,iso3,status,iso_3166_1_alpha_2_codes,name,region,color_code,continent,geometry
0,Samoa,WSM,Member State,WS,Samoa,Polynesia,WSM,Oceania,"MULTIPOLYGON (((-171.42920 -14.01625, -171.441..."
1,Belgique,BEL,Member State,BE,Belgium,Western Europe,BEL,Europe,"POLYGON ((6.01180 50.75727, 6.05472 50.72361, ..."
2,Bangladesh,BGD,Member State,BD,Bangladesh,Southern Asia,BGD,Asia,"MULTIPOLYGON (((91.89749 21.47666, 91.88693 21..."
3,Israël,ISR,Member State,IL,Israel,Western Asia,ISR,Asia,"POLYGON ((35.62364 33.24573, 35.63249 33.24637..."
4,Norvège,NOR,Member State,NO,Norway,Northern Europe,NOR,Europe,"MULTIPOLYGON (((5.28778 59.21889, 5.25597 59.1..."


In [234]:
# Join 'pays' (boundaries) with 'codes'.
# NOTE: in codesxref.csv the column labelled 'A3' holds the two-letter codes
# (e.g. 'AF', 'DZ' in the preview of 'codes'), which is why it is matched
# against the boundaries' ISO 3166-1 alpha-2 column.
p = pays.merge(
    codes,
    how='inner',
    left_on='iso_3166_1_alpha_2_codes',
    right_on='A3',
)
# Projection: keep the geometry, the French name, the join key and the
# regional groupings only.
kept_columns = ['geometry', 'french_short', 'A3', 'region', 'continent']
pays_codes = p[kept_columns]
pays_codes.head()

Unnamed: 0,geometry,french_short,A3,region,continent
0,"MULTIPOLYGON (((-171.42920 -14.01625, -171.441...",Samoa,WS,Polynesia,Oceania
1,"POLYGON ((6.01180 50.75727, 6.05472 50.72361, ...",Belgique,BE,Western Europe,Europe
2,"MULTIPOLYGON (((91.89749 21.47666, 91.88693 21...",Bangladesh,BD,Southern Asia,Asia
3,"POLYGON ((35.62364 33.24573, 35.63249 33.24637...",Israël,IL,Western Asia,Asia
4,"MULTIPOLYGON (((5.28778 59.21889, 5.25597 59.1...",Norvège,NO,Northern Europe,Europe


In [187]:
# URL of the page listing the available themes (one country comparison per theme)
url_themes = "https://www.cia.gov/the-world-factbook/references/guide-to-country-comparisons/"
# The context manager closes the HTTP response once the page has been parsed
with urllib.request.urlopen(url_themes) as page:
    soup = BeautifulSoup(page, 'html.parser')
# Each theme is exposed as an <a class="link-button bold"> anchor
theme = soup.find_all('a', attrs={'class': 'link-button bold'})
# The hrefs are site-relative, so prefix them with the host;
# anchors without an href are skipped instead of raising a TypeError
theme_links = [
    'https://www.cia.gov' + link.get('href')
    for link in theme
    if link.get('href')
]

In [188]:
# Display the collected country-comparison URLs
theme_links

['https://www.cia.gov/the-world-factbook/field/area/country-comparison',
 'https://www.cia.gov/the-world-factbook/field/population/country-comparison',
 'https://www.cia.gov/the-world-factbook/field/median-age/country-comparison',
 'https://www.cia.gov/the-world-factbook/field/population-growth-rate/country-comparison',
 'https://www.cia.gov/the-world-factbook/field/birth-rate/country-comparison',
 'https://www.cia.gov/the-world-factbook/field/death-rate/country-comparison',
 'https://www.cia.gov/the-world-factbook/field/net-migration-rate/country-comparison',
 'https://www.cia.gov/the-world-factbook/field/maternal-mortality-ratio/country-comparison',
 'https://www.cia.gov/the-world-factbook/field/infant-mortality-rate/country-comparison',
 'https://www.cia.gov/the-world-factbook/field/life-expectancy-at-birth/country-comparison',
 'https://www.cia.gov/the-world-factbook/field/total-fertility-rate/country-comparison',
 'https://www.cia.gov/the-world-factbook/field/hiv-aids-adult-preval

In [56]:
# For each link, apply the procedure seen in TP8:
# parse the comparison table and dump it to 'webscraping/<theme>.csv'
for link in theme_links:
    # The context manager closes the HTTP response once the page has been parsed
    with urllib.request.urlopen(link) as page:
        soup = BeautifulSoup(page, 'html.parser')
    table = soup.find('table', attrs={'class': 'content-table table-auto'})
    if table is None:
        # No comparison table on this page: skip it rather than abort the whole run
        print('No data table found, skipped: ' + link)
        continue
    results = table.find_all('tr')
    # Recover the theme from the URL (.../field/<theme>/country-comparison)
    theme = link.split('/')[5].replace('-', '_')
    rows = [['rank', 'country', theme, 'date_of_information']]
    for result in results:
        data = result.find_all('td')
        # Rows without <td> cells (presumably the header) are skipped, as is
        # any row that lacks the four expected cells
        if len(data) >= 4:
            rank = data[0].getText()
            country = data[1].getText()
            # Remove the thousands separator so the value parses as a number
            theme_data = data[2].getText().replace(',', '')
            doi = data[3].getText()
            rows.append([rank, country, theme_data, doi])

    # Create the CSV and write the rows; UTF-8 is explicit so that accented
    # country names are read back correctly by pandas on every platform
    with open('webscraping/' + theme + '.csv', 'w', newline='', encoding='utf-8') as f_output:
        csv_output = csv.writer(f_output)
        csv_output.writerows(rows)

In [245]:
# Join the scraped CSV files for a limited number of themes
selected_themes = ['area', 'population', 'median_age']
factbook = codes
for s in selected_themes:
    # Read only the join key and the theme value: every scraped CSV also
    # carries 'rank' and 'date_of_information', which would collide from one
    # merge to the next and get '_x'/'_y' suffixes.
    df = pandas.read_csv("webscraping/" + s + ".csv", usecols=['country', s])
    # Inner join on the country name
    factbook = factbook.merge(df, how='inner', left_on='Name', right_on='country')
    # After an inner join 'country' equals 'Name'; drop it so the next merge
    # does not produce suffixed duplicates of it
    factbook = factbook.drop(columns='country')
# Projection: expose the name as 'country', whatever the number of themes
factbook = factbook.rename(columns={'Name': 'country'})
factbook = factbook[['country', 'A3'] + selected_themes]
factbook.head()

Unnamed: 0,country,A3,area,population,median_age
0,Afghanistan,AF,652230,37466414,19.5
1,Albania,AL,28748,3088385,34.3
2,Algeria,DZ,2381740,43576691,28.9
3,American Samoa,AS,224,46366,27.2
4,Andorra,AD,468,85645,46.2


In [248]:
# Attach the factbook indicators to the country geometries
# (inner join on the 'A3' key present in both tables)
f = pandas.merge(factbook, pays_codes, how='inner', on='A3')
# Projection: geometry, French name, regional groupings, then the indicators
output_columns = [
    'geometry',
    'french_short',
    'region',
    'continent',
    'area',
    'population',
    'median_age',
]
factbook_pays = f[output_columns]
factbook_pays.head()

Unnamed: 0,geometry,french_short,region,continent,area,population,median_age
0,"POLYGON ((74.91574 37.23733, 74.80873 37.22423...",Afghanistan,Southern Asia,Asia,652230,37466414,19.5
1,"POLYGON ((20.07142 42.56091, 20.10208 42.53347...",Albanie,Southern Europe,Europe,28748,3088385,34.3
2,"POLYGON ((8.62203 36.94137, 8.63222 36.88194, ...",Algérie,Northern Africa,Africa,2381740,43576691,28.9
3,"POLYGON ((-170.63726 -14.28944, -170.74389 -14...",American Samoa,Polynesia,Oceania,224,46366,27.2
4,"POLYGON ((1.72361 42.50944, 1.71889 42.50305, ...",Andorre,Southern Europe,Europe,468,85645,46.2


In [249]:
# Wrap the joined table in a GeoDataFrame and export it as GeoJSON;
# the dashboard cell reads this file back.
fp = geopandas.GeoDataFrame(data=factbook_pays)
fp.to_file(filename='webscraping/factbook_pays.geojson', driver='GeoJSON')

## Dashboard cartographique

In [250]:
# Base map, centred on [45, 0] at a world-scale zoom level
world = Map(center=[45, 0], zoom=2, layout=Layout(width='100%', height='600px'))

# Read the exported GeoJSON back; the encoding is explicit because the file
# contains accented French names and the platform default may not be UTF-8
with open('webscraping/factbook_pays.geojson', 'r', encoding='utf-8') as f:
    factbook_geojson = json.load(f)

# Default rendering of the country polygons: blue outline, light blue fill
style = {
    "stroke": True,
    "color": "#0000ff",
    "weight": 2,
    "opacity": 1,
    "fill": True,
    "fillColor": "#0000ff",
    "fillOpacity": 0.1,
}

# Make the hovered country stand out with a more opaque fill
hover_style = {"fillOpacity": 0.7}

world_geodata = GeoJSON(data=factbook_geojson, style=style, hover_style=hover_style)
world.add_layer(world_geodata)

# Information panel shown in the top-right corner of the map;
# its content is replaced when a country is hovered
html = HTML("World Factbook")
html.layout.margin = '0px 20px 20px 20px'
control = WidgetControl(widget=html, position='topright')
world.add_control(control)

def update_html(feature, **kwargs):
    """Refresh the info panel with the attributes of the hovered country.

    Registered as the hover callback of the GeoJSON layer. `feature` is the
    hovered GeoJSON feature; its 'properties' mapping must contain the six
    keys listed below. Extra keyword arguments are accepted and ignored.
    """
    props = feature['properties']
    # Property keys, in the order the placeholders appear in the template
    fields = ('french_short', 'region', 'continent',
              'area', 'population', 'median_age')
    html.value = '''
     <h3><b>{}</b></h3>
     <h4>Région : {}</h4> 
     <h4>Continent : {}</h4>
     <h4>Surface : {}  km2</h4>
     <h4>Population : {}</h4>
     <h4>Âge médian : {} ans</h4>
      '''.format(*(props[key] for key in fields))


# Update the panel whenever the pointer hovers a country
world_geodata.on_hover(update_html)

# Display the map widget (last expression of the cell)
world

Map(center=[45, 0], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_text…