MOROCCAN CENSUS OF 2014
======================
### Project based on data from the 2014 General Population and Housing Census of Morocco (RGPH)
### DATA SOURCE: [RGPH Website](http://rgphentableaux.hcp.ma)

## Part 01: Building MySQL Database and Loading data from [RGPH Website](http://rgphentableaux.hcp.ma)

In [None]:
# Install dependencies
!pip install pymysql requests requests-html

In [None]:
# Import libraries
import itertools
import json
import multiprocessing
from multiprocessing.pool import ThreadPool

import pymysql
import requests
from requests_html import HTMLSession

### Question 01: MySQL Database Diagram
![Mysql DB Diagram](https://i.imgur.com/PVkhz9E.png)

In [None]:
# Question 02: Building Database

# Connection settings for the MySQL server that hosts the census schema.
DB_PARAMS = dict(
    host='localhost',
    port=3306,
    user='root',
    password='ehtp',
    database='ehtp_rgph',
)

# Open one connection and one cursor; the statements below all go through
# this cursor.
mysql_cnx = pymysql.connect(**DB_PARAMS)
mysql_crs = mysql_cnx.cursor()

# Purge the tables left by a previous run. Tables holding a foreign key are
# listed before the table they reference, so each DROP finds no dependants.
print('Dropping tables if they exist...')
TABLES_IN_DROP_ORDER = (
    'observations_habitat',
    'observations_activite',
    'observations_langue',
    'observations_education',
    'observations_handicap',
    'observations_demographie',
    'communes',
    'provinces',
    'regions',
    'pays',
)
for table_name in TABLES_IN_DROP_ORDER:
    mysql_crs.execute(f'DROP TABLE IF EXISTS {table_name};')

# Geographic hierarchy: pays -> regions -> provinces -> communes.
# Every table below the first carries a foreign key to the `code` column of
# the table defined just before it, so the statements must run in this order.
PAYS_DDL = """
        CREATE TABLE pays (
            id INT NOT NULL AUTO_INCREMENT,
            code VARCHAR ( 5 ) UNIQUE,
            libelle VARCHAR ( 50 ) NOT NULL,
        PRIMARY KEY ( id ) 
        );
    """

REGIONS_DDL = """
        CREATE TABLE regions (
            id INT NOT NULL AUTO_INCREMENT,
            pays_code VARCHAR ( 5 ),
            code VARCHAR ( 10 ) UNIQUE,
            libelle VARCHAR ( 50 ) NOT NULL,
            PRIMARY KEY ( id ),
        CONSTRAINT fk_pays FOREIGN KEY ( pays_code ) REFERENCES pays ( code ) 
        );
    """

PROVINCES_DDL = """
        CREATE TABLE provinces (
            id INT NOT NULL AUTO_INCREMENT,
            region_code VARCHAR ( 20 ),
            code VARCHAR ( 20 ) UNIQUE,
            libelle VARCHAR ( 50 ) NOT NULL,
            PRIMARY KEY ( id ),
        CONSTRAINT fk_region FOREIGN KEY ( region_code ) REFERENCES regions ( code ) 
        );
    """

COMMUNES_DDL = """
        CREATE TABLE communes (
            id INT NOT NULL AUTO_INCREMENT,
            province_code VARCHAR ( 20 ),
            code VARCHAR ( 20 ) UNIQUE,
            libelle VARCHAR ( 50 ) NOT NULL,
            PRIMARY KEY ( id ),
        CONSTRAINT fk_province FOREIGN KEY ( province_code ) REFERENCES provinces ( code ) 
        );
    """

# Announce, then create, each table in dependency order.
for label, ddl in (
    ('Pays', PAYS_DDL),
    ('Regions', REGIONS_DDL),
    ('Provinces', PROVINCES_DDL),
    ('Communes', COMMUNES_DDL),
):
    print(f'Creating {label} table...')
    mysql_crs.execute(ddl)


# Theme code -> name of the table that stores that theme's observations.
OBSERVATIONS_THEMES_LIST = {
    '2': 'observations_demographie',
    '3': 'observations_handicap',
    '4': 'observations_education',
    '5': 'observations_langue',
    '6': 'observations_activite',
    '7': 'observations_habitat',
}

# One table per theme, all with the same columns; each row is tied to a
# commune through a foreign key on communes.code.
print('Creating Observations table...')
for table_name in OBSERVATIONS_THEMES_LIST.values():
    create_statement = f"""
            CREATE TABLE {table_name} (
                id INT NOT NULL AUTO_INCREMENT,
                commune_code VARCHAR ( 20 ),
                indicateur VARCHAR ( 200 ) NOT NULL,
                observation DOUBLE NOT NULL,
                PRIMARY KEY ( id ),
            CONSTRAINT fk_{table_name}_commune FOREIGN KEY ( commune_code ) REFERENCES communes ( code )
            );
        """
    mysql_crs.execute(create_statement)

In [None]:
# Question 03: Loading Data into Database from RGPH Website

#It's necessary to re-run the previous cell every time you want to run this one

# Landing page of the RGPH site; the option lists scraped below come from it.
RGPH_URL = 'http://rgphentableaux.hcp.ma/Default1/'
session = HTMLSession()
rgph_html = session.get(RGPH_URL).html

# Pays: a single hard-coded row.
print('Filling Pays table...')
mysql_crs.execute("INSERT INTO pays (code, libelle) VALUES ('01', 'maroc')")

# Regions: one row per <option> under #REGIONSLIST, all attached to pays '01'.
# The option's value is the region code and its text is the label.
print('Filling Regions table...')
sql_query = "INSERT INTO regions (pays_code, code, libelle) VALUES ('01', %s, %s)"
query_values = [
    (option.attrs['value'].strip(), option.text.strip())
    for option in rgph_html.find("#REGIONSLIST option")
]
mysql_crs.executemany(sql_query, query_values)

#Filling Provinces table
print('Filling Provinces table...')

# Region membership comes from a local JSON file keyed by province code; each
# entry is expected to expose a 'code_region' field.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale, and the file is closed as soon as it has been parsed.
with open('data/province_region_code.json', encoding='utf-8') as json_file:
    region_province = json.load(json_file)

sql_query = "INSERT INTO provinces (region_code, code, libelle) VALUES (%s, %s, %s)"
query_values = []
for opt in rgph_html.find("#PROVINCESLIST option"):
    province_code = opt.attrs['value'].strip()
    region_info = region_province.get(province_code)
    # Options without an entry in the mapping file are left out.
    if region_info:
        query_values.append(
            (region_info.get('code_region'), province_code, opt.text.strip())
        )
mysql_crs.executemany(sql_query, query_values)


# Communes: one row per <option> under #COMMUNELIST.
print('Filling Communes table...')
sql_query = "INSERT INTO communes (province_code, code, libelle) VALUES (%s, %s, %s)"

# The commune code is the option value minus its last character; the option
# text is the label.
commune_options = [
    (option.attrs['value'].strip()[:-1], option.text.strip())
    for option in rgph_html.find("#COMMUNELIST option")
]

# `communes` keeps the bare codes for the observation download further down.
communes = [code for code, _ in commune_options]

# The first three characters of a commune code are used as its province code.
query_values = [(code[:3], code, label) for code, label in commune_options]
mysql_crs.executemany(sql_query, query_values)

#Filling observations table
def get_commune_data(commune_code):
    """Fetch the 2014 indicators of one commune for the current theme.

    The theme is read from the module-level ``theme_code`` variable, which
    must be set before this function is called.

    Args:
        commune_code: Code of the commune to query; a ``.`` is appended to it
            when the request URL is built.

    Returns:
        A list of ``(commune_code, indicateur, observation)`` tuples, one per
        indicator whose lower-cased name starts with an allowed prefix.
        ``observation`` is the ``DATA2014`` value as a float, or 0 when that
        value is missing or not numeric.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    HEADERS = {'Referer': RGPH_URL}
    PREFIX_INDICATEURS_ALLOW = ('iuf', 'ium', 'irf', 'irm', 'lu_', 'lr_')
    # Without a timeout a stalled connection would block its worker forever.
    REQUEST_TIMEOUT = 30  # seconds
    global theme_code
    url = f'{RGPH_URL}getDATA/?type=Commune&CGEO={commune_code}.&them={theme_code}'
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail on 4xx/5xx here instead of trying to parse an error page as JSON.
    response.raise_for_status()

    commune_data = []
    for line in response.json():
        # A missing or null field is treated as an empty string.
        indicateur = (line.get('INDICATEUR') or '').strip().lower()
        if not indicateur.startswith(PREFIX_INDICATEURS_ALLOW):
            continue
        # Values use a decimal comma; normalise it before conversion.
        valeur = str(line.get('DATA2014') or '').replace(',', '.')
        try:
            observation = float(valeur)
        except ValueError:
            # Best effort: non-numeric values are stored as 0.
            observation = 0
        commune_data.append((commune_code, indicateur, observation))
    return commune_data

print('Filling Observations table...')
# Download the data of several communes at the same time.
# The work is network-bound, so threads are enough to overlap the requests.
# Threads also share this module's globals, which is how get_commune_data
# receives `theme_code`; worker processes only inherit that value (and a
# notebook-defined function) when they are forked, which is not the case on
# spawn-based platforms such as Windows and macOS.
with ThreadPool(4) as pool:
    for theme_code, sql_table in OBSERVATIONS_THEMES_LIST.items():
        # map() blocks until every commune is done, so `theme_code` cannot
        # change while workers are still reading it.
        themes_data = pool.map(get_commune_data, communes)
        sql_query = f"""
        INSERT INTO {sql_table} 
        (commune_code, indicateur, observation) 
        VALUES (%s, %s, %s)
    """
        # Flatten the per-commune lists into a single list of rows.
        query_values = list(itertools.chain.from_iterable(themes_data))
        mysql_crs.executemany(sql_query, query_values)

#Committing the transaction
mysql_cnx.commit()