BD SQL & NOSQL Project
======================
### Project based on data from the 2014 RGPH (Recensement Général de la Population et de l’Habitat, the general population and housing census)
### DATA SOURCE: [RGPH Website](http://rgphentableaux.hcp.ma)

## Part 01: Building a MySQL database and loading data from the [RGPH Website](http://rgphentableaux.hcp.ma)

In [None]:
# Import libraries
import json
import requests
import pymysql
from requests_html import HTMLSession
from time import perf_counter

### Question 01: MySQL Database Diagram
![Mysql DB Diagram](https://i.imgur.com/jnfiqlW.png)

In [None]:
# Question 02: Building Database

# Connection settings for the local MySQL server and the ehtp_rgph database.
DB_PARAMS = dict(
    host='localhost',
    port=3309,
    user='ehtp',
    password='ehtp',
    database='ehtp_rgph',
)

# One connection and one cursor are opened here and reused by the statements below.
mysql_cnx = pymysql.connect(**DB_PARAMS)
mysql_crs = mysql_cnx.cursor()

# Purge any previous schema. Referencing (child) tables are listed before the
# tables they point to, so a foreign key never blocks a DROP.
print('Dropping tables if they exist...')
for table_name in (
    'observations',
    'indicateurs',
    'themes',
    'communes',
    'provinces',
    'regions',
    'pays',
):
    mysql_crs.execute(f'DROP TABLE IF EXISTS {table_name};')

# pays: countries. `code` is UNIQUE so that regions.pays_code can reference it.
print('Creating Pays table...')
pays_ddl = """
        CREATE TABLE pays (
            id INT NOT NULL AUTO_INCREMENT,
            code VARCHAR ( 5 ) UNIQUE,
            libelle VARCHAR ( 50 ) NOT NULL,
        PRIMARY KEY ( id ) 
        );
    """
mysql_crs.execute(pays_ddl)

# regions: each row points to its country through pays_code -> pays.code.
print('Creating Regions table...')
regions_ddl = """
        CREATE TABLE regions (
            id INT NOT NULL AUTO_INCREMENT,
            pays_code VARCHAR ( 5 ),
            code VARCHAR ( 10 ) UNIQUE,
            libelle VARCHAR ( 50 ) NOT NULL,
            PRIMARY KEY ( id ),
        CONSTRAINT fk_pays FOREIGN KEY ( pays_code ) REFERENCES pays ( code ) 
        );
    """
mysql_crs.execute(regions_ddl)

# provinces: each row points to its region through region_code -> regions.code.
print('Creating Provinces table...')
provinces_ddl = """
        CREATE TABLE provinces (
            id INT NOT NULL AUTO_INCREMENT,
            region_code VARCHAR ( 20 ),
            code VARCHAR ( 20 ) UNIQUE,
            libelle VARCHAR ( 50 ) NOT NULL,
            PRIMARY KEY ( id ),
        CONSTRAINT fk_region FOREIGN KEY ( region_code ) REFERENCES regions ( code ) 
        );
    """
mysql_crs.execute(provinces_ddl)

# communes: each row points to its province through province_code -> provinces.code.
print('Creating Communes table...')
communes_ddl = """
        CREATE TABLE communes (
            id INT NOT NULL AUTO_INCREMENT,
            province_code VARCHAR ( 20 ),
            code VARCHAR ( 20 ) UNIQUE,
            libelle VARCHAR ( 50 ) NOT NULL,
            PRIMARY KEY ( id ),
        CONSTRAINT fk_province FOREIGN KEY ( province_code ) REFERENCES provinces ( code ) 
        );
    """
mysql_crs.execute(communes_ddl)

# themes: `code` is UNIQUE so that indicateurs.theme_code can reference it.
print('Creating Themes table...')
themes_ddl = """
        CREATE TABLE themes (
            id INT NOT NULL AUTO_INCREMENT,
            code VARCHAR ( 10 ) UNIQUE,
            libelle VARCHAR ( 50 ) NOT NULL,
            PRIMARY KEY ( id )
        );
    """
mysql_crs.execute(themes_ddl)

# indicateurs: each row points to its theme through theme_code -> themes.code.
# Observations reference this table through its auto-increment id.
print('Creating Indicateurs table...')
indicateurs_ddl = """
        CREATE TABLE indicateurs (
            id INT NOT NULL AUTO_INCREMENT,
            theme_code VARCHAR ( 10 ),
            libelle VARCHAR ( 200 ) NOT NULL,
            PRIMARY KEY ( id ),
        CONSTRAINT fk_theme FOREIGN KEY ( theme_code ) REFERENCES themes ( code ) 
        );
    """
mysql_crs.execute(indicateurs_ddl)

# observations: one value per (commune, indicateur) pair, linked to
# communes.code and indicateurs.id by foreign keys.
print('Creating Observations table...')
observations_ddl = """
        CREATE TABLE observations (
            id INT NOT NULL AUTO_INCREMENT,
            commune_code VARCHAR ( 20 ),
            indicateur_id INT,
            observation DOUBLE,
            PRIMARY KEY ( id ),
        CONSTRAINT fk_commune FOREIGN KEY ( commune_code ) REFERENCES communes ( code ),
        CONSTRAINT fk_indicateur FOREIGN KEY ( indicateur_id ) REFERENCES indicateurs ( id )
        );
    """
mysql_crs.execute(observations_ddl)

In [None]:
# Question 03: Loading Data into Database from RGPH Website

# The previous cell must be re-run before each run of this one: it recreates
# the empty tables and the connection/cursor used below.

RGPH_URL = 'http://rgphentableaux.hcp.ma/Default1/'
session = HTMLSession()

# Fetch the landing page once; the region, province, commune and theme lists
# below are all scraped from this parsed HTML.
landing_page = session.get(RGPH_URL)
rgph_html = landing_page.html

# Insert the single country row; '01' is the pays_code used for every region.
print('Filling Pays table...')
mysql_crs.execute("INSERT INTO pays (code, libelle) VALUES ('01', 'maroc')")

# Regions: one row per <option> under #REGIONSLIST, using the option's value
# as the code and its text as the label. All rows are attached to pays '01'.
print('Filling Regions table...')
sql_query = "INSERT INTO regions (pays_code, code, libelle) VALUES ('01', %s, %s)"
query_values = [
    (option.attrs['value'].strip(), option.text.strip())
    for option in rgph_html.find("#REGIONSLIST option")
]
mysql_crs.executemany(sql_query, query_values)

#Filling Provinces table
print('Filling Provinces table...')
# The province -> region link is read from a local JSON file keyed by province
# code; each value is an object holding a 'code_region' entry.
# The encoding is given explicitly so the file is read the same way on every
# platform instead of depending on the locale's default encoding.
with open('province_region_code.json', encoding='utf-8') as json_file:
    region_province = json.load(json_file)

sql_query = "INSERT INTO provinces (region_code, code, libelle) VALUES (%s, %s, %s)"
query_values = []
unmapped_provinces = []
for opt in rgph_html.find("#PROVINCESLIST option"):
    province_code = opt.attrs['value'].strip()
    region_info = region_province.get(province_code)
    if not region_info:
        # No region known for this option: it cannot be inserted, but keep
        # track of it so the omission is visible rather than silent.
        unmapped_provinces.append(province_code)
        continue
    query_values.append((region_info.get('code_region'), province_code, opt.text.strip()))
mysql_crs.executemany(sql_query, query_values)

if unmapped_provinces:
    # Communes of a skipped province would later violate fk_province, so
    # report the skipped codes up front.
    print(
        f'Skipped {len(unmapped_provinces)} province option(s) missing from '
        f'province_region_code.json: {unmapped_provinces}'
    )


# Communes: one row per <option> under #COMMUNELIST. The last character of the
# option value is dropped to obtain the commune code, and the first three
# characters of that code are used as the province code.
print('Filling Communes table...')
sql_query = "INSERT INTO communes (province_code, code, libelle) VALUES (%s, %s, %s)"
commune_rows = [
    (option.attrs['value'].strip()[:-1], option.text.strip())
    for option in rgph_html.find("#COMMUNELIST option")
]
# Commune codes are kept in page order for the observations download below.
communes = [code for code, _ in commune_rows]
query_values = [(code[:3], code, label) for code, label in commune_rows]
mysql_crs.executemany(sql_query, query_values)


# Themes: one row per element matching #THEM, using its 'value' attribute as
# the code and its 'text' attribute as the label.
print('Filling Themes table...')
sql_query = "INSERT INTO themes (code, libelle) VALUES (%s, %s)"
query_values = [
    (element.attrs['value'].strip(), element.attrs['text'].strip())
    for element in rgph_html.find("#THEM")
]
# Theme codes are reused below to query the data endpoint theme by theme.
themes = [code for code, _ in query_values]
mysql_crs.executemany(sql_query, query_values)


#Filling Indicateurs table
print('Filling Indicateurs table...')
# Only indicator labels starting with one of these prefixes are stored.
PREFIX_INDICATEURS = ('iuf', 'ium', 'irf', 'irm')
HEADERS = {'Referer': RGPH_URL}
sql_query = "INSERT INTO indicateurs (theme_code, libelle) VALUES (%s, %s)"
# Maps the normalised indicator label to the id of its row in `indicateurs`;
# the observations load uses it to resolve indicateur_id.
indicateurs = {}
for theme in themes:
    # A fixed commune (001.01.01) is queried once per theme to list the
    # indicators that the theme exposes.
    url = f'{RGPH_URL}getDATA/?type=Commune&CGEO=001.01.01.&them={theme}'
    result = requests.get(url, headers=HEADERS).json()
    for line in result:
        # Tolerate entries without an 'INDICATEUR' key instead of crashing.
        indicateur = (line.get('INDICATEUR') or '').strip().lower()
        if not indicateur.startswith(PREFIX_INDICATEURS) or indicateur in indicateurs:
            continue
        # Insert row by row so the generated AUTO_INCREMENT id can be read
        # right after each insert. Reading the id before the rows exist (as a
        # batched insert would require) yields a stale value shared by all rows.
        mysql_crs.execute(sql_query, (theme, indicateur))
        indicateurs[indicateur] = mysql_crs.lastrowid



#Filling observations table
def _parse_observation(valeur):
    """Convert a raw 'DATA2014' value to a float.

    Accepts numbers as well as numeric strings, including decimals and
    negative values. A decimal comma is accepted too, in case the site
    formats values that way (assumption, to be confirmed against real data).
    Missing, non-numeric or non-finite values fall back to 0.0, the same
    fallback the load has always used for unusable values.
    """
    import math

    if valeur is None:
        return 0.0
    try:
        parsed = float(str(valeur).strip().replace(',', '.'))
    except ValueError:
        return 0.0
    # NaN / infinity cannot be stored in a MySQL DOUBLE column.
    return parsed if math.isfinite(parsed) else 0.0


t1 = perf_counter()
print('Filling Observations table...')
# The foreign key column of `observations` is commune_code (not commune_id).
sql_query = "INSERT INTO observations (commune_code, indicateur_id, observation) VALUES (%s, %s, %s)"
total_communes = len(communes)
for counter, commune_code in enumerate(communes, start=1):
    # Rows are buffered per commune and flushed with executemany, which keeps
    # memory bounded while still batching the inserts.
    query_values = []
    for theme in themes:
        url = f'{RGPH_URL}getDATA/?type=Commune&CGEO={commune_code}.&them={theme}'
        result = requests.get(url, headers=HEADERS).json()
        for line in result:
            libelle = (line.get('INDICATEUR') or '').strip().lower()
            indicateur_id = indicateurs.get(libelle)
            if indicateur_id:
                observation = _parse_observation(line.get('DATA2014'))
                query_values.append((commune_code, indicateur_id, observation))
    if query_values:
        mysql_crs.executemany(sql_query, query_values)
    print(f"Commune {commune_code} filled. {round(counter*100/total_communes,2)}%")
print(f'Observations table filled in {round(perf_counter() - t1, 2)} seconds.')


# Commit the transaction so the rows inserted through this connection are saved.
mysql_cnx.commit()