BD SQL & NOSQL Project
======================
### Project based on data from the 2014 RGPH (Recensement Général de la Population et de l’Habitat, the general population and housing census)
### DATA SOURCE: [RGPH Website](http://rgphentableaux.hcp.ma)

## Part 02: Querying the MySQL Database

In [1]:
# Import libraries
import os

import pandas as pd
import pymysql
# Render floats in DataFrame output with a thousands separator and two decimals.
pd.set_option('display.float_format', '{:,.2f}'.format)

In [2]:
# Database connection
# Every setting can be overridden through an environment variable so that
# credentials do not have to be edited into (and committed with) the notebook.
# The fallbacks are the values this notebook originally hard-coded, so the
# cell behaves exactly as before when no variable is set.
DB_PARAMS = {
    'host': os.environ.get('RGPH_DB_HOST', 'localhost'),
    # Environment values are strings; pymysql is given the port as an int.
    'port': int(os.environ.get('RGPH_DB_PORT', '3306')),
    'user': os.environ.get('RGPH_DB_USER', 'root'),
    # NOTE(review): the fallback password is kept only for backward
    # compatibility; set RGPH_DB_PASSWORD and drop the literal before sharing.
    'password': os.environ.get('RGPH_DB_PASSWORD', 'ehtp'),
    'database': os.environ.get('RGPH_DB_NAME', 'ehtp_rgph'),
}
# Single connection reused by every query cell below.
mysql_cnx = pymysql.connect(**DB_PARAMS)


In [4]:

# Total municipal population per country.
# Sums every observation whose indicator matches '%population munici%' and
# groups the result by country label; the joins walk the hierarchy
# commune -> province -> region -> country.
sql_query = """
    SELECT
        pays.libelle AS 'Pays',
        sum( observations_demographie.observation ) AS 'Population municipale' 
    FROM
        observations_demographie
        JOIN communes ON communes.code = observations_demographie.commune_code
        JOIN provinces ON provinces.code = communes.province_code
        JOIN regions ON regions.code = provinces.region_code
        JOIN pays ON pays.code = regions.pays_code 
    WHERE
        observations_demographie.indicateur LIKE '%population munici%' 
    GROUP BY
        pays.libelle
"""
# Run the query on the open connection; the DataFrame is the cell's output.
pd.read_sql(sql_query, mysql_cnx)

Unnamed: 0,Pays,Population municipale
0,maroc,34737845.0


In [5]:
# Question 05: Top 10 Populated Communes
# Municipal population is summed per commune and the ten largest totals are kept.
# Grouping is done on the commune code (with the label alongside it) rather
# than on the label alone, so that two distinct communes sharing the same name
# are not merged into one inflated total. The output columns are unchanged.
sql_query = """
    SELECT
        communes.libelle AS 'Commune',
        sum( observations_demographie.observation ) AS population_municipale
    FROM
        observations_demographie
        JOIN communes ON communes.code = observations_demographie.commune_code
    WHERE
        observations_demographie.indicateur LIKE '%population munici%'
    GROUP BY
        communes.code,
        communes.libelle
    ORDER BY
        population_municipale DESC
    LIMIT 10;
"""
# Run the query on the open connection; the DataFrame is the cell's output.
pd.read_sql(sql_query, mysql_cnx)

Unnamed: 0,Commune,population_municipale
0,Meknès,517376.0
1,Oujda,492873.0
2,Hay-Hassani,467880.0
3,Sidi Moumen,452863.0
4,Kénitra,423890.0
5,Agadir,420288.0
6,Ménara,409829.0
7,Bni Makada,385922.0
8,Tétouan,377866.0
9,Aîn-Chock,376772.0


In [6]:
# Question 06: Communes outside of the Rabat and Casablanca regions having
# a population greater than 50000, an unemployment rate lower than 15%,
# a female activity rate greater than 30% and an illiteracy rate lower than 20%.
#
# How the query is built:
#   - Each derived table (od_s, oac_s, oaa_s, oe_s) aggregates one indicator
#     per commune_code: SUM for the population, AVG for the three rates.
#   - The inner joins keep only communes that have a row in all four
#     derived tables.
#   - The final WHERE applies the region exclusion and the four thresholds.
#
# NOTE(review): the region filter compares regions.id while the joins use
#   regions.CODE. Presumably '04' and '06' are the Rabat and Casablanca
#   region codes; confirm that `id` holds the same values as `code`.
# NOTE(review): in "%f_taux net d'activité%" the underscore is a LIKE
#   wildcard matching any single character, not a literal '_'.
# NOTE(review): the recorded output of this cell has no rows; verify the
#   filters and indicator patterns against the data.
sql_query = """
    SELECT
        communes.code,
        communes.libelle,
        od_s.population,
        oac_s.taux_chomage,
        oaa_s.taux_activite_feminin,
        oe_s.taux_analphabetisme
    FROM
        communes
        JOIN provinces ON provinces.CODE = communes.province_code
        JOIN regions ON regions.CODE = provinces.region_code
        JOIN (
                        SELECT commune_code, SUM( od.observation ) AS population 
                        FROM observations_demographie as od
                        WHERE od.indicateur like '%population municipale%'
                        GROUP BY commune_code 
                    ) AS od_s ON od_s.commune_code = communes.code
        JOIN (
                        SELECT commune_code, AVG( oac.observation ) AS taux_chomage
                        FROM observations_activite as oac
                        WHERE oac.indicateur like '%taux de chômage%'
                        GROUP BY commune_code 
                    ) AS oac_s ON oac_s.commune_code = communes.code
        JOIN (
                        SELECT commune_code, AVG( oaa.observation ) AS taux_activite_feminin
                        FROM observations_activite as oaa
                        WHERE oaa.indicateur like "%f_taux net d'activité%"
                        GROUP BY commune_code 
                    ) AS oaa_s ON oaa_s.commune_code = communes.code
        JOIN (
                        SELECT commune_code, AVG( oe.observation ) AS taux_analphabetisme
                        FROM observations_education as oe
                        WHERE oe.indicateur like "%taux d'analphabétisme%"
                        GROUP BY commune_code 
                    ) AS oe_s ON oe_s.commune_code = communes.code
    WHERE
        regions.id NOT IN (
        '04',
        '06')
    and od_s.population > 50000
    and oac_s.taux_chomage < 15
    and oaa_s.taux_activite_feminin > 30
    and oe_s.taux_analphabetisme < 20
"""
# Run the query on the open connection; the DataFrame is the cell's output.
pd.read_sql(sql_query, mysql_cnx)

Unnamed: 0,code,libelle,population,taux_chomage,taux_activite_feminin,taux_analphabetisme
