# Baromètre des résultats - PDF reports

## Imports

In [1]:
import json
import os
from os.path import join
import datetime
import random

from urllib.request import urlopen
import urllib

import plotly.graph_objects as go
import plotly.express as px

import pandas as pd
import numpy as np

from fpdf import FPDF

## Functions

In [2]:
def mkdir_ifnotexist(path) :
    """Create the directory ``path`` if it does not already exist.

    Missing parent directories are created as well, and an existing
    directory is left untouched.

    Args:
        path: Path of the directory to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # makedirs(exist_ok=True) replaces the isdir()/mkdir() pair: it also
    # works for nested paths whose parent is missing, and it does not fail
    # if the directory appears between the check and the creation.
    os.makedirs(path, exist_ok=True)

In [3]:
def get_dep_infos(dep, dep_taxo_path='./data/taxonomies/departements.json', reg_taxo_path='./data/taxonomies/regions.json') :
    """Return the code and label of a department and of its region.

    Args:
        dep: Department code, compared with the ``dep`` field of the
            department taxonomy entries.
        dep_taxo_path: Path to a JSON list of department entries; their
            ``dep``, ``libelle`` and ``reg`` fields are read.
        reg_taxo_path: Path to a JSON list of region entries; their ``reg``
            and ``libelle`` fields are read.

    Returns:
        A dict with the keys ``dep``, ``dep_name``, ``reg`` and ``reg_name``.

    Raises:
        IndexError: If the department, or the region it points to, is not
            present in the taxonomy files.
    """
    # Explicit encoding: labels contain accented characters and the platform
    # default encoding is not UTF-8 everywhere.
    # NOTE(review): assumes the taxonomy files are UTF-8 encoded - confirm.
    with open(dep_taxo_path, 'r', encoding='utf-8') as f :
        dep_dict_list = json.load(f)
    with open(reg_taxo_path, 'r', encoding='utf-8') as f :
        reg_dict_list = json.load(f)

    dep_dict = next((entry for entry in dep_dict_list if entry['dep'] == dep), None)
    if dep_dict is None :
        # IndexError is kept (it is what the former "[...][0]" lookup raised)
        # but now carries a message naming the missing code.
        raise IndexError('Unknown department code: {!r}'.format(dep))
    reg = dep_dict['reg']

    reg_dict = next((entry for entry in reg_dict_list if entry['reg'] == reg), None)
    if reg_dict is None :
        raise IndexError('Unknown region code {!r} for department {!r}'.format(reg, dep))

    return dict(dep=dep,
                dep_name=dep_dict['libelle'],
                reg=reg,
                reg_name=reg_dict['libelle'])

In [4]:
def make_pct_bullet_chart(pct, bar_color='#3D49A5', bg_color='#BBBFDF') :
    """Build a horizontal bullet gauge showing a percentage.

    The percentage is rounded to the nearest integer, drawn on a 0-100 axis
    and repeated as a large white text label anchored near the left edge of
    the figure.

    Args:
        pct: Percentage to display.
        bar_color: Color of the gauge bar.
        bg_color: Background color of the gauge.

    Returns:
        The plotly figure, sized 1200 x 400.
    """
    pct_value = round(pct)

    gauge_spec = {
        'shape': "bullet",
        'axis': {'visible':False, 'range': [0, 100]},
        'bgcolor':bg_color,
        'bordercolor':'white',
        'bar':{'thickness':1, 'color':bar_color},
    }
    indicator = go.Indicator(
        mode="gauge",
        gauge=gauge_spec,
        value=pct_value,
        number={'suffix':'%'},
        domain={'x': [0, 1], 'y': [0, 1]},
    )

    fig = go.Figure(indicator)
    fig.update_layout(height=400, width=1200)

    # Text label positioned in paper coordinates, so it is independent of
    # the gauge axis.
    pct_label = dict(
        font=dict(color='white', size=100),
        x=0.05,
        y=0.5,
        showarrow=False,
        text=str(pct_value)+'%',
        textangle=0,
        xanchor='left',
        xref="paper",
        yref="paper",
    )
    fig.add_annotation(pct_label)

    return fig

In [5]:
# Two-digit month number ('01' to '12') -> French month name.
months_fr_dict = {
    '{:02d}'.format(month_number): month_name
    for month_number, month_name in enumerate(
        [
            'janvier',
            'février',
            'mars',
            'avril',
            'mai',
            'juin',
            'juillet',
            'août',
            'septembre',
            'octobre',
            'novembre',
            'décembre',
        ],
        start=1,
    )
}


def yearmonth_to_fr(yearmonth) :
    """Convert a year-month string such as '2020-01' to 'janvier 2020'.

    The year is taken from the first four characters and the month from the
    last two.

    Raises:
        KeyError: If the last two characters are not a month number between
            '01' and '12'.
    """
    year, month = yearmonth[:4], yearmonth[-2:]
    return '{} {}'.format(months_fr_dict[month], year)

In [12]:
def make_detail_chart(df, chart_type) :
    """Build the time-series chart of one indicator.

    Args:
        df: DataFrame providing the 'date' (x values), 'valeur' (y values)
            and 'date_tick' (x tick labels) columns.
        chart_type: Either 'bar' or 'line'.

    Returns:
        The plotly figure, or None (after printing a message) when
        ``chart_type`` is neither 'bar' nor 'line'.
    """
    is_bar = chart_type == 'bar'
    if not is_bar and chart_type != 'line' :
        print('Unknown chart type.')
        return None

    # Arguments shared by both chart types; only the width differs.
    shared_args = dict(x='date', y='valeur',
                       labels={'date':'', 'valeur':''},
                       height=400,
                       color=None,
                       orientation='v',
                       text=None,
                       title=None)

    if is_bar :
        fig = px.bar(df, width=600, **shared_args)
        trace_style = dict(marker_color='#4DA98E')
    else :
        fig = px.line(df, width=500, **shared_args)
        trace_style = dict(marker=dict(size=12, line=dict(width=2, color='white')),
                           marker_color='#4DA98E',
                           line_color='#4DA98E',
                           line_width=6)

    fig.update_layout(showlegend=False)
    fig.layout['xaxis'].showgrid = False
    fig.update_traces(**trace_style)
    # One tick per data point, labelled with the pre-computed 'date_tick'.
    fig.update_xaxes(tickangle=-45,
                     tickfont=dict(size=8),
                     tickmode='array',
                     tickvals=list(df['date']),
                     ticktext=list(df['date_tick']))

    if not is_bar :
        # Show the point markers in addition to the line.
        fig.data[0].update(mode='markers+lines')

    return fig

## Parameters

In [7]:
# Directory holding the published barometer CSV files that are read as input.
ovq_data_dir_path = './ovq-data/published-data/'
# Directory where the aggregated per-department CSV files are written.
data_dir_path = './data/'
# Image directory: generated charts are written to its 'graphs'
# sub-directory and the images used in the PDF are read from it.
img_dir_path = './img/'
# Directory under which the PDF reports are written.
reports_dir_path = './reports/'

In [8]:
# Chart type ('bar' or 'line') used for the detail graph of each indicator,
# keyed by the indicator id ('id_indicateur').
detail_chart_type_dict = {
    'nb-controles-clir':'bar',
    'logement-sans-domicile-hebergement-urgence':'bar',
    'km-amenagement-cyclables-securises':'line',
    'tx-equipements-dentaire':'line',
    'nombre-contrats-apprentissage':'bar',
    'tx-classes-cp-ce1':'line',
    'tx-classes-cp-ce1-rep':'line',
    'tx-gs-rep':'line',
    'nb-morts-routes':'bar',
    'nb-beneficiaires-suppression-taxe-habitation':'line',
    'tx-reussite-examens':'line',
    'tx-equipements-audio':'line',
    'tx-satisfaction-usagers-services-publics':'line',
    'nb-heures-patrouille':'bar',
    'nb-labellisations-france-services':'line',
    'tx-transparence-resultats':'bar',
    'duree-moyenne-traitement-demandes-allocation-adulte-handicape':'bar',
    'nombre-creation-ueea':'line',
    'taux-deploiement-fibre':'line',
    'nb-habitants-zfe':'line',
    'nb-ordonnances-delivrees':'bar',
    'nombre-creation-ulis':'line',
    'nb-utilisateurs-pass-culture':'line',
    'nb-amendes-delictuelles-dressees':'bar',
    'nb-beneficiaires-siae':'line',
    'nb-francais-emballages-bac-jaune':'line',
    'nb-peres-conge-paternite':'line',
    'nb-maisons-sante':'line',
    'tx-equipements-optique':'line',
    'nb-primes-conversion':'line',
    'nombre-recrutements-jeunes':'bar'
}

In [9]:
# Measure labels grouped by thematic category (category name -> list of
# measure labels).
# NOTE(review): the labels presumably have to match the 'mesure' column of
# the results files exactly (including typographic apostrophes) - confirm.
cat_mesures_dict = {
    'Logement' : ['Offrir un logement aux sans-abris : Logement d’abord'] ,
    'Services publics et territoires' : ["Assurer une bonne couverture en internet fixe et en téléphonie mobile pour tous les Français d'ici 2022",
                                         "Déployer une offre France Services dans tous les territoires",
                                         "Améliorer la qualité du service rendu à l’usager"
                                        ],
    'Transition écologique' : ["Déployer le plan vélo",
                               "Mettre en œuvre la sortie du plastique à usage unique et lutter contre le gaspillage",
                               "Verdir le parc automobile",
                               "Instaurer des zones à faibles émissions"
                              ],
    'Economie - Emploi' : ["Développer l’apprentissage",
                           "Plan #1jeune1solution",
                           "Supprimer la taxe d’habitation sur les résidences principales",
                          ],
    'Santé - Famille - Handicap' : ["Doubler le nombre de maisons de santé",
                                    "Allonger le congé paternité pour un meilleur développement de l’enfant",
                                    "Simplifier l’accès aux droits des personnes handicapées",
                                    "Proposer une offre de lunettes, appareils auditifs et prothèses dentaires remboursée à 100%"
                                   ],
    'Sécurité' : ["Lutter contre les stupéfiants",
                  "Lutter contre les atteintes aux principes républicains",
                  "Lutter contre les violences faites aux femmes",
                  "Réduire la mortalité sur les routes",
                  "Renforcer la sécurité du quotidien"
                 ],
    'Education' : ["Offrir une scolarisation inclusive à tous les enfants handicapés",
                   "Déployer Parcoursup",
                   "Limiter les classes à 24 en grande section, CP, CE1",
                   "Dédoubler les classes en REP (grande section, CP, CE1)"
                  ],
    'Culture' : ["Déployer le Pass culture"]
}

In [10]:
# Sorted list of the distinct department codes found in the departmental
# synthesis file. Codes are read as text (e.g. '01'), not as numbers.
dep_df = pd.read_csv(
    join(ovq_data_dir_path, 'barometre-resultats-synthese-departemental.csv'),
    usecols=['code_departement'],
    dtype={'code_departement':'str'},
)
dep_list = sorted(set(dep_df['code_departement']))
len(dep_list)

101

## Data Processing

In [105]:
# Make sure the output directory for the aggregated data exists.
mkdir_ifnotexist(data_dir_path)

In [156]:
def agg_resultats_departemental(dep, data_type) :
    """Build and save the aggregated results file of one department.

    The departmental, regional and national files of the given data type
    are read, restricted to the department and to its region, and stacked.
    When an indicator is available at several geographic levels ("maille"),
    only the row of the smallest level is kept (department, then region,
    then national). The result is written to
    'barometre-resultats-<data_type>-agg-<dep>.csv' in ``data_dir_path``.

    Args:
        dep: Department code, as text (e.g. '01').
        data_type: 'synthese' (one row kept per indicator) or 'detail' (one
            row kept per indicator and date). Any other value skips the
            de-duplication step.

    Raises:
        IndexError: If ``dep`` is unknown to ``get_dep_infos``.
        KeyError: If one of the geographic columns to drop is missing.
    """
    reg = get_dep_infos(dep)['reg']

    #Aggregating dep, reg and national files
    dep_df = pd.read_csv(join(ovq_data_dir_path, 'barometre-resultats-{}-departemental.csv'.format(data_type)), dtype={'code_departement':'str'})
    # .copy(): the 'maille' column is added to an independent frame rather
    # than to a slice of the full table (avoids SettingWithCopyWarning).
    dep_df = dep_df[dep_df['code_departement'] == dep].copy()
    dep_df['maille'] = '0 - departemental'

    reg_df = pd.read_csv(join(ovq_data_dir_path, 'barometre-resultats-{}-regional.csv'.format(data_type)), dtype={'code_region':'str'})
    reg_df = reg_df[reg_df['code_region'] == reg].copy()
    reg_df['maille'] = '1 - regional'

    nat_df = pd.read_csv(join(ovq_data_dir_path, 'barometre-resultats-{}-national.csv'.format(data_type)))
    nat_df['maille'] = '2 - national'

    df = pd.concat([dep_df, reg_df, nat_df]).reset_index(drop=True)

    #Keeping only the smallest "maille" for each indicator: the numeric
    #prefix of the 'maille' labels makes the smallest level sort first, and
    #drop_duplicates keeps the first occurrence.
    df = df.sort_values('maille')

    if data_type == 'synthese' :
        df = df.drop_duplicates('id_indicateur').reset_index(drop=True)
    elif data_type == 'detail' :
        # NOTE(review): de-duplication is per (indicator, date), so a series
        # may combine rows of different levels when their dates differ -
        # confirm this is intended.
        df = df.drop_duplicates(['id_indicateur', 'date']).reset_index(drop=True)

    #Dropping useless columns. The 'columns' keyword is used because passing
    #the axis positionally (drop(labels, 1)) is rejected by pandas >= 2.0.
    df = df.drop(columns=['code_departement', 'libelle_departement', 'code_region', 'libelle_region', 'maille_geographique'])

    #Saving the file
    df.to_csv(join(data_dir_path, 'barometre-resultats-{}-agg-{}.csv'.format(data_type, dep)), index=False)

In [158]:
%%time
# Build both aggregated files (synthese and detail) for every department.
for dep in dep_list :
    agg_resultats_departemental(dep, data_type='synthese')
    agg_resultats_departemental(dep, data_type='detail')

CPU times: user 7.97 s, sys: 1.21 s, total: 9.19 s
Wall time: 9.27 s


## Making the graphs

In [10]:
# Make sure the directory receiving the generated chart images exists.
mkdir_ifnotexist(join(img_dir_path, 'graphs'))

### Synthese gauges

In [203]:
%%time
# Render one bullet gauge image per department and per indicator having a
# "pourcentage_cible" value.
data_type = 'synthese'

for dep in dep_list :
    df = pd.read_csv(join(data_dir_path, 'barometre-resultats-{}-agg-{}.csv'.format(data_type, dep)))
    #Keeping only indicators with "pourcentage_cible"
    df = df.dropna(subset=['pourcentage_cible'])

    # One dict (column name -> value) per remaining row.
    df_dict = df.transpose().to_dict()

    for row in df_dict.values() :
        id_indicateur = row['id_indicateur']
        fig = make_pct_bullet_chart(row['pourcentage_cible'])
        if fig is None :
            continue

        img_path = join(img_dir_path, 'graphs', 'bullet_chart-{}-pourcentage_cible-{}-dep_{}.png'.format(data_type, id_indicateur, dep))
        fig.write_image(img_path)

CPU times: user 8.85 s, sys: 765 ms, total: 9.61 s
Wall time: 29.2 s


### Detail graphs

In [13]:
%%time
# Render one time-series image per department and per indicator found in
# the aggregated detail file.
data_type = 'detail'

for dep in dep_list :
    df = pd.read_csv(join(data_dir_path, 'barometre-resultats-{}-agg-{}.csv'.format(data_type, dep)))
    df = df.drop_duplicates()

    for id_indicateur in set(df['id_indicateur']) :
        # Raises KeyError if the indicator has no configured chart type.
        chart_type = detail_chart_type_dict[id_indicateur]

        # Rows of this indicator in chronological order, with a French
        # "month year" tick label built from the 'YYYY-MM' prefix of the date.
        df_ind = df[df['id_indicateur'] == id_indicateur].sort_values('date')
        df_ind['date_tick'] = [yearmonth_to_fr(date[:7]) for date in df_ind['date']]

        fig = make_detail_chart(df_ind, chart_type)
        if fig is None :
            continue

        img_path = join(img_dir_path, 'graphs', 'graph-{}-{}-dep_{}.png'.format(data_type, id_indicateur, dep))
        fig.write_image(img_path)

    print('{} - {} done.'.format(datetime.datetime.today(), dep))

2021-01-07 16:01:58.841993 - 01 done.
2021-01-07 16:02:01.195149 - 02 done.
2021-01-07 16:02:03.567198 - 03 done.
2021-01-07 16:02:05.920510 - 04 done.
2021-01-07 16:02:08.285422 - 05 done.
2021-01-07 16:02:10.545497 - 06 done.
2021-01-07 16:02:12.902612 - 07 done.
2021-01-07 16:02:15.160124 - 08 done.
2021-01-07 16:02:17.469043 - 09 done.
2021-01-07 16:02:19.856939 - 10 done.
2021-01-07 16:02:22.166428 - 11 done.
2021-01-07 16:02:24.515112 - 12 done.
2021-01-07 16:02:26.879560 - 13 done.
2021-01-07 16:02:29.377381 - 14 done.
2021-01-07 16:02:31.682469 - 15 done.
2021-01-07 16:02:34.106154 - 16 done.
2021-01-07 16:02:36.413059 - 17 done.
2021-01-07 16:02:38.734806 - 18 done.
2021-01-07 16:02:41.034785 - 19 done.
2021-01-07 16:02:43.473222 - 21 done.
2021-01-07 16:02:45.854121 - 22 done.
2021-01-07 16:02:48.213311 - 23 done.
2021-01-07 16:02:50.550209 - 24 done.
2021-01-07 16:02:52.886721 - 25 done.
2021-01-07 16:02:55.181507 - 26 done.
2021-01-07 16:02:57.570747 - 27 done.
2021-01-07 1

## Building the reports

In [14]:
# Department and data type used by the next cells to load sample data.
dep = '01'
data_type = 'synthese'

In [15]:
# Load the aggregated file of the selected department and data type.
df = pd.read_csv(join(data_dir_path, 'barometre-resultats-{}-agg-{}.csv'.format(data_type, dep)))

In [16]:
# Preview the first rows.
df.head()

Unnamed: 0,mesure,indicateur,id_indicateur,valeur_actuelle,date_valeur_actuelle,valeur_initiale,date_valeur_initiale,progression,pourcentage_progression,cible,pourcentage_cible,date_cible,unite,maille
0,Offrir un logement aux sans-abris : Logement d...,Nombre de personnes sans abri ou en hébergemen...,logement-sans-domicile-hebergement-urgence,420.0,2019-12-31,363.0,2017-12-31,57.0,16.0,,,,personnes dans l'année,0 - departemental
1,"Proposer une offre de lunettes, appareils audi...",Part des équipements/soins 100% santé dans le ...,tx-equipements-optique,6.14,2020-10-31,6.95,2020-01-31,-1.0,-12.0,,,,%,0 - departemental
2,"Proposer une offre de lunettes, appareils audi...",Part des équipements/soins auditives 100% sant...,tx-equipements-audio,9.6,2020-10-31,6.16,2019-01-31,3.0,56.0,,,,%,0 - departemental
3,Simplifier l’accès aux droits des personnes ha...,Durée moyenne de traitement pour les demandes ...,duree-moyenne-traitement-demandes-allocation-a...,6.2,2020-09-30,4.4,2017-12-31,2.0,41.0,3.0,-129.0,2022-12-31,mois de traitement,0 - departemental
4,"Dédoubler les classes en REP (grande section, ...",Part de grande section en REP dédoublées,tx-gs-rep,36.0,2020-11-30,24.0,2019-11-30,12.0,50.0,91.0,18.0,2022-11-30,% des classes,0 - departemental


In [17]:
# One dict (column name -> value) per row, keyed by the row index.
df_dict = df.transpose().to_dict()

In [28]:
# Row of index 3, used as the sample indicator for the report page.
ind_dict = df_dict[3]

In [30]:
# Make sure the reports directory exists.
mkdir_ifnotexist(reports_dir_path)

In [31]:
# Title printed in the page header of the PDF reports. It is a plain
# module-level variable: a "global" statement is only meaningful inside a
# function, so none is needed here.
title_header = ''

In [32]:
class PDF(FPDF):
    """FPDF document with the report header and footer.

    The header and footer are drawn on every page except page 1, which the
    report uses as its cover page.
    """

    def header(self):
        """Draw the page header: logo, title and link to the barometer site.

        The title is read from the module-level ``title_header`` variable
        and the logo from ``img_dir_path`` at the time the page is created.
        """
        if self.page_no() == 1:
            return

        # Logo
        self.image(os.path.join(img_dir_path, 'gouv.png'), 10, 8, 45)
        # Move to the right of the logo
        self.cell(50)
        # Title, Arial bold 16
        self.set_font('Arial', 'B', 16)
        self.cell(80, 15, title_header, 0, 1, 'L')

        # Link to the barometer website, Arial italic 9
        self.set_font('Arial', 'I', 9)
        self.cell(50)
        self.cell(50, 10, 'barometre-resultats.data.gouv.fr', 0, 1, 'L', link='https://barometre-resultats.data.gouv.fr/')
        # Separator line, drawn on this document (self) rather than on
        # whatever object a global named "pdf" happens to reference.
        self.line(40, 38, 170, 38)
        # Line break
        self.ln(7)

    def footer(self):
        """Draw the page footer: issuer name and 'page/total' numbering."""
        if self.page_no() == 1:
            return

        # Position at 1.5 cm from bottom
        self.set_y(-15)
        # Arial italic 8
        self.set_font('Arial', 'I', 8)
        # Page number; '{nb}' is the FPDF placeholder for the total number
        # of pages (enabled by alias_nb_pages()).
        self.cell(0, 10, 'Direction Interministérielle du Numérique (DINUM) - Page ' + str(self.page_no()) + '/{nb}', 0, 0, 'C')

    def chapter_title(self, num, label):
        """Print a chapter title line on a light blue background.

        Args:
            num: Chapter number.
            label: Chapter title.
        """
        # Arial 12
        self.set_font('Arial', '', 12)
        # Background color
        self.set_fill_color(200, 220, 255)
        # Title
        self.cell(0, 6, 'Chapter %d : %s' % (num, label), 0, 1, 'L', 1)
        # Line break
        self.ln(4)

    def chapter_body(self, name):
        """Print the content of a Latin-1 encoded text file.

        Args:
            name: Path of the text file to print.
        """
        # Read text file
        with open(name, 'rb') as fh:
            txt = fh.read().decode('latin-1')
        # Times 12
        self.set_font('Times', '', 12)
        # Output text
        self.multi_cell(0, 5, txt)
        # Line break
        self.ln()

    def print_chapter(self, num, title, name):
        """Add a new page holding a chapter title and the text of a file.

        Args:
            num: Chapter number.
            title: Chapter title.
            name: Path of the text file holding the chapter body.
        """
        self.add_page()
        self.chapter_title(num, title)
        self.chapter_body(name)

In [33]:
def format_val(val,add):
    """Format a number as an integer with space-separated thousands.

    ``val`` is converted to float, then truncated to an integer; ``add`` is
    appended to the formatted number (e.g. a unit suffix).
    """
    whole_part = int(float(val))
    grouped = format(whole_part, ',').replace(',', ' ')
    return grouped + add

In [46]:
# Display the sample indicator.
ind_dict

{'mesure': 'Simplifier l’accès aux droits des personnes handicapées',
 'indicateur': 'Durée moyenne de traitement pour les demandes d’allocation adulte handicapé',
 'id_indicateur': 'duree-moyenne-traitement-demandes-allocation-adulte-handicape',
 'valeur_actuelle': 6.2,
 'date_valeur_actuelle': '2020-09-30',
 'valeur_initiale': 4.4,
 'date_valeur_initiale': '2017-12-31',
 'progression': 2.0,
 'pourcentage_progression': 41.0,
 'cible': 3.0,
 'pourcentage_cible': -129.0,
 'date_cible': '2022-12-31',
 'unite': 'mois de traitement',
 'maille': '0 - departemental'}

In [157]:
def _draw_value_block(pdf, width, caption, value, unit, caption_rgb, body_rgb, ln) :
    """Draw one caption / value / unit block at the current position of ``pdf``.

    Args:
        pdf: FPDF document to draw on.
        width: Width of the block.
        caption: Small text of the first line.
        value: Large bold text of the second line.
        unit: Text of the third line.
        caption_rgb: (r, g, b) fill color of the caption line.
        body_rgb: (r, g, b) fill color of the other lines.
        ln: FPDF ``ln`` argument of every cell (1: go to the beginning of
            the next line, 2: go below the cell).
    """
    pdf.set_fill_color(*caption_rgb)
    pdf.set_font('Arial', '', 8)
    pdf.cell(width, 8, caption, 0, ln, 'L', fill=True)
    pdf.set_fill_color(*body_rgb)
    pdf.set_font('Arial', 'B', 20)
    pdf.cell(width, 15, value, 0, ln, 'L', fill=True)
    pdf.set_font('Arial', '', 10)
    pdf.cell(width, 3, unit, 0, ln, 'L', fill=True)
    # Empty filled cell used as bottom padding of the block.
    pdf.cell(width, 5, '', 0, ln, 'L', fill=True)


def insert_indicateur(pdf, ind_dict) :
    """Draw the section of one indicator on the current page of ``pdf``.

    The section is made of an information banner, the indicator name and
    three side-by-side value blocks.

    Args:
        pdf: FPDF document with a current page.
        ind_dict: Indicator row as a dict; the 'indicateur',
            'date_valeur_initiale', 'valeur_initiale' and 'unite' keys are
            read.

    Returns:
        The same ``pdf`` object.
    """
    #Getting informations from the indicateur dict
    # Typographic apostrophes are replaced by plain ones, presumably because
    # the built-in Arial font cannot encode them; the unit goes through the
    # same replacement as the name since both are printed with that font.
    ind_name = ind_dict['indicateur'].replace('’', "'")
    ind_init_date = yearmonth_to_fr(ind_dict['date_valeur_initiale'][:7])
    ind_init_value = str(ind_dict['valeur_initiale']).replace('.',',')
    unite = ind_dict['unite'].replace('’', "'")

    #Building the indicateur section

    block_width = 60
    block_spacing = 5

    # Information banner
    pdf.set_fill_color(83,101,125)
    pdf.rect(20,41,180,14,'F')

    pdf.image(os.path.join(img_dir_path, 'information.png'), x=10, y=44.5, w=7)

    pdf.set_font('Arial', 'I', 7)
    pdf.set_text_color(255,255,255)
    pdf.cell(10)
    pdf.multi_cell(100, 5, txt="Description de la mesure qui donne plus d'information")
    pdf.ln(10)

    pdf.set_text_color(0,0,0)

    # Indicator name
    pdf.set_font('Arial', 'BI', 14)
    pdf.cell(65, 10, ind_name, ln=1)
    pdf.ln(5)

    # All three blocks start at the same vertical position.
    y_blocks_top = pdf.get_y()
    caption = 'En {}'.format(ind_init_date)

    # NOTE(review): the three blocks currently all show the initial date and
    # value; the second and third ones look like placeholders for other
    # values of the indicator - confirm before changing.

    #First block
    _draw_value_block(pdf, block_width, caption, ind_init_value, unite,
                      (253, 244, 242), (254, 249, 248), ln=1)

    pdf.set_xy(block_width + pdf.l_margin + block_spacing, y_blocks_top)

    #Second block
    _draw_value_block(pdf, block_width, caption, ind_init_value, unite,
                      (234, 244, 239), (244, 250, 247), ln=2)

    pdf.set_xy(2*block_width + pdf.l_margin + 2*block_spacing, y_blocks_top)

    #Third block
    _draw_value_block(pdf, block_width, caption, ind_init_value, unite,
                      (229, 229, 243), (242, 242, 248), ln=2)

    return pdf

In [158]:
%%time

# Build one PDF report per department: a cover page followed by the page of
# the sample indicator (ind_dict).
mkdir_ifnotexist(os.path.join(reports_dir_path, 'pdf'))

for dep in ['01']:

    pdf = PDF()
    # Enables the '{nb}' total-page placeholder used in the footer.
    pdf.alias_nb_pages()
    pdf.add_page()

    #Logos
    pdf.image(os.path.join(img_dir_path, 'gouv.png'), 10, 8, 125)

    # Arial bold 28
    pdf.set_font('Arial', 'B', 28)
    # Move to the right
    pdf.cell(50)
    pdf.ln(100)

    # Title. Cells are left-aligned with 'L': 'A' is not an alignment value
    # documented by FPDF.
    pdf.cell(10)
    pdf.cell(50, 10, 'BAROMÈTRE DES RÉSULTATS DE', 0, 1, 'L')
    pdf.ln(10)
    pdf.cell(10)
    pdf.cell(50, 10, "L'ACTION PUBLIQUE", 0, 1, 'L')
    pdf.ln(10)
    pdf.cell(60)

    # Subtitle, Arial italic 20
    pdf.set_font('Arial', 'I', 20)
    pdf.ln(10)
    pdf.cell(10)
    pdf.cell(50, 10, 'Présentation des résultats pour le département :', 0, 1, 'L')
    pdf.ln(10)
    pdf.cell(10)
    pdf.cell(50, 10, dep, 0, 1, 'L')
    pdf.ln(70)

    # Data source mention, Arial italic 8
    pdf.set_font('Arial', 'I', 8)

    pdf.cell(10)
    pdf.cell(50, 10, "Données issues du baromètre des résultats consultable sur https://barometre-resultats.data.gouv.fr/", 0, 1, 'L')

    #Indicateur

    # Read by PDF.header() when the next page is added.
    title_header = 'TITRE'
    pdf.add_page()
    pdf = insert_indicateur(pdf, ind_dict)


    pdf.output(os.path.join(reports_dir_path, 'pdf', 'Baromètre_résultats_'+dep+'.pdf'), 'F')

    print(str(datetime.datetime.today()) + ' - ' + dep + ' done.')

2021-01-07 18:57:00.411080 - 01 done.
CPU times: user 1.32 s, sys: 581 ms, total: 1.91 s
Wall time: 1.91 s
