In [1]:
# default_exp pdf_generator

# pdf_generator

> Ce module permet de lire les fichiers reçu et de générer le PDF, à l'aide du module pdf_helper.

In [2]:
#hide
from nbdev.showdoc import *
!pip install -q -r requirements.txt
# For multiple output per cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
#export
import datetime
import pandas as pd
#from fpdf import FPDF
from covid_certificate_generator import pdf_helper
import io, sys, os
import tempfile
import pathlib
import logging
import time

In [4]:
%%writefile data.csv
NomParent,PrenomParent,NomEnfant,PrenomEnfant,DateNaissance,Moyen
NomParent1,PrenomParent1,NomEnfant1,PrenomEnfant1,12/06/2010,pied
NomParent2,PrenomParent2,NomEnfant2,PrenomEnfant2,12/04/2011,pied
NomParent3,PrenomParent3,NomEnfant3,PrenomEnfant3,12/04/2011,vélo
NomParent4,PrenomParent4,NomEnfant4,"Prenom, Enfant4",12/04/2011,car

Overwriting data.csv


In [5]:
#ignore
df = pd.read_csv('data.csv')
df

Unnamed: 0,NomParent,PrenomParent,NomEnfant,PrenomEnfant,DateNaissance,Moyen
0,NomParent1,PrenomParent1,NomEnfant1,PrenomEnfant1,12/06/2010,pied
1,NomParent2,PrenomParent2,NomEnfant2,PrenomEnfant2,12/04/2011,pied
2,NomParent3,PrenomParent3,NomEnfant3,PrenomEnfant3,12/04/2011,vélo
3,NomParent4,PrenomParent4,NomEnfant4,"Prenom, Enfant4",12/04/2011,car


In [13]:
#export
class PDFGenerator:
    config = None # Dict configuration
    school = None
    school_sign = None
    logger = None
    num_pages = 0
    pdf_w=210
    pdf_h=297
    
    def __init__(self, config_file=None):
        """
        Constructor
        :param config_file: str, Path to config file
        :return:
        """
        logger = logging.getLogger("download")
        formatter = logging.Formatter("%(asctime)s -  %(name)-12s %(levelname)-8s %(message)s")
        logger.setLevel(logging.DEBUG)
        log_file = f"./logs/pdf_generator-{datetime.datetime.today().strftime('%Y-%m-%d')}.log"
        fh = logging.FileHandler(log_file)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger.addHandler(fh) # Output to file
        logger.addHandler(logging.StreamHandler()) # And to console
        self.logger = logger
        #logger.info(f'Starting...')

    def get_temp_file(self, ext=None):
        tmp_dir = tempfile._get_default_tempdir()
        tmp_name = next(tempfile._get_candidate_names())
        if ext:
            return os.path.join(tmp_dir, tmp_name + ext)
        else:
            return os.path.join(tmp_dir, tmp_name)

    def generate_one_attestation(self, prenom_parent, nom_parent, prenom_enfant, nom_enfant, date_naissance, moyen):
        self.pdf.add_page()
        self.pdf.titles()
        self.pdf.texte_parent(prenom_parent, nom_parent)
        self.pdf.texte_enfant(prenom_enfant, nom_enfant,date_naissance, moyen)
        self.pdf.texte_etablissement(
            self.school['school_name'], self.school['school_adress'], self.school_sign, self.school['city'])

    def generate(self, row):
        self.generate_one_attestation(row.PrenomParent, row.NomParent, row.PrenomEnfant, row.NomEnfant, row.DateNaissance, row.Moyen)
        
    def get_pdf(self, students_data, school_sign, school):
        
        self.school_sign = school_sign
        self.school = school
        try:
            if isinstance(students_data, str) and len(students_data)<256:
                with open(students_data, 'rb') as fh:
                    students_data = io.BytesIO(fh.read())
            header = students_data.read(3)
            students_data.seek(0,0)
            XLS = b'\xd0\xcf\x11'
            XLSX = b'PK\x03'
            CSV = b'\xef\xbb\xbf' # UTF-8 Unicode (with BOM) text
            ascii_text = all(c > 60 and c < 128 for c in header)
            if header == CSV or ascii_text:
                #if '.csv' in students_filename:
                # The user uploaded a CSV file
                df = pd.read_csv(students_data)
                if len(df.columns) == 1: # Is separator , ?
                    students_data.seek(0,0)
                    df = pd.read_csv(students_data, sep=';')
                    if len(df.columns) == 1: # Is separator \t ?
                        students_data.seek(0,0)
                        df = pd.read_csv(students_data, sep='\t')
            elif header in (XLS, XLSX):
                df = pd.read_excel(students_data)
            else:
                # Assume that the user uploaded an excel file
                df = pd.read_excel(students_data)
        except:
            error = f'ERREUR à la lecture du fichier : {sys.exc_info()[0]}'
            self.logger.error(error)
            raise
            
        if len(df.columns) < 6:
            raise ValueError("Format de fichier invalide : pas assez de colonnes !")
        self.pdf = pdf_helper.PDF(orientation='P', unit='mm', format='A4')
        self.pdf.set_author('Data For Good France')
        _ = df.apply(self.generate, axis=1)
        self.num_pages = len(df)
        

    def get_pdf_from_file(self, students_file, school_sign, school, output_name, return_object = False):
        start_time = time.time()
        self.logger.debug(f"Starting get_pdf_from_file for {school['school_name']}")
        # Read file
#         with open(students_file, 'rb') as fh:
#             students_data = io.BytesIO(fh.read())
        self.get_pdf(students_file, school_sign, school)
        
        if return_object:
            result = self.pdf.output(output_name,"S")
        else:
            _ = self.pdf.output(output_name,'F')
            result = None
        exec_time = f'Execution time for {self.num_pages} pages: {round(time.time() - start_time, 3)} second(s).'
        self.logger.info(exec_time)
        return result

    def get_pdf_from_BytesIO(self, students_filename, students_data, school_sign_filename, school_sign_data, school):
        start_time = time.time()
        self.logger.debug(f'Starting get_pdf_from_BytesIO...')
        if len(school_sign_filename) < 2:
            school_sign_filename = ''
        school_sign_filename = self.get_temp_file(school_sign_filename)
        with open(school_sign_filename, "wb") as f:
            f.write(school_sign_data.getbuffer())
        self.get_pdf(students_data, school_sign_filename, school)
        output_name = self.get_temp_file('.pdf')
        _ = self.pdf.output(output_name,'F')
        # Clean temp file for school image
        path = pathlib.Path(school_sign_filename)
        path.unlink()
        exec_time = f'Execution time for {self.num_pages} pages: {round(time.time() - start_time, 3)} second(s).'
        self.logger.info(exec_time)
        return output_name
        #return self.pdf.output(dest='I')


In [14]:
%%time
school={
    'school_name':'Ecole Jean Jaurès',
    'school_adress':'rue Jean Jaurès, 42 000 Libreville',
    'city':'Libreville'
}
students_filename = 'data.csv'
school_sign_filename = 'cachet.jpg'

#decoded = base64.b64decode(students_file)
#students_file = io.BytesIO(decoded)


with open(students_filename, 'rb') as fh:
    students_data = io.BytesIO(fh.read())
with open(school_sign_filename, 'rb') as fh:
    school_sign_data = io.BytesIO(fh.read())
school_pdf = PDFGenerator()
res = school_pdf.get_pdf_from_BytesIO(students_filename, students_data, school_sign_filename, school_sign_data, school)
type(res)
print(res)
# with open("pdf-bytesio.pdf", "wb") as f:
#     f.write(res.encode())

Starting get_pdf_from_BytesIO...
Execution time for 4 pages: 0.03 second(s).


/tmp/u7jtalxj.pdf
CPU times: user 16.6 ms, sys: 16.7 ms, total: 33.3 ms
Wall time: 31.9 ms


In [15]:
%%time

students_file = 'data.csv'
school_sign = 'cachet.jpg'
school_pdf = PDFGenerator()
school_pdf.get_pdf_from_file(students_file, school_sign, school,'test.pdf')

Starting get_pdf_from_file for Ecole Jean Jaurès
Starting get_pdf_from_file for Ecole Jean Jaurès
Execution time for 4 pages: 0.01 second(s).
Execution time for 4 pages: 0.01 second(s).


CPU times: user 12.2 ms, sys: 378 µs, total: 12.6 ms
Wall time: 11.5 ms


From https://readxl.tidyverse.org/reference/excel_format.html
    
File signatures (in hexadecimal) for xlsx vs xls:
- xlsx: First 4 bytes are 50 4B 03 04
- xls: First 8 bytes are D0 CF 11 E0 A1 B1 1A E1
- CSV : b'\xef\xbb\xbfNo'
- XLS : b'\xd0\xcf\x11\xe0\xa1'
- XLSX : b'PK\x03\x04\x14'

In [16]:
#hide
files = ['./assets/Fichier des élèves.csv', './assets/Fichier des élèves.xls', './assets/Fichier des élèves.xlsx','data.csv']

for students_file in files :
    print("-----------", students_file)
    print(type(students_file))
    with open(students_file, 'rb') as fh:
     students_data = io.BytesIO(fh.read())
#     mime = magic.from_buffer(students_data.read(10))
#     print('mime', mime)
    type(students_data)
    header = students_data.read(3)
    students_data.seek(0,0)
    ascii_text = all(c > 60 and c < 128 for c in header)
    print(header, ascii_text)
    XLS = b'\xd0\xcf\x11'
    XLSX = b'PK\x03'
    CSV = b'\xef\xbb\xbf' # UTF-8 Unicode (with BOM) text
    if header == CSV or ascii_text:
        print('CSV !')
        #if '.csv' in students_filename:
        # The user uploaded a CSV file
        df = pd.read_csv(students_data)
        if len(df.columns) == 1: # Is separator , ?
            students_data.seek(0,0)
            df = pd.read_csv(students_data, sep=';')
            if len(df.columns) == 1: # Is separator \t ?
                students_data.seek(0,0)
                df = pd.read_csv(students_data, sep='\t')
    else:
        print('Excel !')
        df = pd.read_excel(students_data)
df

----------- ./assets/Fichier des élèves.csv
<class 'str'>


_io.BytesIO

0

b'\xef\xbb\xbf' False
CSV !


0

----------- ./assets/Fichier des élèves.xls
<class 'str'>


_io.BytesIO

0

b'\xd0\xcf\x11' False
Excel !
----------- ./assets/Fichier des élèves.xlsx
<class 'str'>


_io.BytesIO

0

b'PK\x03' False
Excel !
----------- data.csv
<class 'str'>


_io.BytesIO

0

b'Nom' True
CSV !


Unnamed: 0,NomParent,PrenomParent,NomEnfant,PrenomEnfant,DateNaissance,Moyen
0,NomParent1,PrenomParent1,NomEnfant1,PrenomEnfant1,12/06/2010,pied
1,NomParent2,PrenomParent2,NomEnfant2,PrenomEnfant2,12/04/2011,pied
2,NomParent3,PrenomParent3,NomEnfant3,PrenomEnfant3,12/04/2011,vélo
3,NomParent4,PrenomParent4,NomEnfant4,"Prenom, Enfant4",12/04/2011,car


In [17]:
%%time
students_file = './assets/Fichier des élèves.csv'
school_pdf = PDFGenerator()
school_pdf.get_pdf_from_file(students_file, school_sign, school,'test-csv.pdf')

Starting get_pdf_from_file for Ecole Jean Jaurès
Starting get_pdf_from_file for Ecole Jean Jaurès
Starting get_pdf_from_file for Ecole Jean Jaurès
Execution time for 343 pages: 0.119 second(s).
Execution time for 343 pages: 0.119 second(s).
Execution time for 343 pages: 0.119 second(s).


CPU times: user 122 ms, sys: 622 µs, total: 122 ms
Wall time: 121 ms


In [18]:
%%time
students_file = './assets/Fichier des élèves.xls'
school_pdf = PDFGenerator()
school_pdf.get_pdf_from_file(students_file, school_sign, school,'test-xls.pdf')

Starting get_pdf_from_file for Ecole Jean Jaurès
Starting get_pdf_from_file for Ecole Jean Jaurès
Starting get_pdf_from_file for Ecole Jean Jaurès
Starting get_pdf_from_file for Ecole Jean Jaurès
Execution time for 343 pages: 0.124 second(s).
Execution time for 343 pages: 0.124 second(s).
Execution time for 343 pages: 0.124 second(s).
Execution time for 343 pages: 0.124 second(s).


CPU times: user 122 ms, sys: 6.66 ms, total: 129 ms
Wall time: 127 ms


In [19]:
%%time
students_file = './assets/Fichier des élèves.xlsx'
school_pdf = PDFGenerator()
school_pdf.get_pdf_from_file(students_file, school_sign, school,'test-xlsx.pdf')

Starting get_pdf_from_file for Ecole Jean Jaurès
Starting get_pdf_from_file for Ecole Jean Jaurès
Starting get_pdf_from_file for Ecole Jean Jaurès
Starting get_pdf_from_file for Ecole Jean Jaurès
Starting get_pdf_from_file for Ecole Jean Jaurès
Execution time for 343 pages: 0.153 second(s).
Execution time for 343 pages: 0.153 second(s).
Execution time for 343 pages: 0.153 second(s).
Execution time for 343 pages: 0.153 second(s).
Execution time for 343 pages: 0.153 second(s).


CPU times: user 159 ms, sys: 0 ns, total: 159 ms
Wall time: 158 ms
