### Refactoring de l'algo matching
- l'ancien algorithme contient des étapes redondantes, qui ne dérangent pas en cas de traitement batch, mais limitent la performance en temps réel
- plusieurs possibilités s'ouvrent pour réduire le temps d'exécution de l'algo

In [1]:
# Connect to database
import psycopg2
import psycopg2.extras
# One shared connection is reused by every cell of the notebook.
# Active target: local database.
connection = psycopg2.connect("postgres://jupyter:jupyter@database:5432/andi")

# Alternative target: tunnel to the OVH VPS.
# connection = psycopg2.connect("postgres://jupyter:jupyter@localhost:63333/andi_entreprises")

# Alternative target: tunnel to Azure.
# connection = psycopg2.connect("postgres://jupyter:jupyter@localhost:63333/andi")

### Test temps d'exécution
For Lyon:
- with order by: 386 ms
- without order by: 260 ms
- without dist nor order by : 109 ms



In [None]:
%%timeit -n4 -r4
# Timing probe: select id and commune of every company whose coordinates fall
# inside a 15 km earth_box around the reference point. The distance column and
# the ORDER BY are commented out inside the SQL so their cost can be compared.
sql_mask = """
    SELECT
        id_internal,
        commune
    -- earth_distance(ll_to_earth(%(lat)s, %(lon)s), ll_to_earth(lat, lon)) AS dist
    FROM
        entreprises
    WHERE
        earth_box(ll_to_earth(%(lat)s, %(lon)s), 15 * 1000) @> ll_to_earth(lat, lon)
    -- ORDER BY earth_box(ll_to_earth(%(lat)s, %(lon)s), 10 * 1000) @> ll_to_earth(lat, lon) ASC
    
    """
# NOTE(review): presumably closes any transaction left open by an earlier
# cell before timing starts -- confirm.
connection.commit()
with connection.cursor() as cur:
    # Other reference points used for the measurements:
    #   Province: {'lat': '46.6743', 'lon': '5.5492'}
    #   Lyon:     {'lat': '45.765', 'lon': '4.834'}
    cur.execute(sql_mask, dict(lat='48.878', lon='2.301'))  # Paris
    # Fetch everything so the transfer of the rows is part of the timing.
    r = cur.fetchall()


### Idea
Implement binning by successive earth boxes !
- First third, second third, third third

Use postgis
- install postgis postgresql-11-postgis-2.5 libsfcgal-dev
- CREATE EXTENSION postgis;
- add column: `alter table entreprises add column geom geometry(Point, 4326); `
- fill it : `update entreprises set geom = ST_SetSRID(ST_MakePoint(lon, lat), 4326);`
- index it: `create index entreprises_geom on entreprises using gist((geom::geography)); `
- Using geometry instead of geography would avoid spherical-earth calculations (sphere calculations take more time); note that the index above and the queries below still cast to geography
- Doc: https://gis.stackexchange.com/questions/6681/what-are-the-pros-and-cons-of-postgis-geography-and-geometry-types http://postgis.net/workshops/postgis-intro/geography.html

In [19]:
# PostGIS matching query, parameterised by %(lat)s / %(lon)s.
#
# Inner query (prm): for every company within 10 km of the reference point,
# compute the distance and five criterion scores (geo, naf, size, welcome,
# contact). Outer query: keep rows with score_naf > 1, compute the weighted
# total (geo*1 + size*3 + contact*3 + welcome*3 + naf*5) and return the 100
# best rows.
#
# Fixes compared with the previous revision:
# - the tie-break sorts on the numeric prm.dist instead of the formatted
#   'x.xx km' text column, which sorted lexicographically;
# - in the welcome CASE the stricter condition (interest AND recent count)
#   is tested first; previously it came after the plain interest test and
#   could never be reached, so score 3 was never assigned.
#
# NOTE(review): the contact CASE has the same shape (the score-2 branch is
# tested before the score-3 branch); left unchanged because the intended
# priority is not clear from the notebook -- confirm.
# NOTE(review): WHEN '01' is an exact comparison on naf; confirm whether a
# prefix match was intended.
full_query = """
SELECT
    id_internal AS id,
    nom,
    round(prm.dist::numeric/1000, 2) || ' km' AS distance,
    prm.lat AS lat,
    prm.lon AS lon,
    prm.siret AS siret,
    score_naf,
    score_welcome,
    score_contact,
    score_size,
    score_geo,
    (   score_geo * 1 +
        score_size * 3 +
        score_contact * 3 +
        score_welcome * 3 + score_naf * 5
    )
    AS score_total
FROM
   (
   SELECT
        id_internal,
        nom,
        lat,
        lon,
        siret,
        commune,
        naf,
        ST_Distance(geom, orig_geom)
            AS dist,
        -- crit geo ---------------------------------------------
        6 - NTILE(5) OVER(
            ORDER BY ST_Distance(geom, orig_geom) ASC
        ) AS score_geo,
        -- crit naf ----------------------------------------------
        CASE e.naf
            WHEN '4622Z' THEN 5
            WHEN '0130Z' THEN 4
            WHEN '0119Z' THEN 4
            WHEN '01' THEN 3
            ELSE 1
        END AS score_naf,
        -- crit size ---------------------------------------------
        CASE e.taille
            WHEN '1-2' THEN 1
            ELSE 1
        END AS score_size,
        -- crit welcome ------------------------------------------
        CASE
            WHEN (e.pmsmp_interest) AND (e.pmsmp_count_recent > 0) THEN 3
            WHEN e.pmsmp_interest THEN 2
            ELSE 1
        END AS score_welcome,
        -- crit contact ------------------------------------------
        CASE
            WHEN (COALESCE(e.email_official, '') <> '')
            OR (COALESCE(e.contact_1_phone, '') <> '')
            OR (COALESCE(e.contact_2_phone, '') <> '') THEN 2
            WHEN (COALESCE(e.contact_1_mail, '') <> '')
            OR (COALESCE(e.contact_2_mail, '') <> '') THEN 3
            ELSE 1
        END AS score_contact
    FROM
        entreprises e
    CROSS JOIN
        (SELECT ST_MakePoint(%(lon)s, %(lat)s)::geography AS orig_geom) AS r
    WHERE
        ST_DWithin(geom, orig_geom, 10 * 1000)
    ORDER BY dist ASC
    ) AS prm
WHERE
    score_naf > 1
ORDER BY score_total DESC, prm.dist ASC
LIMIT 100;
"""

In [20]:
# Bind the Paris coordinates into full_query and print the resulting SQL
# text for inspection (mogrify only renders the statement).
# Other reference points:
#   Province (Lons-le-Saunier): {'lat': '46.6743', 'lon': '5.5492'}
#   Lyon:                       {'lat': '45.765', 'lon': '4.834'}
with connection.cursor() as cur:
    match_sql = cur.mogrify(full_query, dict(lat='48.878', lon='2.301'))  # Paris
print(match_sql.decode())


SELECT
    id_internal AS id,
    nom,
    round(prm.dist::numeric/1000, 2) || ' km' AS distance,
    prm.lat AS lat,
    prm.lon AS lon,
    prm.siret AS siret,
    score_naf,
    score_welcome,
    score_contact,
    score_size,
    score_geo,
    (   score_geo * 1 +
        score_size * 3 +
        score_contact * 3 +
        score_welcome * 3 + score_naf * 5 
    )
    AS score_total
FROM
   (
   SELECT
        id_internal,
        nom,
        lat,
        lon,
        siret,
        commune,
        naf,
        ST_Distance(geom, orig_geom)
            AS dist,
        -- crit geo ---------------------------------------------
        6 - NTILE(5) OVER(
            ORDER BY ST_Distance(geom, orig_geom) ASC
        ) AS score_geo,
        -- crit naf ----------------------------------------------
        CASE e.naf
            WHEN '4622Z' THEN 5
            WHEN '0130Z' THEN 4
            WHEN '0119Z' THEN 4
            WHEN '01' THEN 3
            ELSE 1
        END AS score_naf

In [21]:
# %%timeit -n3 -r3
import pandas as pd
import pandas.io.sql as sqlio
from IPython.display import display, HTML

# Bind the Province coordinates into full_query, run it through pandas and
# show the first 30 rows.
# Other reference points:
#   Paris: {'lat': '48.878', 'lon': '2.301'}
#   Lyon:  {'lat': '45.765', 'lon': '4.834'}
with connection.cursor() as cur:
    match_sql = cur.mogrify(
        full_query,
        dict(lat='46.6743', lon='5.5492'),  # Province (Lons-le-Saunier)
    )

# print(match_sql.decode())

df = sqlio.read_sql_query(match_sql, connection)
df.head(30)

Unnamed: 0,id,nom,distance,lat,lon,siret,score_naf,score_welcome,score_contact,score_size,score_geo,score_total
0,4061116,BONNEFOY JEROME,3.31 km,46.661338,5.588094,52321539000015,4,1,1,1,2,31
1,4054617,JUSTE PLANTE,3.43 km,46.688588,5.509459,48065507500012,4,1,1,1,2,31
2,4068351,PEPINIERES PIERRE ANTIER FILS DOMAINE DE JEAND...,5.33 km,46.640303,5.500176,53829773000011,4,1,1,1,1,30
3,4064401,SOCIETE CIVILE D'EXPLOITATION AGRICOLE CARMANT...,9.49 km,46.732329,5.458198,79115373700014,4,1,1,1,1,30


## Results:
Measured with `%%timeit -n3 -r3`
### v1 (vanilla)
- Lyon 1.09 s ± 24.3 ms per loop
- Paris 8.03 s ± 481 ms per loop
- Province 71.4 ms ± 26 ms per loop

### v2 (cube boxing binning)
- Lyon 1.91 s ± 8.47 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)
- Paris 16 s ± 515 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)
- Province 102 ms ± 3.84 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)

### v3 (naf limiting geo binning)
- Lyon 785 ms ± 14 ms per loop 
- Paris 8.77 s ± 370 ms per loop
- Province 46.6 ms ± 4.74 ms per loop

### v4 (naf pre-filtering)
- Lyon -
- Paris -
- Province 34.8 s ± 1.8 s per loop

### v5 (PostGIS, v1)
- Lyon -
- Paris - 45 s ?
- Province 358 ms ± 3.14 ms per loop

### v6 (PostGIS, v2 - full rewrite)
- Lyon - 155 ms ± 2.45 ms per loop
- Paris - 832 ms ± 3.67 ms per loop 
- Province 33.7 ms ± 2.05 ms per loop

In [67]:
# Sanity check: for one company (looked up by siret), compare the distance
# given by earth_distance with the one given by PostGIS ST_Distance, both
# measured from the same reference point.
mask_sql = """
SELECT
     nom,
     earth_distance(ll_to_earth(%(lat)s, %(lon)s), ll_to_earth(lat, lon))
        AS earth_distance,
     ST_Distance(geom, ST_MakePoint(%(lon)s, %(lat)s)::geography)
        AS st_distance
FROM
    entreprises
WHERE siret = %(siret)s
"""
with connection.cursor() as cur:
    check_sql = cur.mogrify(
        mask_sql,
        dict(siret='52321539000015', lat='46.6743', lon='5.5492'),
    )
# print(check_sql.decode())
df = sqlio.read_sql_query(check_sql, connection)
df

Unnamed: 0,nom,earth_distance,st_distance
0,M JEROME BONNEFOY,3302.9904,3306.84423


In [16]:
# V1 ("vanilla"): one CTE per criterion, all joined back on id_internal.
#
# comp_pos selects the companies inside a 10 km earth_box around the
# %(lat)s / %(lon)s point together with their earth_distance; each crit_*
# CTE derives one score from it; the final SELECT joins them, computes the
# weighted total (geo*1 + size*3 + contact*3 + welcome*3 + naf*5) and
# returns the 100 best rows.
#
# Fixes compared with the previous revision:
# - "ORDER BY dist ASCf" in crit_geo was a typo that made the whole
#   statement a syntax error; it is now "ASC";
# - in crit_welcome the stricter condition (interest AND recent count) is
#   tested first; previously it followed the plain interest test and could
#   never match, so score 3 was never assigned.
#
# NOTE(review): crit_contact has the same shape (score-2 branch tested
# before the score-3 branch); left unchanged because the intended priority
# is not clear from the notebook -- confirm.
v1_query = """
WITH comp_pos AS (
    SELECT
        id_internal,
        commune,
        earth_distance(ll_to_earth(%(lat)s, %(lon)s), ll_to_earth(lat, lon))
            AS dist
    FROM
        entreprises
    WHERE
        earth_box(ll_to_earth(%(lat)s, %(lon)s), 10 * 1000) @> ll_to_earth(lat, lon)
    -- ORDER BY earth_box(ll_to_earth(%(lat)s, %(lon)s), 10 * 1000) @> ll_to_earth(lat, lon) ASC

), crit_geo AS (
 -- crit geo ----------------------------------------------
    SELECT
        id_internal,
        dist,
        4 - NTILE(3) OVER(
            ORDER BY dist ASC
        ) AS score
    FROM comp_pos
    ORDER BY dist ASC
), crit_size AS (
-- crit size ---------------------------------------------
    SELECT
        comp_pos.id_internal,
        CASE e.taille
            WHEN '1-2' THEN 1
            ELSE 1
        END AS score
    FROM comp_pos
    INNER JOIN
        entreprises e ON e.id_internal = comp_pos.id_internal
), crit_naf AS (
-- crit naf ----------------------------------------------
     SELECT
        comp_pos.id_internal,
        CASE e.naf
            WHEN '4622Z' THEN 5
            WHEN '0130Z' THEN 4
            WHEN '0119Z' THEN 4
            WHEN '01' THEN 3
            ELSE 1
        END AS score
    FROM comp_pos
    INNER JOIN
        entreprises e ON e.id_internal = comp_pos.id_internal
), crit_welcome AS (
-- crit welcome ------------------------------------------
    SELECT
        comp_pos.id_internal,
        CASE
            WHEN (e.pmsmp_interest) AND (e.pmsmp_count_recent > 0) THEN 3
            WHEN e.pmsmp_interest THEN 2
            ELSE 1
        END AS score
    FROM comp_pos
    INNER JOIN
        entreprises e ON e.id_internal = comp_pos.id_internal
), crit_contact AS (
-- crit contact ------------------------------------------
    SELECT
        comp_pos.id_internal,
        CASE
            WHEN (COALESCE(cc.email_official, '') <> '')
            OR (COALESCE(cc.contact_1_phone, '') <> '')
            OR (COALESCE(cc.contact_2_phone, '') <> '') THEN 2
            WHEN (COALESCE(cc.contact_1_mail, '') <> '')
            OR (COALESCE(cc.contact_2_mail, '') <> '') THEN 3
            ELSE 1
        END AS score
    FROM comp_pos
    INNER JOIN
        entreprises cc ON cc.id_internal = comp_pos.id_internal
    )
SELECT
    e.id_internal as id,
    e.nom AS nom,
    round(cr_ge.dist/1000) || ' km' AS distance,
    e.lat AS lat,
    e.lon AS lon,
    e.siret AS siret,
    cr_nf.score AS score_naf,
    cr_wc.score AS score_welcome,
    cr_cn.score AS score_contact,
    cr_si.score AS score_size,
    cr_ge.score AS score_geo,
    cr_ge.score * 1 + cr_si.score * 3 + cr_cn.score * 3 + cr_wc.score * 3 + cr_nf.score * 5 AS score_total
FROM
    crit_geo cr_ge
INNER JOIN
    crit_size cr_si ON cr_si.id_internal = cr_ge.id_internal
INNER JOIN
    crit_naf cr_nf ON cr_nf.id_internal = cr_ge.id_internal
INNER JOIN
    crit_welcome cr_wc ON cr_wc.id_internal = cr_ge.id_internal
INNER JOIN
    crit_contact cr_cn ON cr_cn.id_internal = cr_ge.id_internal
INNER JOIN
    entreprises e ON e.id_internal = cr_ge.id_internal
LEFT JOIN
    naf ON e.naf = naf.sous_classe_a_732
ORDER BY score_total DESC
LIMIT 100
"""

In [18]:
# V2 ("cube boxing binning"): like V1, but the geo score is derived from
# membership in nested earth_boxes (3 km -> 5, 6 km -> 3, otherwise 1)
# instead of an NTILE over the computed distance. query_param holds the
# boxes and the reference point built from %(lat)s / %(lon)s.
#
# Fixes compared with the previous revision:
# - the final tie-break sorts on the numeric earth_distance instead of the
#   formatted 'N km' text column, which sorted lexicographically
#   ('10 km' before '2 km');
# - in crit_welcome the stricter condition (interest AND recent count) is
#   tested first; previously it followed the plain interest test and could
#   never match, so score 3 was never assigned.
#
# NOTE(review): crit_contact has the same shape (score-2 branch tested
# before the score-3 branch); left unchanged because the intended priority
# is not clear from the notebook -- confirm.
v2_query = """
WITH query_param AS (
    SELECT
        earth_box(ll_to_earth(%(lat)s, %(lon)s), 10 * 1000) AS full_range,
        earth_box(ll_to_earth(%(lat)s, %(lon)s), 3 * 1000) AS short_range,
        earth_box(ll_to_earth(%(lat)s, %(lon)s), 6 * 1000) AS med_range,
        ll_to_earth(%(lat)s, %(lon)s) AS point
), comp_pos AS (
    SELECT
        id_internal,
        commune,
        lat,
        lon
    FROM
        entreprises, query_param qp
    WHERE
        qp.full_range @> ll_to_earth(lat, lon)
), crit_geo AS (

 -- crit geo ----------------------------------------------
    SELECT
        cp.id_internal,
        CASE
            WHEN qp.short_range @> ll_to_earth(cp.lat, cp.lon) THEN 5
            WHEN qp.med_range @> ll_to_earth(cp.lat, cp.lon) THEN 3
            ELSE 1
        END AS score
    FROM comp_pos cp, query_param qp
), crit_size AS (

-- crit size ---------------------------------------------
    SELECT
        comp_pos.id_internal,
        CASE e.taille
            WHEN '1-2' THEN 1
            ELSE 1
        END AS score
    FROM comp_pos
    INNER JOIN
        entreprises e ON e.id_internal = comp_pos.id_internal
), crit_naf AS (

-- crit naf ----------------------------------------------
     SELECT
        comp_pos.id_internal,
        CASE e.naf
            WHEN '4622Z' THEN 5
            WHEN '0130Z' THEN 4
            WHEN '0119Z' THEN 4
            WHEN '01' THEN 3
            ELSE 1
        END AS score
    FROM comp_pos
    INNER JOIN
        entreprises e ON e.id_internal = comp_pos.id_internal
), crit_welcome AS (
-- crit welcome ------------------------------------------
    SELECT
        comp_pos.id_internal,
        CASE
            WHEN (e.pmsmp_interest) AND (e.pmsmp_count_recent > 0) THEN 3
            WHEN e.pmsmp_interest THEN 2
            ELSE 1
        END AS score
    FROM comp_pos
    INNER JOIN
        entreprises e ON e.id_internal = comp_pos.id_internal
), crit_contact AS (
-- crit contact ------------------------------------------
    SELECT
        comp_pos.id_internal,
        CASE
            WHEN (COALESCE(cc.email_official, '') <> '')
            OR (COALESCE(cc.contact_1_phone, '') <> '')
            OR (COALESCE(cc.contact_2_phone, '') <> '') THEN 2
            WHEN (COALESCE(cc.contact_1_mail, '') <> '')
            OR (COALESCE(cc.contact_2_mail, '') <> '') THEN 3
            ELSE 1
        END AS score
    FROM comp_pos
    INNER JOIN
        entreprises cc ON cc.id_internal = comp_pos.id_internal
    )
SELECT
    e.id_internal as id,
    e.nom AS nom,
    e.lat AS lat,
    e.lon AS lon,
    e.siret AS siret,
    round(earth_distance(qp.point, ll_to_earth(e.lat, e.lon))/1000) || ' km' AS distance,
    cr_nf.score AS score_naf,
    cr_wc.score AS score_welcome,
    cr_cn.score AS score_contact,
    cr_si.score AS score_size,
    cr_ge.score AS score_geo,
    cr_ge.score * 1 + cr_si.score * 3 + cr_cn.score * 3 + cr_wc.score * 3 + cr_nf.score * 5 AS score_total
FROM
    query_param qp, crit_geo cr_ge
INNER JOIN
    crit_size cr_si ON cr_si.id_internal = cr_ge.id_internal
INNER JOIN
    crit_naf cr_nf ON cr_nf.id_internal = cr_ge.id_internal
INNER JOIN
    crit_welcome cr_wc ON cr_wc.id_internal = cr_ge.id_internal
INNER JOIN
    crit_contact cr_cn ON cr_cn.id_internal = cr_ge.id_internal
INNER JOIN
    entreprises e ON e.id_internal = cr_ge.id_internal
LEFT JOIN
    naf ON e.naf = naf.sous_classe_a_732
ORDER BY score_total DESC, earth_distance(qp.point, ll_to_earth(e.lat, e.lon)) ASC
LIMIT 100
"""

In [19]:
# V3 ("naf limiting geo binning"): like V1, but crit_naf keeps only the 500
# best naf scores and crit_geo computes earth_distance / NTILE only for the
# companies that survive that limit (join of comp_pos with crit_naf).
#
# Fix compared with the previous revision: in crit_welcome the stricter
# condition (interest AND recent count) is tested first; previously it
# followed the plain interest test and could never match, so score 3 was
# never assigned.
#
# NOTE(review): crit_contact has the same shape (score-2 branch tested
# before the score-3 branch); left unchanged because the intended priority
# is not clear from the notebook -- confirm.
v3_query = """
WITH comp_pos AS (
    SELECT
        id_internal,
        commune,
        lat,
        lon
    FROM
        entreprises
    WHERE
        earth_box(ll_to_earth(%(lat)s, %(lon)s), 10 * 1000) @> ll_to_earth(lat, lon)
    -- ORDER BY earth_box(ll_to_earth(%(lat)s, %(lon)s), 10 * 1000) @> ll_to_earth(lat, lon) ASC

), crit_size AS (
-- crit size ---------------------------------------------
    SELECT
        comp_pos.id_internal,
        CASE e.taille
            WHEN '1-2' THEN 1
            ELSE 1
        END AS score
    FROM comp_pos
    INNER JOIN
        entreprises e ON e.id_internal = comp_pos.id_internal
), crit_naf AS (
-- crit naf ----------------------------------------------
     SELECT
        comp_pos.id_internal,
        CASE e.naf
            WHEN '4622Z' THEN 5
            WHEN '0130Z' THEN 4
            WHEN '0119Z' THEN 4
            WHEN '01' THEN 3
            ELSE 1
        END AS score
    FROM comp_pos
    INNER JOIN
        entreprises e ON e.id_internal = comp_pos.id_internal
    ORDER BY score DESC
    LIMIT 500
), crit_welcome AS (
-- crit welcome ------------------------------------------
    SELECT
        comp_pos.id_internal,
        CASE
            WHEN (e.pmsmp_interest) AND (e.pmsmp_count_recent > 0) THEN 3
            WHEN e.pmsmp_interest THEN 2
            ELSE 1
        END AS score
    FROM comp_pos
    INNER JOIN
        entreprises e ON e.id_internal = comp_pos.id_internal
), crit_contact AS (
-- crit contact ------------------------------------------
    SELECT
        comp_pos.id_internal,
        CASE
            WHEN (COALESCE(cc.email_official, '') <> '')
            OR (COALESCE(cc.contact_1_phone, '') <> '')
            OR (COALESCE(cc.contact_2_phone, '') <> '') THEN 2
            WHEN (COALESCE(cc.contact_1_mail, '') <> '')
            OR (COALESCE(cc.contact_2_mail, '') <> '') THEN 3
            ELSE 1
        END AS score
    FROM comp_pos
    INNER JOIN
        entreprises cc ON cc.id_internal = comp_pos.id_internal
), crit_geo AS (
 -- crit geo ----------------------------------------------
    SELECT
        id_internal,
        dist,
        4 - NTILE(3) OVER(
            ORDER BY dist ASC
        ) AS score
    FROM (
        SELECT
            comp_pos.id_internal,
            earth_distance(ll_to_earth(%(lat)s, %(lon)s), ll_to_earth(lat, lon)) AS dist
        FROM comp_pos
        INNER JOIN crit_naf cn ON cn.id_internal = comp_pos.id_internal
    ) naf_limited
)
SELECT
    e.id_internal as id,
    e.nom AS nom,
    round(cr_ge.dist/1000) || ' km' AS distance,
    e.lat AS lat,
    e.lon AS lon,
    e.siret AS siret,
    cr_nf.score AS score_naf,
    cr_wc.score AS score_welcome,
    cr_cn.score AS score_contact,
    cr_si.score AS score_size,
    cr_ge.score AS score_geo,
    cr_ge.score * 1 + cr_si.score * 3 + cr_cn.score * 3 + cr_wc.score * 3 + cr_nf.score * 5 AS score_total
FROM
    crit_geo cr_ge
INNER JOIN
    crit_size cr_si ON cr_si.id_internal = cr_ge.id_internal
INNER JOIN
    crit_naf cr_nf ON cr_nf.id_internal = cr_ge.id_internal
INNER JOIN
    crit_welcome cr_wc ON cr_wc.id_internal = cr_ge.id_internal
INNER JOIN
    crit_contact cr_cn ON cr_cn.id_internal = cr_ge.id_internal
INNER JOIN
    entreprises e ON e.id_internal = cr_ge.id_internal
LEFT JOIN
    naf ON e.naf = naf.sous_classe_a_732
ORDER BY score_total DESC
LIMIT 100
"""

In [20]:
# V4 ("naf pre-filtering"): the naf score is computed first over the whole
# entreprises table (limit_naf); comp_pos then keeps the 1000 best-scored
# companies inside the 10 km earth_box around %(lat)s / %(lon)s, and the
# remaining criteria are derived from comp_pos as in V1.
#
# Fix compared with the previous revision: in crit_welcome the stricter
# condition (interest AND recent count) is tested first; previously it
# followed the plain interest test and could never match, so score 3 was
# never assigned.
#
# NOTE(review): crit_contact has the same shape (score-2 branch tested
# before the score-3 branch); left unchanged because the intended priority
# is not clear from the notebook -- confirm.
v4_query = """
WITH limit_naf AS (
    SELECT
        id_internal,
        commune,
        lat,
        lon,
        CASE naf
            WHEN '4622Z' THEN 5
            WHEN '0130Z' THEN 4
            WHEN '0119Z' THEN 4
            WHEN '01' THEN 3
            ELSE 1
        END AS naf_score
    FROM
        entreprises
    ORDER BY naf_score DESC
), comp_pos AS (
    SELECT
        id_internal,
        commune,
        earth_distance(ll_to_earth(%(lat)s, %(lon)s), ll_to_earth(lat, lon))
            AS dist
    FROM
        limit_naf
    WHERE
        earth_box(ll_to_earth(%(lat)s, %(lon)s), 10 * 1000) @> ll_to_earth(lat, lon)
    ORDER BY limit_naf.naf_score DESC
    LIMIT 1000
), crit_geo AS (
 -- crit geo ----------------------------------------------
    SELECT
        id_internal,
        dist,
        4 - NTILE(3) OVER(
            ORDER BY dist ASC
        ) AS score
    FROM comp_pos
    ORDER BY dist ASC
), crit_size AS (
-- crit size ---------------------------------------------
    SELECT
        comp_pos.id_internal,
        CASE e.taille
            WHEN '1-2' THEN 1
            ELSE 1
        END AS score
    FROM comp_pos
    INNER JOIN
        entreprises e ON e.id_internal = comp_pos.id_internal
), crit_naf AS (
-- crit naf ----------------------------------------------
     SELECT
        limit_naf.id_internal,
        limit_naf.naf_score as score
    FROM limit_naf
), crit_welcome AS (
-- crit welcome ------------------------------------------
    SELECT
        comp_pos.id_internal,
        CASE
            WHEN (e.pmsmp_interest) AND (e.pmsmp_count_recent > 0) THEN 3
            WHEN e.pmsmp_interest THEN 2
            ELSE 1
        END AS score
    FROM comp_pos
    INNER JOIN
        entreprises e ON e.id_internal = comp_pos.id_internal
), crit_contact AS (
-- crit contact ------------------------------------------
    SELECT
        comp_pos.id_internal,
        CASE
            WHEN (COALESCE(cc.email_official, '') <> '')
            OR (COALESCE(cc.contact_1_phone, '') <> '')
            OR (COALESCE(cc.contact_2_phone, '') <> '') THEN 2
            WHEN (COALESCE(cc.contact_1_mail, '') <> '')
            OR (COALESCE(cc.contact_2_mail, '') <> '') THEN 3
            ELSE 1
        END AS score
    FROM comp_pos
    INNER JOIN
        entreprises cc ON cc.id_internal = comp_pos.id_internal
    )
SELECT
    e.id_internal as id,
    e.nom AS nom,
    round(cr_ge.dist/1000) || ' km' AS distance,
    e.lat AS lat,
    e.lon AS lon,
    e.siret AS siret,
    cr_nf.score AS score_naf,
    cr_wc.score AS score_welcome,
    cr_cn.score AS score_contact,
    cr_si.score AS score_size,
    cr_ge.score AS score_geo,
    cr_ge.score * 1 + cr_si.score * 3 + cr_cn.score * 3 + cr_wc.score * 3 + cr_nf.score * 5 AS score_total
FROM
    crit_geo cr_ge
INNER JOIN
    crit_size cr_si ON cr_si.id_internal = cr_ge.id_internal
INNER JOIN
    crit_naf cr_nf ON cr_nf.id_internal = cr_ge.id_internal
INNER JOIN
    crit_welcome cr_wc ON cr_wc.id_internal = cr_ge.id_internal
INNER JOIN
    crit_contact cr_cn ON cr_cn.id_internal = cr_ge.id_internal
INNER JOIN
    entreprises e ON e.id_internal = cr_ge.id_internal
LEFT JOIN
    naf ON e.naf = naf.sous_classe_a_732
ORDER BY score_total DESC
LIMIT 100
"""