# Enseignement supérieur - round 2

Nous avons reçu un fichier plus complet et avons également amélioré l'outil de rapprochement. Nous faisons tourner cet outil sur le nouveau fichier.


In [9]:
import csv
import json
import os

import pandas as pd

from batid.models import Building
from batid.services.guess_bdg_new import Guesser
from batid.utils.misc import is_float

# Raw input: the CSV export that is read with ";" as delimiter further down.
raw_file = "data/bat_rnb_mesr_complet.csv"
# Intermediate file written by Guesser.create_work_file and reloaded by the later cells.
work_file = "results/guess.json"
# Final output: the raw rows with the extra "rnb_id" and "rnb_shape" columns.
final_file = "results/enseignement_superieur_w_rnb_id.csv"

# Django refuses synchronous ORM calls made from a running event loop unless this
# variable is set (see the Django async-safety documentation).
# NOTE(review): presumably needed because the notebook kernel runs such a loop - confirm.
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"


## Fonction pour transformer les données brutes en "row" pour l'objet Guesser


In [None]:
def _row_to_guess_params(row):
    # Address
    address = f"{row['Adresse']}, {row['CP']} {row['Ville']}".strip()

    # Coords
    lat = float(row["lat"]) if is_float(row["lat"]) else None
    lng = float(row["long"]) if is_float(row["long"]) else None

    # Name
    big_place_name = row.get("Etablissement", None)
    bdg_name = row.get("Libellé bât/ter", None)
    name = f"{big_place_name} {bdg_name}".strip()

    return {
        "ext_id": row.get("clef", None),
        "lat": lat,
        "lng": lng,
        "name": name,
        "address": address,
    }

### Création du fichier de travail si nécessaire

In [None]:
# Create the work file only when it does not exist yet; an existing one is left untouched.
if not os.path.exists(work_file):
    # newline="" is required by the csv module so that line breaks embedded in
    # quoted fields are parsed correctly.
    # NOTE(review): the file is decoded with the platform default encoding; the
    # accented column names only match if that is the encoding of the export.
    with open(raw_file, "r", newline="") as f:
        print("- creating work file -")

        reader = csv.DictReader(f, delimiter=";")
        data = [_row_to_guess_params(row) for row in reader]

    # All rows are in memory at this point, so the raw file is already closed
    # when the work file gets written.
    guesser = Guesser()
    guesser.create_work_file(data, work_file)

### Rapprochement

In [None]:
# Run the matching step on the work file.
# NOTE(review): presumably the results are written back into the work file, since
# the following cells reload it to read the matches - confirm in Guesser.
guesser = Guesser()
guesser.guess_work_file(work_file)

### Rapport

In [3]:
# Load the guesses stored in the work file into a fresh Guesser and print the
# matching summary (row count, match rate and breakdown per match reason).
guesser = Guesser()
guesser.load_work_file(work_file)
guesser.report()

-- Report --
Number of rows: 9499
Number of match: 5915 (62.27%)

-- match_reasons : absolute --
point_on_bdg             5248
isolated_closest_bdg      573
found_name_in_osm          48
precise_address_match      46
Name: match_reason, dtype: int64

-- match_reasons : % --
point_on_bdg             55.247921
isolated_closest_bdg      6.032214
found_name_in_osm         0.505316
precise_address_match     0.484262
Name: match_reason, dtype: float64


### Conversion au format enseignement supérieur

In [11]:
# Reload the guesses from the work file; they are looked up by the "clef" column.
guesser = Guesser()
guesser.load_work_file(work_file)

final_data = []

# newline="" is required by the csv module for files handed to its readers and writers.
with open(raw_file, "r", newline="") as f:
    reader = csv.DictReader(f, delimiter=";")

    # Output columns: the raw header followed by the two RNB columns. Taking the
    # header from the reader (rather than from the first row) keeps the export
    # working when the raw file has no data rows.
    fieldnames = list(reader.fieldnames or [])
    for extra_column in ("rnb_id", "rnb_shape"):
        if extra_column not in fieldnames:
            fieldnames.append(extra_column)

    for row in reader:
        ext_id = row["clef"]

        # Rows without a match keep empty RNB columns
        row["rnb_id"] = None
        row["rnb_shape"] = None

        if ext_id in guesser.guesses:
            match = guesser.guesses[ext_id]["match"]
            if match:
                # Building comes from batid.models (see the import cell)
                bdg = Building.objects.get(rnb_id=match["rnb_id"])

                row["rnb_id"] = match["rnb_id"]
                # NOTE(review): guards against a building stored without a
                # shape - confirm whether the model allows that.
                row["rnb_shape"] = bdg.shape.wkt if bdg.shape else None

        final_data.append(row)

# Without newline="" the csv writer emits "\r\r\n" line endings on Windows.
with open(final_file, "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter=";")
    writer.writeheader()
    writer.writerows(final_data)

print('- done -')

- done -
