In [2]:
import json
import geopandas as gpd
import pandas as pd
from django.contrib.gis.geos import GEOSGeometry
from shapely.geometry import mapping
from batid.services.guess_bdg_new import Guesser, PartialRoofHandler

# GeoPackage file the roof polygons are read from (its "toit" layer is loaded below)
f_path = './export_rnb.gpkg'
# Path handed to the Guesser to create, process and reload its work file
workfile_path = "./guess.json"

In [2]:
# Load the roof polygons from the "toit" layer of the GeoPackage
gdf = gpd.read_file(f_path, layer="toit")

# The source SRID is required to reproject the geometries; fail early when the
# layer carries no CRS instead of crashing on `None.to_epsg()`
if gdf.crs is None:
    raise ValueError(f"Layer 'toit' of {f_path} has no CRS, its SRID cannot be determined")

print('SRID')
srid = gdf.crs.to_epsg()
print(srid)

# to_epsg() returns None when no EPSG code matches the CRS; a None SRID would
# otherwise be propagated silently to the geometry reprojection
if srid is None:
    raise ValueError(f"No EPSG code could be determined for the CRS of {f_path}")

print("Get a sneakpeak of the data")
print(gdf.head())

print("Rows count")
print(len(gdf))



SRID
3946
Get a sneakpeak of the data
   gid  fid     id        parcelle       typetoit  hfacade  ztoitmini  \
0  NaN    1  42119  69259000CA0036  Toit non plan      5.6      217.0   
1  NaN    1  37869  69259000CE0104      Toit plan     20.2      231.8   
2  NaN    1  37885  69259000BY0082  Toit non plan      4.4      207.7   
3  NaN    1  37886  69259000BY0093      Toit plan      2.7      207.3   
4  NaN    1  37887  69259000BY0093  Toit non plan      4.4      207.7   

   ztoitmaxi  hmax  millesime  _predicate  \
0      219.8   8.4       2018  INTERSECTS   
1      231.8  20.2       2018  INTERSECTS   
2      209.1   5.8       2018  INTERSECTS   
3      207.3   2.7       2018  INTERSECTS   
4      209.1   5.8       2018  INTERSECTS   

                                            geometry  
0  POLYGON ((1844725.515 5168325.123, 1844724.025...  
1  POLYGON ((1845761.464 5167720.974, 1845759.953...  
2  POLYGON ((1845879.239 5168615.414, 1845879.364...  
3  POLYGON ((1845882.089 5168610

In [3]:
# Define function to transform geopackage rows into guess work file inputs
def to_input(row):
    """Turn one GeoPackage row into an input record for the guess work file.

    The row's "geometry" value is serialized to GeoJSON, loaded as a
    GEOSGeometry, tagged with the module-level `srid` and reprojected to
    SRID 4326.

    Returns a dict with:
      - 'ext_id': the row's "id" value
      - 'polygon': the reprojected geometry as a GeoJSON-like dict
    """
    shape_as_geojson = json.dumps(mapping(row["geometry"]))

    roof = GEOSGeometry(shape_as_geojson)
    # The source SRID is assigned after construction, then the geometry is
    # reprojected in place
    roof.srid = srid
    roof.transform(4326)

    record = {
        'ext_id': row["id"],
        'polygon': json.loads(roof.json),
    }
    return record

In [4]:
# Build inputs
inputs = []
# Optional cap on the number of converted rows; a falsy value means "no cap"
max_len = None

for position, (idx, row) in enumerate(gdf.iterrows(), start=1):
    inputs.append(to_input(row))

    # `position` equals len(inputs) here, since one input is appended per row
    cap_reached = bool(max_len) and position >= max_len
    if cap_reached:
        break

print('Sneak peak of row to inputs transformation')
print(inputs[:1])


# Create the work file from the collected inputs
guesser = Guesser()
guesser.create_work_file(inputs, workfile_path)

print(f"Guesser had {len(guesser.guesses)} guesses")



Sneak peak of row to inputs transformation
[{'ext_id': 42119, 'polygon': {'type': 'Polygon', 'coordinates': [[[4.858588636054994, 45.699804623187475], [4.8585692463174, 45.69979684270552], [4.858548765629505, 45.69980366416946], [4.858588636054994, 45.699804623187475]]]}}]
Guesser had 9862 guesses


In [5]:
# Launch the guess work with specific handlers
guesser = Guesser()
# Only the partial-roof handler is used for this run
guesser.handlers = [PartialRoofHandler()]
# Process the work file located at workfile_path
guesser.guess_work_file(workfile_path)

print(f"Worked on {len(guesser.guesses)} guesses")

# Report
# Prints the number of rows, the number of matches and the breakdown by match reason
guesser.report()

Worked on 9862 guesses
-- Report --
Number of rows: 9862
Number of match: 7720 (78.28%)

-- match_reasons : absolute --
sole_bdg_intersects_roof_enough     7371
isolated_bdg_intersects_roof         185
many_bdgs_covered_enough_by_roof     164
Name: match_reason, dtype: int64

-- match_reasons : % --
sole_bdg_intersects_roof_enough     74.741432
isolated_bdg_intersects_roof         1.875887
many_bdgs_covered_enough_by_roof     1.662949
Name: match_reason, dtype: float64


## Vérification de la présence de faux positifs

In [6]:
# Reload the guesses stored in the work file into a fresh Guesser
guesser = Guesser()
guesser.load_work_file(workfile_path)


# Show matches whose reason is "sole_bdg_intersects_roof_enough"
# (50 is presumably the number of rows to display — TODO confirm)
guesser.display_reason("sole_bdg_intersects_roof_enough", 50)

     input_ext_id  match_rnb_id                     match_reason
2392       369676  GMZJRKH5QRZW  sole_bdg_intersects_roof_enough
594        125338  ZA5GTQDMX3FH  sole_bdg_intersects_roof_enough
4642       409624  BNGEK6XTEBNX  sole_bdg_intersects_roof_enough
1686       353179  EVEZ32AM3YFG  sole_bdg_intersects_roof_enough
6770       445358  6D5WYH32353N  sole_bdg_intersects_roof_enough
7811       512854  RWT7CQ226KVD  sole_bdg_intersects_roof_enough
7486       510867  52F8N9QS92AF  sole_bdg_intersects_roof_enough
7013       461799  2HWCPAMT3WWQ  sole_bdg_intersects_roof_enough
2346       368343  CCASM2BMNFR1  sole_bdg_intersects_roof_enough
3676       388759  TGYPPJNSQVK9  sole_bdg_intersects_roof_enough
1917       360986  ER9QEBK86CY1  sole_bdg_intersects_roof_enough
8602       525178  2EDQ7RZCHEHN  sole_bdg_intersects_roof_enough
3911       394804  TK89K4SHC38C  sole_bdg_intersects_roof_enough
3026       379516  7QHPDNEJT7AD  sole_bdg_intersects_roof_enough
2419       370752  KC6Q74

Après inspection d'une quarantaine de résultats ayant pour raison `sole_bdg_intersects_roof_enough`, on constate l'absence de faux positifs.
On peut se demander jusqu'à combien on peut descendre le taux de recouvrement minimum (pour le moment 50%) avant d'obtenir des faux positifs.

In [7]:
# Reload the guesses stored in the work file into a fresh Guesser
guesser = Guesser()
guesser.load_work_file(workfile_path)


# Show matches whose reason is "isolated_bdg_intersects_roof"
# (50 is presumably the number of rows to display — TODO confirm)
guesser.display_reason("isolated_bdg_intersects_roof", 50)

     input_ext_id  match_rnb_id                  match_reason
153         52027  GFA9GMYJ7FX1  isolated_bdg_intersects_roof
1951       362465  RFMNCPY98S7Y  isolated_bdg_intersects_roof
8650       525509  F7H1EWVRHETB  isolated_bdg_intersects_roof
2038       363253  Z75JRWHQ8A53  isolated_bdg_intersects_roof
6903       455885  ZWVSEE9VQN39  isolated_bdg_intersects_roof
8704       525887  CN1T7SM1YT3W  isolated_bdg_intersects_roof
7203       465738  94JC3TT2Y89H  isolated_bdg_intersects_roof
2096       363850  A5ZSAZMT2ATH  isolated_bdg_intersects_roof
2114       364062  RPETQK6PF2P8  isolated_bdg_intersects_roof
4414       404413  9CSB72VHADJC  isolated_bdg_intersects_roof
1980       362768  K4EB75YX8VXG  isolated_bdg_intersects_roof
1563       340768  RPETQK6PF2P8  isolated_bdg_intersects_roof
8713       526052  GE4W537YRYZ7  isolated_bdg_intersects_roof
5824       424868  FMTK7ECVD35V  isolated_bdg_intersects_roof
7740       512498  7Q3A5RCSQ4Z5  isolated_bdg_intersects_roof
3837    

Après inspection de 50 matchs de raison `isolated_bdg_intersects_roof` pris de façon aléatoire, on ne constate pas de faux positifs.

In [5]:
# Reload the guesses stored in the work file into a fresh Guesser
guesser = Guesser()
guesser.load_work_file(workfile_path)


# Show matches whose reason is "many_bdgs_covered_enough_by_roof", restricted to the
# listed columns (20 is presumably the number of rows to display — TODO confirm)
guesser.display_reason("many_bdgs_covered_enough_by_roof", 20, ["input_ext_id", "match_rnb_id", "match_match_details_rnb_ids"])

     input_ext_id  match_rnb_id match_match_details_rnb_ids
3234       382491  QCVC8X8R3QF7   QCVC8X8R3QF7,B54F4SK4X8WZ
1541       339956  DXJ918V1M6NR   DXJ918V1M6NR,2TZ452NWX4FY
7840       513119  N677GKDJBRT5                N677GKDJBRT5
3678       388777  NFBFB23829GY   NFBFB23829GY,D719X9TJQVGD
2986       378940  FD42M26XWVE4                FD42M26XWVE4
4140       397625  663MT8MSFXGV                663MT8MSFXGV
6777       445508  Z88A39CHFAYX                Z88A39CHFAYX
8502       524058  6NAHWDRWWJP7   6NAHWDRWWJP7,865MVMV8QBAT
7487       510874  9ZPVBPT79JSZ                9ZPVBPT79JSZ
2774       376170  R9FDDB3GSVTH   R9FDDB3GSVTH,YJNJ9DQR3RZM
2371       368771  5G2Q8C2CZWX9                5G2Q8C2CZWX9
8046       515902  82331NGJ4J49                82331NGJ4J49
8633       525317  XWA2J47P3Z3F                XWA2J47P3Z3F
1349       143522  GVKS4R2EZ3V6                GVKS4R2EZ3V6
3483       386012  XDVXKME4X2J2                XDVXKME4X2J2
231         56130  VYSZJF4Z9FHJ         

## Inspection des non-match

In [9]:
# Reload the guesses stored in the work file into a fresh Guesser
guesser = Guesser()
guesser.load_work_file(workfile_path)
# Show inputs that got no match (30 is presumably the number of rows to display — TODO confirm)
guesser.display_nomatches(30)

     input_ext_id
8913       538188
9496       563563
9606       563673
7083       462391
4275       400362
9519       563586
9735       575660
9737       575662
8049       515908
9289       563235
5791       424444
320         59518
6973       461522
9678       575344
4520       406916
325         59523
6476       437192
6892       453849
1238       141712
655        127419
8722       526084
4927       413385
2041       363278
6677       442809
9539       563606
2064       363492
7028       461911
4399       403744
9828       576124
8285       521936


## Vérification de cas de non-match



### 404787 -> 79QT2KSPZTYT 
- Je ne comprends pas pourquoi le résultat ne sort pas, à l'oeil je pense qu'il devrait sortir.
- Après investigation, il apparaît que le bon résultat ne sortait pas car nous ne retenions que les deux bâtiments les plus proches. Or, dans ce cas, plusieurs bâtiments avaient une distance à zéro : le bon bâtiment ne se trouvait pas parmi les deux premiers et n'était tout simplement pas retenu. Pour corriger, nous gardons désormais les 20 premiers bâtiments plutôt que seulement les deux premiers.


### 429191 -> KAA6XQQXB91N
- il devrait sortir je pense
- après investigation : c'était un cas similaire à "404787 -> 79QT2KSPZTYT".
- NB : on avait un bug important. Si un seul bâtiment parmi la liste des plus proches intersectait assez le toit, alors on retournait le premier bâtiment de cette liste en résultat. Le souci est que ce premier bâtiment n'était pas forcément celui qui correspondait.


### 127551 -> None 
- aucun bâtiment dans le RNB
- rien à faire

### 446291 -> None 
- ne touche aucun bâtiment dans le rnb
- rien à faire

### 373773 -> RPETQK6PF2P8
- devrait sortir. Est-ce que le second bâtiment le plus proche est trop proche ? Il est à environ 6,72 mètres. Le plafond est à 10m, est-ce qu'on l'abaisse ??
- Solution : en descendant la distance minimum à 6 mètres, on obtient le bon résultat

### 441662 -> F9VWBTYH4V24 : 
- devrait sortir. Est-ce que le second bâtiment le plus proche est trop proche ? Il est environ à 6,29m !
- Solution : en descendant la distance minimum à 6 mètres, on obtient le bon résultat

### 516611 -> SSY88WB5H2ZH : 
- devrait sortir. Pourquoi il ne sort pas ?
- Solution : idem "404787 -> 79QT2KSPZTYT"

### 138328 -> WZPJ61B6Z53K 
- devrait sortir. Pourquoi il ne sort pas ?
- Solution : idem "404787 -> 79QT2KSPZTYT"


In [10]:
# Check cases where intersection rate is low

guesser = Guesser()
guesser.load_work_file(workfile_path)

# Flatten the nested guess dicts into one row per guess; nested keys are joined with "_"
df = pd.json_normalize(list(guesser.guesses.values()), sep="_")

# Matched guesses at or below this intersection percentage are the ones to inspect
low_intersection_max = 0.3

cases = df[(df['match_rnb_id'].notnull()) & (df['match_match_details_intersection_percentage'] <= low_intersection_max)]

print(len(cases))

# DataFrame.sample raises ValueError when asked for more rows than available, so the
# sample size is capped; a fixed random_state keeps the displayed sample stable across re-runs
cases.sample(min(10, len(cases)), random_state=0)

36


Unnamed: 0,match_reason,finished_steps,input_ext_id,input_polygon_type,input_polygon_coordinates,match_rnb_id,match_lat_lng,match_distance,match_match_details,match_match_details_intersection_percentage,match,match_match_details_rnb_ids
2337,sole_bdg_intersects_roof_enough,[partial_roof],368245,Polygon,"[[[4.862206999439553, 45.69198099992153], [4.8...",7KR4QV8EXXQ4,"45.69199149588687, 4.862275732018679",0.0,,0.277299,,
3820,sole_bdg_intersects_roof_enough,[partial_roof],393025,Polygon,"[[[4.821123000281464, 45.7553929996939], [4.82...",MBZVB52H8ZJM,"45.75524068402461, 4.821189070804633",0.0,,0.288861,,
5328,sole_bdg_intersects_roof_enough,[partial_roof],418543,Polygon,"[[[4.826747999704529, 45.761891000426395], [4....",6FV7EBY6HYMT,"45.76188285107897, 4.8268138951808925",0.0,,0.287403,,
9849,sole_bdg_intersects_roof_enough,[partial_roof],576145,Polygon,"[[[4.723362571836677, 45.74802597715703], [4.7...",29GKM1X5BFXA,"45.748021274233665, 4.72337398979822",0.0,,0.253237,,
8597,sole_bdg_intersects_roof_enough,[partial_roof],525018,Polygon,"[[[4.720383999388683, 45.75231499980302], [4.7...",NS52M6PSCBK6,"45.752262137885566, 4.720430910647115",0.0,,0.274985,,
2014,sole_bdg_intersects_roof_enough,[partial_roof],363074,Polygon,"[[[4.862658999640724, 45.691908999753984], [4....",ENCEFFNG665D,"45.691905713524704, 4.8625950959886985",0.0,,0.261057,,
6750,sole_bdg_intersects_roof_enough,[partial_roof],444872,Polygon,"[[[4.812596000553793, 45.7678339995692], [4.81...",4V1Z2XRJQQKD,"45.76780099324617, 4.812592457493956",0.0,,0.271969,,
6537,sole_bdg_intersects_roof_enough,[partial_roof],438712,Polygon,"[[[4.82304700024689, 45.767157000253306], [4.8...",S5QMZHDX9687,"45.767147424179, 4.822985473275821",0.0,,0.260099,,
87,sole_bdg_intersects_roof_enough,[partial_roof],42121,Polygon,"[[[4.858575877910486, 45.69966152344142], [4.8...",6QJBEEJXNFJC,"45.699734499254404, 4.858596478410701",0.0,,0.283968,,
5150,sole_bdg_intersects_roof_enough,[partial_roof],416341,Polygon,"[[[4.827244999640476, 45.764915000382196], [4....",TYZMKCS6NJNP,"45.76486409762315, 4.827260371602426",0.0,,0.277543,,
