In [1]:
import pickle
import os.path
import geopandas
from collections import defaultdict
from matplotlib import pyplot as plt
import pandas as pd

import geo

## Get class probabilities and info about roofs

In [2]:
def get_batch(i_batch, probas_dir='/server/var/data/OpenSolarMap/probas/chunks/'):
    """Load the pickled chunk number ``i_batch``.

    Parameters
    ----------
    i_batch : int
        Chunk number; the file read is ``probas_{i_batch:04d}`` inside
        ``probas_dir``.
    probas_dir : str, optional
        Directory holding the chunk files. Defaults to the location that
        was previously hard-coded, so existing calls are unaffected.

    Returns
    -------
    object or None
        The unpickled content of the chunk file, or ``None`` when the file
        does not exist. The rest of this notebook treats the content as a
        dict keyed by building identifier -- presumably that is the on-disk
        format; verify against the code that writes the chunks.
    """
    probas_filename = os.path.join(probas_dir, 'probas_{:04d}'.format(i_batch))

    if not os.path.isfile(probas_filename):
        return None

    # NOTE: pickle.load can execute arbitrary code; only read chunk files
    # coming from a trusted source.
    # The context manager closes the file handle even if unpickling fails.
    with open(probas_filename, 'rb') as probas_file:
        return pickle.load(probas_file)

In [3]:
# Merge every available chunk into a single mapping. Batches for which
# get_batch returns a falsy value (missing file or empty content) are skipped.
building_info_all = {}

for i_batch in range(1410):
    building_info = get_batch(i_batch)

    if not building_info:
        continue

    building_info_all.update(building_info)

In [4]:
# Flatten the per-building records into parallel lists (one per future
# DataFrame column), all following the iteration order of ident_list.
ident_list = building_info_all.keys()
building_records = [building_info_all[ident] for ident in ident_list]

insee_list = [record['INSEE'] for record in building_records]
x_center_list = [record['x_center'] for record in building_records]
y_center_list = [record['y_center'] for record in building_records]

# 'probas' is indexed positionally: entries 0..3 become class_0..class_3.
class_0_list = [record['probas'][0] for record in building_records]
class_1_list = [record['probas'][1] for record in building_records]
class_2_list = [record['probas'][2] for record in building_records]
class_3_list = [record['probas'][3] for record in building_records]

In [5]:
# One row per identifier in ident_list; the columns are the parallel lists
# built in the previous cell.
building_columns = {
    'insee': insee_list,
    'x_center': x_center_list,
    'y_center': y_center_list,
    'class_0': class_0_list,
    'class_1': class_1_list,
    'class_2': class_2_list,
    'class_3': class_3_list,
}
df = pd.DataFrame(data=building_columns, index=ident_list)

In [6]:
# Write the table to CSV (to_csv writes the index as the first column by default).
building_probas_csv = '../building_probas.csv'
df.to_csv(building_probas_csv)

In [7]:
# Preview only the first rows instead of rendering the whole frame
# (it holds over a million rows, see len(df) below).
df.head(10)

Unnamed: 0,class_0,class_1,class_2,class_3,insee,x_center,y_center
242163151,0.037239,0.459789,0.062222,0.440750,60686,656312.511589,6901681.584753
186069734,0.020745,0.861572,0.043419,0.074263,36044,602548.270058,6633932.520310
165323140,0.950507,0.006132,0.006008,0.037353,57628,994150.729495,6885855.816963
97758028,0.096858,0.848166,0.011252,0.043724,68117,1011015.342309,6780043.150458
145094031,0.056170,0.087757,0.018528,0.837544,40308,383236.306267,6292879.900353
231081127,0.041548,0.723313,0.018310,0.216829,42307,830149.781108,6495661.844682
47111124,0.015291,0.050433,0.848436,0.085840,69123,843470.115736,6517053.026119
154808871,0.280481,0.522076,0.059618,0.137825,38451,872171.570013,6520286.658009
175217148,0.096197,0.094227,0.035955,0.773621,94068,662440.041965,6855630.150467
140943421,0.030778,0.227334,0.276052,0.465836,93073,668684.436288,6873197.492450


In [8]:
# Number of rows in the table.
df.shape[0]

1373000

## Get info about communes

In [9]:
# Load the commune layer and remove the row with index label 36181.
# The original note identifies that row as Landerneau; the reason for
# excluding it is not recorded here.
communes_shapefile = '../communes-20150101-100m.shp'
choropleth_map = geopandas.read_file(communes_shapefile).drop(36181)  # Landerneau

In [10]:
# Store the centroid of each row's geometry as an extra column.
# NOTE(review): centroids are computed in the layer's own CRS; if that CRS
# is geographic (lon/lat) they are only approximate -- confirm.
commune_centroids = choropleth_map.geometry.centroid
choropleth_map['centroid'] = commune_centroids

In [11]:
# Split each centroid point into separate x / y coordinate columns.
centroids = choropleth_map.centroid
choropleth_map['centroid_x'] = centroids.apply(lambda point: point.x)
choropleth_map['centroid_y'] = centroids.apply(lambda point: point.y)

In [12]:
# Convert every centroid with geo.geo2carto exactly once (it was previously
# called twice per row, once for each output coordinate) and then split the
# result: element 0 -> centroid_x_L93, element 1 -> centroid_y_L93.
# geo2carto receives (p.y, p.x) -- presumably (latitude, longitude) going to
# Lambert-93, as the "_L93" suffix suggests; confirm against the geo module.
centroids_l93 = [geo.geo2carto(p.y, p.x) for p in choropleth_map.centroid]
choropleth_map['centroid_x_L93'] = [xy[0] for xy in centroids_l93]
choropleth_map['centroid_y_L93'] = [xy[1] for xy in centroids_l93]

In [13]:
# Dump the full commune table, including all columns added above, to CSV.
communes_info_csv = '../communes_info.csv'
choropleth_map.to_csv(communes_info_csv)

In [14]:
# Plain DataFrame holding only the scalar commune attributes; the geometry
# and centroid point columns are not carried over.
communes_info = pd.DataFrame(data={
    column: choropleth_map[column]
    for column in (
        'insee',
        'nom',
        'surf_m2',
        'centroid_x',
        'centroid_y',
        'centroid_x_L93',
        'centroid_y_L93',
    )
})

In [15]:
# Write the geometry-free commune table to CSV.
communes_info_without_geo_csv = '../communes_info_without_geo.csv'
communes_info.to_csv(communes_info_without_geo_csv)