In [24]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### **Import packages**

In [25]:
from lib import *
from fun import *

### **Metavariables**

In [3]:
# Path of the polygon shapefile (block groups) opened with fiona in the loading cell.
shapefile = "data/polygons/TG00CAGRP.shp"
# Path of the tabular dataset read into `df` with pandas.
dffile = "data/original_df.csv"
# Distance handed to Buffer_A_Shape for every hole-free polygon
# (meters, according to the variable name).
buffer_distance_in_meters = 50

### **Loading data**

In [4]:
# Read every feature of the shapefile into memory and close the file right away:
# the context manager releases the fiona handle instead of leaving it open for
# the lifetime of the kernel. `blocks` is therefore a plain list of feature
# records, each exposing its geometry under the "geometry" key.
with fiona.open(shapefile) as shapefile_source:
    blocks = list(shapefile_source)

# Tabular data, one row per observation of the original dataset.
df = pd.read_csv(dffile)

### **Transforming data**

##### Polygons Data

In [5]:
# One shapely geometry per block-group feature, built from its "geometry" record
blocks_polygons = [shape(feature["geometry"]) for feature in blocks]

In [6]:
# Hole-free polygons for each block group: only the exterior rings are kept
blocks_polygons_filled = []
for polygonid, item in enumerate(blocks_polygons):

    if item.geom_type == "MultiPolygon":
        # Walk the parts through `.geoms`: iterating a multi-part geometry
        # directly is deprecated in Shapely 1.8 and removed in Shapely 2.0.
        res = MultiPolygon([Polygon(subitem.exterior) for subitem in item.geoms])
    elif item.geom_type == "Polygon":
        res = Polygon(item.exterior)
    else:
        # Fail loudly: with two independent `if`s an unexpected geometry type
        # would silently reuse (and re-tag) the geometry of the previous
        # iteration, or raise NameError on the very first item.
        raise TypeError(
            "Unsupported geometry type %r for polygon %d" % (item.geom_type, polygonid)
        )

    # Tag the geometry with its position in `blocks_polygons`; the matching
    # cell reads this attribute back from the STRtree query results.
    # NOTE(review): setting custom attributes on geometries relies on
    # Shapely 1.x behaviour - confirm the pinned Shapely version.
    res.polygonid = polygonid
    blocks_polygons_filled.append(res)

In [7]:
# Buffered version of every hole-free polygon, using `buffer_distance_in_meters`.
# NOTE(review): Buffer_A_Shape is a project helper; presumably it grows the shape
# by that distance expressed in meters - verify against its definition.
blocks_polygons_buffered = [
    Buffer_A_Shape(filled_polygon, buffer_distance_in_meters)
    for filled_polygon in blocks_polygons_filled
]

##### Kaggle Data

In [8]:
# Tag every row of the original frame with its positional index (0 .. n-1)
df["rowid"] = np.arange(df.shape[0])

In [9]:
# Collapse the frame so that each distinct (longitude, latitude) pair becomes a
# single row, i.e. one row per spatial unit.
mean_columns = ["housing_median_age",
                "total_rooms",
                "population",
                "households",
                "median_income",
                "median_house_value"]

# Numeric columns are averaged; the original row ids are kept as a list and the
# distinct ocean_proximity labels as a set.
dicagg = {column: "mean" for column in mean_columns}
dicagg["rowid"] = list
dicagg["ocean_proximity"] = set

grouped_by_location = df.groupby(["longitude","latitude"])
df_points_collapsed = grouped_by_location.agg(dicagg).reset_index()

# Positional identifier of each collapsed spatial unit
df_points_collapsed["blockid"] = np.arange(df_points_collapsed.shape[0])

### **Correspondance blockid -> polygonid**

In [10]:
# One shapely Point per collapsed row, built from (longitude, latitude)
centroids_list = [
    Point(lon, lat)
    for lon, lat in zip(df_points_collapsed["longitude"], df_points_collapsed["latitude"])
]

# Spatial index over the hole-free polygons, queried once per point.
# NOTE(review): the loop below reads `.polygonid` on the query results, so it
# expects STRtree.query to return the geometry objects themselves (Shapely 1.x
# behaviour) - confirm the pinned Shapely version.
tree_polygons_blocks = STRtree(blocks_polygons_filled)
polygons_candidates = [tree_polygons_blocks.query(point) for point in centroids_list]

# For every point keep the id of the matching polygon:
#   - no candidate        -> sentinel -999 (filtered out in a later cell)
#   - a single candidate  -> that polygon
#   - several candidates  -> the one whose centroid is closest to the point
correspondance = []
for point, candidates in zip(centroids_list, polygons_candidates):
    n_candidates = len(candidates)
    if n_candidates == 0:
        matched_id = -999
    elif n_candidates == 1:
        matched_id = candidates[0].polygonid
    else:
        distances = np.array([point.distance(candidate.centroid) for candidate in candidates])
        matched_id = candidates[np.argmin(distances)].polygonid

    correspondance.append(matched_id)

### **Création de features**

In [11]:
# Express the mean house value in thousands, cast to an integer.
# NOTE: this overwrites the column in place, so re-running the cell divides by
# 1000 a second time.
house_value_in_thousands = df_points_collapsed["median_house_value"].div(1000)
df_points_collapsed["median_house_value"] = house_value_in_thousands.astype(int)

In [12]:
# Attach the matched polygon id to every collapsed row. Wrapping the list in a
# Series makes the assignment align on the index labels of the frame.
df_points_collapsed["polygonid"] = pd.Series(correspondance)

# Rank of each row among the rows sharing the same polygon (0 for the first one)
df_points_collapsed["numbering"] = df_points_collapsed.groupby(["polygonid"]).cumcount()

# Keep a single row per polygon and drop the rows that matched no polygon (-999).
# The explicit `.copy()` makes the result an independent frame, so the columns
# added by the following cells do not trigger SettingWithCopyWarning.
is_first_in_polygon = df_points_collapsed["numbering"] == 0
has_polygon = df_points_collapsed["polygonid"] != -999
df_points_collapsed = df_points_collapsed[is_first_in_polygon & has_polygon].copy()

In [13]:
# Area of the polygon matched to each retained row.
# NOTE(review): this reads `blocks_polygons` (holes kept), not the hole-free
# list, and shapely areas are expressed in squared coordinate units -
# presumably squared degrees here; confirm the CRS of the shapefile.
matched_polygon_ids = df_points_collapsed["polygonid"].values
area_array = [blocks_polygons[polygon_index].area for polygon_index in matched_polygon_ids]
df_points_collapsed["area"] = area_array

In [19]:
# Rank-transform the median income, then rescale the ranks linearly so that the
# smallest rank maps to 0 and the largest to 1.
income_ranks = stats.rankdata(df_points_collapsed["median_income"].values)
income_shifted = income_ranks - income_ranks.min()
income_array = income_shifted / (income_shifted.max() - income_shifted.min())

# Colour of each row on the "RdYlGn" colormap, spanning the observed range
minval = income_array.min()
maxval = income_array.max()
income_color_array = ColorMapper(minval,maxval,income_array,"RdYlGn")
df_points_collapsed["income_color"] = income_color_array

In [20]:
# Rank-transform the median house value, then rescale the ranks linearly so that
# the smallest rank maps to 0 and the largest to 1.
house_ranks = stats.rankdata(df_points_collapsed["median_house_value"].values)
house_shifted = house_ranks - house_ranks.min()
house_array = house_shifted / (house_shifted.max() - house_shifted.min())

# Colour of each row on the "RdYlGn" colormap, spanning the observed range
minval = house_array.min()
maxval = house_array.max()
house_color_array = ColorMapper(minval,maxval,house_array,"RdYlGn")
df_points_collapsed["house_color"] = house_color_array

In [21]:
# Gap between the normalised house-value rank and the normalised income rank:
#   delta > 0 -> housing is expensive relative to income (overpriced area)
#   delta < 0 -> housing is cheap relative to income (affordable area)
delta_array = house_array - income_array

# Colour of each row on the "RdYlGn" colormap, spanning the observed delta range
minval = delta_array.min()
maxval = delta_array.max()
delta_color_array = ColorMapper(minval,maxval,delta_array,"RdYlGn")

df_points_collapsed["delta_color"] = delta_color_array
df_points_collapsed["delta"] = delta_array.round(2)

In [22]:
# Ratio features, all rounded to two decimals.
# "households_density" is the polygon area (scaled by 1e6) divided by the number
# of households, i.e. an area per household.
# NOTE(review): the meaning of the 1e6 factor depends on the unit of `area` - confirm.
area_per_household = df_points_collapsed["area"] * 1000000 / df_points_collapsed["households"]
df_points_collapsed["households_density"] = area_per_household.round(2)

rooms_per_household = df_points_collapsed["total_rooms"] / df_points_collapsed["households"]
df_points_collapsed["rooms_by_household"] = rooms_per_household.round(2)

people_per_household = df_points_collapsed["population"] / df_points_collapsed["households"]
df_points_collapsed["people_by_households"] = people_per_household.round(2)

rooms_per_person = df_points_collapsed["total_rooms"] / df_points_collapsed["population"]
df_points_collapsed["rooms_by_people"] = rooms_per_person.round(2)

### **Création du geojson**

In [26]:
# Columns exported as floats rounded to two decimals
float_columns = ["longitude",
                 "latitude",
                 "housing_median_age",
                 "total_rooms",
                 "population",
                 "households",
                 "median_income",
                 "median_house_value",
                 "households_density",
                 "rooms_by_household",
                 "people_by_households",
                 "rooms_by_people"]

# Columns exported as plain Python ints
int_columns = ["blockid","polygonid","numbering"]

features = []

# Build one GeoJSON feature per retained row. The frame is converted to records
# once, instead of doing one `iloc[...]` row lookup per feature.
for rowid, data in enumerate(df_points_collapsed.to_dict("records")):
    for key, value in data.items():
        # Only values of existing keys are replaced, which is safe while iterating
        if key in float_columns:
            data[key] = round(float(value), 2)
        elif key in int_columns:
            data[key] = int(value)
        elif key == "ocean_proximity":
            # The aggregated value is a set: pick its smallest label so the
            # export is reproducible (set iteration order is not stable across
            # interpreter runs).
            data[key] = min(value, key=str)

    # "rowid" is exported as the position of the feature; the aggregated list of
    # original row ids is replaced.
    # NOTE(review): confirm the original ids are not needed in the tooltip.
    data["rowid"] = rowid

    # Styling properties read back by the folium style function
    data["fillcolor"] = data["delta_color"]
    data["fill_opacity"] = 0.66
    data["color"] = "white"
    data["opacity"] = 0

    polygonid = data["polygonid"]
    feature = {"type":"Feature",
               "properties":data,
               "geometry":shapely.geometry.mapping(blocks_polygons_filled[polygonid])}
    features.append(feature)

final_geojson = {"type": "FeatureCollection","features" : features}

### **Création de la map**

In [27]:
# Centre the map on the mean (latitude, longitude) of the retained rows
barycentre = df_points_collapsed[["latitude","longitude"]].values.mean(axis=0)
m = folium.Map(location=barycentre,
               zoom_start=6,
               width="66%",
               tiles="Cartodbdark_matter")


def stylefun(x):
    """Return the folium style of a GeoJSON feature, read from its own properties."""
    properties = x['properties']
    return {'fillColor' : properties['fillcolor'],
            'fillOpacity' : properties['fill_opacity'],
            'color' : properties['color'],
            'opacity' : properties['opacity']}


# (property name, label shown in the tooltip), in display order
tooltip_labels = [("rowid", "rowid : "),
                  ("median_income", "Salaire : "),
                  ("delta", "Delta : "),
                  ("housing_median_age", "Age : "),
                  ("total_rooms", "Nombre de pièces : "),
                  ("population", "Pop. : "),
                  ("households", "Nombre de ménages : "),
                  ("households_density", "Surface / ménages : "),
                  ("rooms_by_household", "N. pièces / ménage : "),
                  ("people_by_households", "N. pers. / ménage : "),
                  ("rooms_by_people", "N. pièces / pers. : "),
                  ("median_house_value", "Prix immobilier (TARGET) : ")]

tooltip = folium.features.GeoJsonTooltip(fields=[field for field, _ in tooltip_labels],
                                         aliases=[alias for _, alias in tooltip_labels])

# Polygon layer, coloured per feature by `stylefun`
folium.GeoJson(data=final_geojson,
               style_function=stylefun,
               smooth_factor=5,
               tooltip=tooltip).add_to(m)


# Map controls and alternative base layers
plugins.Fullscreen().add_to(m)
plugins.MeasureControl().add_to(m)
folium.TileLayer('openstreetmap').add_to(m)
folium.TileLayer('Stamen Terrain').add_to(m)
folium.LayerControl().add_to(m)

<folium.map.LayerControl at 0x7f0da749a340>