In [1]:
# Importations de packages
import numpy as np
import geopandas as gpd
import folium
import src.traitements as traitements
import pandas as pd

In [2]:
from src.config import *
from src.utils import make_path, check_dir
from src.traitements import AppariementRunner

### Paramètres

In [3]:
r = 25000.0 # rayon de la zone d'étude en mètres
radius_name = int(r/1000)
roi_name = METRO_NAME.lower()

# Paramétrage de la période d'étude
date_start = '2013-01-01' # date de début
date_end   = '2023-01-01' # date de fin
time_period_start = date_start[:4]
time_period_end   = date_end[:4]

## Pipeline traitements

In [4]:
warehouses_t0 = AppariementRunner(
    centroid=CENTER,
    roi_name=roi_name,
    radius=r).run(date_analysis=date_start)

2024-03-07 14:51:48,402 - INFO ::  Process Siren file
2024-03-07 14:54:15,366 - INFO ::  SIREN : (16834, 5)
2024-03-07 14:54:16,648 - INFO ::  Siren : /home/rustt/Documents/IGAST/2_PROJETS/Projet_analyse_spatiale/logistics_sprawl/data/processed/SIREN/SIREN_Entrepots_2013-01-01.csv
2024-03-07 14:54:16,650 - INFO ::  Process GeoSiren file
2024-03-07 14:54:16,651 - INFO ::  Load GeoSiren file...


Function TraitementSiren Took 148.2461 seconds


2024-03-07 14:54:55,522 - INFO ::  GeoSiren file loaded !
2024-03-07 14:54:56,992 - INFO ::  Communes on buffer...
2024-03-07 14:55:42,184 - INFO ::  Join geosiren on roi..
2024-03-07 14:56:10,232 - INFO ::  Save geosiren on roi.. : Index(['siret', 'x', 'y', 'epsg', 'geometry'], dtype='object')
2024-03-07 14:57:18,750 - INFO ::  GEOSIREN : (1080181, 5)
2024-03-07 14:57:19,740 - INFO ::  Geosiren : /home/rustt/Documents/IGAST/2_PROJETS/Projet_analyse_spatiale/logistics_sprawl/data/processed/lyon/2013/SIREN/GeoSiren_lyon_25km.gpkg
2024-03-07 14:57:19,740 - INFO ::  Merge Siren and GeoSiren


Function TraitementGeoSiren Took 183.0905 seconds
siren : (16834, 5)
geosiren : (1080181, 5)
Index(['siret', 'dateFin', 'dateDebut', 'activitePrincipaleEtablissement',
       'nomenclatureActivitePrincipaleEtablissement', 'x', 'y', 'epsg',
       'geometry'],
      dtype='object')


2024-03-07 14:58:56,260 - INFO ::  MERGE SIREN : (778, 9)
2024-03-07 14:58:56,485 - INFO ::  Merge : /home/rustt/Documents/IGAST/2_PROJETS/Projet_analyse_spatiale/logistics_sprawl/data/processed/lyon/2013/Entrepots/Entrepots_lyon_2013_25km.gpkg
2024-03-07 14:58:56,486 - INFO ::  Appariement processing...


Function JoinSirenGeosiren Took 96.7440 seconds


2024-03-07 14:59:21,965 - INFO ::  Lines dist_min : (544, 2)
2024-03-07 14:59:23,036 - INFO ::  Appariement done !


Function AppSirenBDTopo Took 26.5703 seconds
Function run Took 454.6548 seconds


In [5]:
warehouses_t1 = AppariementRunner(
    centroid=CENTER,
    roi_name=roi_name,
    radius=r).run(date_analysis=date_end)

2024-03-07 14:59:23,065 - INFO ::  Process Siren file
2024-03-07 15:01:56,748 - INFO ::  SIREN : (25628, 5)
2024-03-07 15:01:57,918 - INFO ::  Siren : /home/rustt/Documents/IGAST/2_PROJETS/Projet_analyse_spatiale/logistics_sprawl/data/processed/SIREN/SIREN_Entrepots_2023-01-01.csv
2024-03-07 15:01:57,921 - INFO ::  Process GeoSiren file
2024-03-07 15:01:57,921 - INFO ::  Load GeoSiren file...


Function TraitementSiren Took 154.8528 seconds


2024-03-07 15:02:34,425 - INFO ::  GeoSiren file loaded !
2024-03-07 15:02:35,709 - INFO ::  Communes on buffer...
2024-03-07 15:03:04,218 - INFO ::  Join geosiren on roi..
2024-03-07 15:03:30,590 - INFO ::  Save geosiren on roi.. : Index(['siret', 'x', 'y', 'epsg', 'geometry'], dtype='object')
2024-03-07 15:04:32,091 - INFO ::  GEOSIREN : (1080181, 5)
2024-03-07 15:04:32,874 - INFO ::  Geosiren : /home/rustt/Documents/IGAST/2_PROJETS/Projet_analyse_spatiale/logistics_sprawl/data/processed/lyon/2023/SIREN/GeoSiren_lyon_25km.gpkg
2024-03-07 15:04:32,876 - INFO ::  Merge Siren and GeoSiren


Function TraitementGeoSiren Took 154.9545 seconds
siren : (25628, 5)
geosiren : (1080181, 5)


2024-03-07 15:06:11,294 - INFO ::  MERGE SIREN : (1143, 9)


Index(['siret', 'dateFin', 'dateDebut', 'activitePrincipaleEtablissement',
       'nomenclatureActivitePrincipaleEtablissement', 'x', 'y', 'epsg',
       'geometry'],
      dtype='object')


2024-03-07 15:06:11,576 - INFO ::  Merge : /home/rustt/Documents/IGAST/2_PROJETS/Projet_analyse_spatiale/logistics_sprawl/data/processed/lyon/2023/Entrepots/Entrepots_lyon_2023_25km.gpkg
2024-03-07 15:06:11,577 - INFO ::  Appariement processing...


Function JoinSirenGeosiren Took 98.7004 seconds


2024-03-07 15:06:28,929 - INFO ::  Lines dist_min : (552, 2)
2024-03-07 15:06:29,584 - INFO ::  Appariement done !


Function AppSirenBDTopo Took 18.0191 seconds
Function run Took 426.5311 seconds


In [6]:
warehouses_t0 = warehouses_t0.drop_duplicates(keep="first")
warehouses_t1 = warehouses_t1.drop_duplicates(keep="first")

In [7]:
print(f"WH {time_period_start} : {warehouses_t0.shape}")
print(f"WH {time_period_end} : {warehouses_t1.shape}")

WH 2013 : (224, 3)
WH 2023 : (233, 3)


In [63]:
len(set(warehouses_t0.ID).intersection(set(warehouses_t1.ID)))

110

In [64]:
gpd.sjoin(warehouses_t0, warehouses_t1).shape

(122, 6)

### Chargement des données

In [8]:
communes_t0_path = os.path.join(
    communes_roi_dir.format(roi_name, time_period_start),
    communes_roi_file_name.format(roi_name, time_period_start)
)

communes_t1_path = os.path.join(
    communes_roi_dir.format(roi_name, time_period_end),
    communes_roi_file_name.format(roi_name, time_period_end)
)

In [9]:
communes_t0 = gpd.read_file(communes_t0_path)
communes_t1 = gpd.read_file(communes_t0_path)

## Visualisation

In [10]:
studied_area = communes_t1.dissolve()

In [11]:
m = studied_area.explore(
    scheme='naturalbreaks',
    color='red',
    name='studied_area'
)
warehouses_t0.explore(
    m=m,
    color='green',
    name='warehouses_t0'
)
warehouses_t1.explore(
    m=m,
    color='blue',
    name='warehouses_t1'
)
folium.LayerControl().add_to(m)
m

In [12]:
m = communes_t1.drop_duplicates(subset=['NOM']).dissolve().explore(
    scheme='naturalbreaks',
    color='red',
    name='communes_t1'
)
m = communes_t0.drop_duplicates(subset=['NOM']).dissolve().explore(
    m=m,
    color='blue',
    name='communes_t0'
)
folium.LayerControl().add_to(m)
m

In [13]:
print(f"Communes {time_period_start} : {communes_t0.ID.nunique()}")
print(f"Communes {time_period_end} : {communes_t1.ID.nunique()}")

Communes 2013 : 221
Communes 2023 : 221


In [14]:
print(f"Communes {time_period_start} : {communes_t0.dissolve().area.item()/1e6: .2f}")
print(f"Communes {time_period_end} : {communes_t1.dissolve().area.item()/1e6: .2f}")

Communes 2013 :  2386.71
Communes 2023 :  2386.71


## Stats

List of variables from (BDD) - **Variables retenues**

* Name of studied metro area	- done metro
* Size of studied metro area (km2)	 -done area
* Number of municipalities	- done number_mun

* Name of warehouse data source	- done
* Time period studied for logistics sprawl analysis	- done
* Number of years analysed	- done
* First year of study (T=0)	- done : time_period_start
* T=0 Population (millions)	- done
* T=0 Population density (inhabitants/km2)	- done density_pop_km2
* T=0  Number of warehouses	- done number_ware
* T=0 Number of warehouses per million people	- done number_ware_per_popM
* T=0 Number of warehouses per 1000 km2 	- done number_ware_per_1000km2
* T=0 Average size of warehouses (m2) - done avg_size_ware
* Last year of study (T=N)	- done - time_period_end
* T=N Population (millions)	- done
* T=N Population density (inhabitants/km2)	- done
* T=N  Number of warehouses	- done
* T=N Number of warehouses per million people - done
* T=N Number of warehouses per 1000 km2 	- done
* T=N Average size of warehouses (m2)	- done
* Change in population over the years (millions) 	- done pop_change
* T=0 Average distance of warehouses to centre of gravity (km) 	- done gravity
* T=N Average distance of warehouses to centre of gravity (km) 	- done 
* Change in average distance of WHs to centre of gravity (over the years) (km)	- done  gravity_change
* Logistic sprawl (km/year)	                 - done log_sprawl_measure
* Change in number of WH over the years	             - done number_ware_change
* % change in number of WH over the years	  - done perc_ware_change
* Change of warehouses per million people over the years	- done number_ware_per_popM_change
* Logistic sprawl   - bool **?**
* Surfaces area data availability	- done surfaces_area_available (bool=True)

Not fill 
* Type of metropolitan area	- **?**
* Type of land use control 	**?**
* Gateway       **?**
* Megaregion	**?**
* Name of Megaregion	**?** - region ?
* Type of city	- qualif
* Focused Study	 **?**
* Interviews	**?**


Not used - maybe HS: 

* Urban Rent Prices per year (EUR/m2)	
* Suburban Rent Prices per year (EUR/m2)	
* % of increase in rent price of suburban compared to urban rent	
* Increase in rent price of suburban compared to urban rent	
* Logistic Sprawl analysis

In [15]:
# variables in slides - not followed, seems very closed to bdd 

# ????
#"sprawl_year":" Logistics sprawl per year.",
#"quad":"A categorical variable indicates the metropolitan area's quadrant based on its yearly sprawl level and differential warehouse rental prices.",


"""var_name = {
    "metro_name":"Name of the metropolitan area",
    "f":"Territorial area",
    "f":"Number of municipalities,
    "f":"Location in a megaregion,
    "f":"Classified as a gateway metro",
    "f":"Morphology (polycentric or monocentric)",
    "f":"Population",
    "f":"Population density(inhabitants/km2)",
    "f":"Name of the warehouse datasource",
    "f":"Time period for logistics sprawl analysis",
    "f":"Number of warehouses",
    "f":"Number of warehouses per million people",
    "f":"Number of warehouses per1000 km2",
    "f":"Average distance of warehouses to gravity center (km)",
    "f":"Logistic sprawl: Change in average distance of WHs to centre of gravity (over the years) (km)",
    "f":"Urban and suburban rent prices per year (EUR/m2)",
}"""

var_name = {
    "metro": "The name of the metropolitan area.",
    "mega_region": "The name of the mega-region to which the metropolitan area belongs.",
    "country": "The name of the country of the metropolitan area is located.",
    "continent": "The name of the continent in which the metropolitan area is located.",
    "data_sources": "The sources of data used to compile this dataset.",
    "area":"(km2) The total area of the metropolitan area in square kilometers.",
    "number_mun":" The number of municipalities included in the metropolitan area.",
    "size": "The size of the metropolitan area (small, medium, or large).",
    "urban_centrality": "Categories for urban morphology (polycentricity or monocentricity) of the metropolitan area.",
    "gateway":" Whether the metropolitan area is considered a gateway city.",
    "time_period_start": "The start year of the period covered by the dataset.",
    "time_period_end": "The end year of the period covered by the dataset.",
    "years_data": "The number of years covered by the dataset.",
    "population_t0": "The population of the metropolitan area at the start of the period covered by the dataset.",
    "number_ware_t0":" The number of warehouses in the metropolitan area at the start of the period covered by the dataset.",
    "gravity_t0":" Centrographic measure of the metropolitan area at the start of the period covered by the dataset.",
    "population_t1": "The population of the metropolitan area at the end of the period covered by the dataset.",
    "number_ware_t1": "The number of warehouses in the metropolitan area at the end of the period covered by the dataset.",
    "gravity_t1":" Centrographic measure of the metropolitan area at the end of the period covered by the dataset.",
    "log_sprawl_measure":" Logistics sprawl measure in the metropolitan area.",
    "avg_price":" The average price of logistics real estate in the metropolitan area.",
    "central": "Whether the observation is in the central area of the metropolitan area.",
    "suburban": "Whether the observation is in the suburban area of the metropolitan area.",
    "diff": "The difference between the average price of real estate in central and suburban areas of the metropolitan area.",
    "sprawl_year":" Logistics sprawl per year.",
    "quad":"A categorical variable indicates the metropolitan area's quadrant based on its yearly sprawl level and differential warehouse rental prices.",
}

In [45]:
from shapely import Point
from typing import Tuple


def compute_statistics(wh_t0, wh_t1, communes_t0, communes_t1, name, period: Tuple[str]):

    period = list(map(int, period))
    global_stats = global_statistics(name, period)
    # get the last date for area | maybe compute for both is better
    area_stats = area_statistics(communes_t1)
    evolution_stats = evoluton_statistics(wh_t0, wh_t1, communes_t0, communes_t1, period)
    tot_stats = {**global_stats, **area_stats, **evolution_stats}

    return tot_stats
    

    

def temporal_based_statistics(wh_df, 
                       communes, 
                       suffix): 

    pop = communes["POPUL"].sum()
    area = communes.unary_union.area
    n_wh = wh_df.ID.nunique()
    unitpop = 1e6

    wh_centroid = np.mean(wh_df.centroid.x), np.mean(wh_df.centroid.y)
    
    stats = {
        f"population_{suffix}": np.round(pop/unitpop, 2),
        f"density_pop_km2_{suffix}": np.round(pop / (area / 1e6), 2),
        f"number_ware_{suffix}": n_wh,
        f"number_ware_per_popM_{suffix}": np.round(n_wh / np.round(pop/unitpop, 2)), 
        f"number_ware_per_1000km2_{suffix}":n_wh / (area / 1000),
        f"avg_size_ware_{suffix}": np.round(wh_df.geometry.area.mean(), 2), 
        f"gravity_{suffix}":  np.round(np.mean(wh_df.distance(Point(wh_centroid))) / 1000, 2)
    }
    
    return stats

def evoluton_statistics(wh_t0, wh_t1, communes_t0, communes_t1, period=Tuple[int]): 

    stats_t0 = temporal_based_statistics(wh_t0, communes_t0, suffix="t0")
    stats_t1 = temporal_based_statistics(wh_t1, communes_t1, suffix="t1")

    stats = {
        "pop_change": (stats_t1["population_t1"] - stats_t0["population_t0"]), 
        "gravity_change": (stats_t1["gravity_t1"] - stats_t0["gravity_t0"]),
        "number_ware_change": (stats_t1["number_ware_t1"] - stats_t0["number_ware_t0"]),
        "perc_ware_change": ((stats_t1["number_ware_t1"] - stats_t0["number_ware_t0"]) / stats_t0["number_ware_t0"]),
        "number_ware_per_popM_change": (stats_t1["number_ware_per_popM_t1"] - stats_t0["number_ware_per_popM_t0"]),
        "log_sprawl_measure": (stats_t1["gravity_t1"] - stats_t0["gravity_t0"]) / (period[1] - period[0]),
    }

    stats = {**stats_t0, **stats_t1,**stats}    

    return stats

def global_statistics(name, period=Tuple[int]):

    
    stats = {
        "metro":name,
        "mega_region":None, 
        "country":"France", 
        "continent": "Europe", 
        "data_sources": ','.join(["BDTOPO_IGN", "SIREN"]), 
        "time_period_start": period[0], 
        "time_period_end": period[1],
        "years_data": (period[1] - period[0]),
        "surfaces_area_available":True ,# suppose
        "urban_centrality":None, 
        "gateway": None,
    }

    return stats

def area_statistics(communes): 

    stats = {
        "area": np.round(communes.unary_union.area / 1e6, 1),
        "number_mun":communes.ID.nunique(),
    }
    
    return stats


#### Compute

In [46]:
result = compute_statistics(wh_t0=warehouses_t0, 
                            wh_t1=warehouses_t1, 
                            communes_t0=communes_t0, 
                            communes_t1=communes_t1, 
                            name=roi_name, 
                            period=(time_period_start, time_period_end))

In [47]:
warehouses_t0.ID.nunique()

224

In [48]:
warehouses_t1.ID.nunique()

233

In [49]:
df = pd.DataFrame(result, index=[0])
out_dir = check_dir(project_path, "reports", roi_name)
df.to_csv(make_path(f"statistics_{time_period_start}_{time_period_end}.csv", out_dir), index=False)

In [50]:
result

{'metro': 'lyon',
 'mega_region': None,
 'country': 'France',
 'continent': 'Europe',
 'data_sources': 'BDTOPO_IGN,SIREN',
 'time_period_start': 2013,
 'time_period_end': 2023,
 'years_data': 10,
 'surfaces_area_available': True,
 'urban_centrality': None,
 'gateway': None,
 'area': 2386.7,
 'number_mun': 221,
 'population_t0': 2.08,
 'density_pop_km2_t0': 872.92,
 'number_ware_t0': 224,
 'number_ware_per_popM_t0': 108.0,
 'number_ware_per_1000km2_t0': 9.385319130154298e-05,
 'avg_size_ware_t0': 7604.11,
 'gravity_t0': 9.95,
 'population_t1': 2.08,
 'density_pop_km2_t1': 872.92,
 'number_ware_t1': 233,
 'number_ware_per_popM_t1': 112.0,
 'number_ware_per_1000km2_t1': 9.76240784520514e-05,
 'avg_size_ware_t1': 8925.48,
 'gravity_t1': 11.75,
 'pop_change': 0.0,
 'gravity_change': 1.8000000000000007,
 'number_ware_change': 9,
 'perc_ware_change': 0.04017857142857143,
 'number_ware_per_popM_change': 4.0,
 'log_sprawl_measure': 0.18000000000000008}

### Remarques

* 'number_ware_per_1000km2_t0': 9.385319130154298e-05 tres faible (zone tres grande par rapport aux zones des études ?)
* prendre 2022 si la nomnenclature est la même que 2013 ?
* communes 2013 == communes 2023 ?

In [23]:
communes_t0_path

'/home/rustt/Documents/IGAST/2_PROJETS/Projet_analyse_spatiale/logistics_sprawl/data/processed/lyon/2013/BDTOPO/communes_lyon_2013.gpkg'

In [24]:
communes_t1_path

'/home/rustt/Documents/IGAST/2_PROJETS/Projet_analyse_spatiale/logistics_sprawl/data/processed/lyon/2023/BDTOPO/communes_lyon_2023.gpkg'

In [26]:
communes_t0.shape

(294, 12)

In [27]:
communes_t1.shape

(294, 12)

In [28]:
pd.concat([communes_t0, communes_t1], ignore_index=True, axis=0).drop_duplicates().shape

(221, 12)

In [30]:
len(set(communes_t0.NOM).intersection(set(communes_t0.NOM)))

221

Pas les mêmes communes mais le même nombre d'habitants

In [32]:
print(communes_t0.POPUL.sum()) 
print(communes_t1.POPUL.sum())

2083407
2083407


In [37]:
np.round(2083407 / (communes_t0.area.sum()/1000), 2)

0.63

In [44]:
communes_t0.area.sum()/1000

3315404.177625011

### Pipeline stats

# Annexes

In [59]:
warehouses_t1[warehouses_t1.ID == "BATIMENT0000002223168854"]

Unnamed: 0,ID,NATURE,geometry
753,BATIMENT0000002223168854,"Industriel, agricole ou commercial","POLYGON Z ((857595.300 6521749.700 209.700, 85..."
3974,BATIMENT0000002223168854,"Industriel, agricole ou commercial","POLYGON Z ((857595.300 6521749.700 209.700, 85..."
8812,BATIMENT0000002223168854,"Industriel, agricole ou commercial","POLYGON Z ((857595.300 6521749.700 209.700, 85..."


In [58]:
# exemple of duplicates ID
warehouses_t1[warehouses_t1.ID == "BATIMENT0000002223168854"].explore(style_kwds=dict(color="red", fill=False))


```python
# -- deprecated
# Étape 1 : traitements de la bdd SIREN
siren_ent_t0 = traitements.TraitementSiren(date_start)
siren_ent_t1 = traitements.TraitementSiren(date_end)

# Étape 2 : traitement de la bdd GeoSIREN
geosiren_buffer = traitements.TraitementGeoSiren(CENTER[0],
                                                 CENTER[1],
                                                 r,
                                                 roi_name,
                                                 time_period_end)

# Étape 3 : jointure de SIREN sur GeoSIREN
merged_siren_t0 = traitements.JoinSirenGeosiren(siren_ent_t0,geosiren_buffer,date_start,roi_name,int(r/1000))
merged_siren_t1 = traitements.JoinSirenGeosiren(siren_ent_t1,geosiren_buffer,date_end,roi_name,int(r/1000))

# Étape 4 : Appariement de SIREN et BDTOPO
warehouses_t0 = traitements.AppSirenBDTopo(roi_name, merged_siren_t0,time_period_start,roi_name,int(r/1000))
warehouses_t1 = traitements.AppSirenBDTopo(roi_name, merged_siren_t1,time_period_end,roi_name,int(r/1000))

warehouses_t0 = traitements.AppariementRunner(roi_name=roi_name,
                                  radius=r).run(date_analysis=date_start)

warehouses_t1 = traitements.AppariementRunner(roi_name=roi_name,
                                  radius=r).run(date_analysis=date_end)
```