This notebook is dedicated to the extraction and pre-processing of relevant data for the cartography and dynamic analysis of territorial polarisation in 1980-2020 France. 

In [1]:
### Importing libraries ###

import numpy as np
import json
import pandas as pd
import geopandas as gpd
import datetime

In [2]:
### Loading the commune-level geometry layer ###

    ## GeoJSON layer of communes; the later cells join it on its "code" column
communes_geojson_path = "Raw data/communes-version-simplifiee.geojson"
communal_geo = gpd.read_file(communes_geojson_path)

In [3]:
### Building & exporting ZE1990 geography ###

    ## Commune -> ZE1990 composition table.
    ## header=1 combined with names=... drops the first two lines of the raw file
    ## and labels the two retained columns (positions 0 and 4) "Code" and "ZE1990".
    ## The commune code is the merge key, so it is read as text instead of being
    ## left to dtype inference (presumably INSEE codes such as "01001" / "2A004",
    ## where a numeric parse would lose leading zeros - TODO confirm).
with open("Raw data/ZE1990.csv") as ZE1990_comp_csv :
    ZE1990_comp = pd.read_csv(
        ZE1990_comp_csv,
        header=1,
        usecols=[0,4],
        names=["Code","ZE1990"],
        dtype={"Code": str},
    )

    ## Attach the zone code to every commune (inner join: unmatched communes are dropped)
ZE1990_geo = communal_geo.merge(ZE1990_comp, left_on="code", right_on="Code")[["ZE1990","geometry"]]

    ## Discard communes without a geometry, union the remaining ones per zone,
    ## bring the zone code back as a regular column and reproject to EPSG:3857.
    ## to_crs(epsg=...) replaces the deprecated {'init': 'epsg:3857'} dict syntax.
ZE1990_geo = (
    ZE1990_geo[ZE1990_geo.geometry.notnull()]
    .dissolve("ZE1990")
    .reset_index()
    .to_crs(epsg=3857)
)

    ## Export
    ## No `index` argument: to_file expects a boolean there, and a truthy value
    ## would add the positional row index as an extra "index" property.
    ## "ZE1990" is already a regular column at this point, so it is written anyway.
ZE1990_geo.to_file("Output/ZE1990_geo.geojson", driver="GeoJSON")

    ## Setting index
ZE1990_geo = ZE1990_geo.set_index("ZE1990")

In [4]:
### Building & exporting ZE2010 geography ###

    ## Commune -> ZE2010 composition table.
    ## header=5 combined with names=... drops the first six lines of the raw file
    ## and labels the two retained columns (positions 0 and 2) "Code" and "ZE2010".
    ## The commune code is the merge key, so it is read as text instead of being
    ## left to dtype inference (presumably INSEE codes such as "01001" / "2A004",
    ## where a numeric parse would lose leading zeros - TODO confirm).
with open("Raw data/ZE2010.csv") as ZE2010_comp_csv :
    ZE2010_comp = pd.read_csv(
        ZE2010_comp_csv,
        header=5,
        usecols=[0,2],
        names=["Code","ZE2010"],
        dtype={"Code": str},
    )

    ## Attach the zone code to every commune (inner join: unmatched communes are dropped)
ZE2010_geo = communal_geo.merge(ZE2010_comp, left_on="code", right_on="Code")[["ZE2010","geometry"]]

    ## Discard communes without a geometry, union the remaining ones per zone,
    ## bring the zone code back as a regular column and reproject to EPSG:3857.
    ## to_crs(epsg=...) replaces the deprecated {'init': 'epsg:3857'} dict syntax.
ZE2010_geo = (
    ZE2010_geo[ZE2010_geo.geometry.notnull()]
    .dissolve("ZE2010")
    .reset_index()
    .to_crs(epsg=3857)
)

    ## Export
ZE2010_geo.to_file("Output/ZE2010_geo.geojson", driver="GeoJSON")

    ## Setting index
ZE2010_geo = ZE2010_geo.set_index("ZE2010")

In [5]:
### Building & exporting distance matrix for ZE1990
### (centroid-to-centroid distances between zones, in km; rows and columns are ZE1990 codes)

    ## Centroids are computed once, up front, instead of inside the per-column lambda.
    ## They are taken in Lambert-93 (EPSG:2154) rather than in the layer's EPSG:3857:
    ## Web Mercator units are not ground metres (the scale error grows with latitude),
    ## so dividing its planar distances by 1000 does not yield kilometres.
    ## NOTE(review): assumes the zones lie in metropolitan France, where Lambert-93
    ## is valid - confirm no overseas zone is present in the layer.
centroids_ZE1990 = ZE1990_geo.geometry.to_crs(epsg=2154).centroid

    ## Empty zone x zone frame; each column is filled with the distances (m -> km)
    ## from every centroid to the centroid of the zone naming that column.
dist_matrix_ZE1990 = pd.DataFrame(columns=ZE1990_geo.index, index=ZE1990_geo.index)
dist_matrix_ZE1990 = dist_matrix_ZE1990.apply(
    lambda x: centroids_ZE1990.distance(centroids_ZE1990.loc[x.name])/1000
)
dist_matrix_ZE1990.to_json("Output/dist_matrix_ZE1990.json")

In [6]:
### Building & exporting distance matrix for ZE2010
### (centroid-to-centroid distances between zones, in km; rows and columns are ZE2010 codes)

    ## Centroids are computed once, up front, instead of inside the per-column lambda.
    ## They are taken in Lambert-93 (EPSG:2154) rather than in the layer's EPSG:3857:
    ## Web Mercator units are not ground metres (the scale error grows with latitude),
    ## so dividing its planar distances by 1000 does not yield kilometres.
    ## NOTE(review): assumes the zones lie in metropolitan France, where Lambert-93
    ## is valid - confirm no overseas zone is present in the layer.
centroids_ZE2010 = ZE2010_geo.geometry.to_crs(epsg=2154).centroid

    ## Empty zone x zone frame; each column is filled with the distances (m -> km)
    ## from every centroid to the centroid of the zone naming that column.
dist_matrix_ZE2010 = pd.DataFrame(columns=ZE2010_geo.index, index=ZE2010_geo.index)
dist_matrix_ZE2010 = dist_matrix_ZE2010.apply(
    lambda x: centroids_ZE2010.distance(centroids_ZE2010.loc[x.name])/1000
)
dist_matrix_ZE2010.to_json("Output/dist_matrix_ZE2010.json")

In [7]:
### Assembling & exporting the yearly ZE1990 indicator tables (2001 to 2008)

dates = range(2001,2009)

    ## Positions of the (zone code, nUC, med, intQ) columns in the raw yearly files;
    ## the files up to 2006 and the files from 2007 on use different positions
columns_2001_2006 = [0,5,7,18]
columns_2007_2008 = [0,3,5,16]
field_labels = ["ZE1990","nUC","med","intQ"]

    ## One empty year x zone table per indicator
nUC_timeseries_ZE1990 = pd.DataFrame(index=dates, columns=ZE1990_geo.index)
med_timeseries_ZE1990 = pd.DataFrame(index=dates, columns=ZE1990_geo.index)
intQ_timeseries_ZE1990 = pd.DataFrame(index=dates, columns=ZE1990_geo.index)

tables_by_field = {
    "nUC": nUC_timeseries_ZE1990,
    "med": med_timeseries_ZE1990,
    "intQ": intQ_timeseries_ZE1990,
}

for year in dates:
    column_positions = columns_2001_2006 if year < 2007 else columns_2007_2008
    revenue_path = "Raw data/FILO" + str(year) + "_DEC_ZE1990.csv"

    with open(revenue_path) as revenue_file :
        yearly_data = pd.read_csv(revenue_file, header = 6, usecols=column_positions, names = field_labels)

    yearly_data = yearly_data.set_index("ZE1990")

    ## Row assignment aligns the zone codes of the yearly file on the table columns
    for field, table in tables_by_field.items():
        table.loc[year] = yearly_data[field]

    ## Export
nUC_timeseries_ZE1990.to_json("Output/nUC_timeseries_ZE1990.json")
med_timeseries_ZE1990.to_json("Output/med_timeseries_ZE1990.json")
intQ_timeseries_ZE1990.to_json("Output/intQ_timeseries_ZE1990.json")

In [8]:
### Assembling & exporting the yearly ZE2010 indicator tables (2009 to 2016)

dates = range(2009,2017)

    ## (header row, column positions) of the (zone code, nUC, med, intQ) fields in the
    ## raw yearly files; the files up to 2011 and the files from 2012 on are laid out differently
layout_2009_2011 = (6, [0,3,5,16])
layout_2012_2016 = (5, [0,4,7,18])
field_labels = ["ZE2010","nUC","med","intQ"]

    ## One empty year x zone table per indicator
nUC_timeseries_ZE2010 = pd.DataFrame(index=dates, columns=ZE2010_geo.index)
med_timeseries_ZE2010 = pd.DataFrame(index=dates, columns=ZE2010_geo.index)
intQ_timeseries_ZE2010 = pd.DataFrame(index=dates, columns=ZE2010_geo.index)

tables_by_field = {
    "nUC": nUC_timeseries_ZE2010,
    "med": med_timeseries_ZE2010,
    "intQ": intQ_timeseries_ZE2010,
}

for year in dates:
    header_row, column_positions = layout_2009_2011 if year < 2012 else layout_2012_2016
    revenue_path = "Raw data/FILO" + str(year) + "_DEC_ZE2010.csv"

    with open(revenue_path) as revenue_file :
        yearly_data = pd.read_csv(revenue_file, header = header_row, usecols=column_positions, names = field_labels)

    yearly_data = yearly_data.set_index("ZE2010")

    ## Row assignment aligns the zone codes of the yearly file on the table columns
    for field, table in tables_by_field.items():
        table.loc[year] = yearly_data[field]

    ## Export
nUC_timeseries_ZE2010.to_json("Output/nUC_timeseries_ZE2010.json")
med_timeseries_ZE2010.to_json("Output/med_timeseries_ZE2010.json")
intQ_timeseries_ZE2010.to_json("Output/intQ_timeseries_ZE2010.json")

In [9]:
### Building & exporting growthseries from 2001 to 2008
### (year-on-year log growth, log(x_t / x_{t-1}), per zone)

    ## The level tables are cast to float first: they are created empty and filled
    ## row by row, so the vectorised np.log below needs a numeric dtype.
    ## This replaces the element-wise applymap (deprecated in recent pandas).
nUC_levels_ZE1990 = nUC_timeseries_ZE1990.astype(float)
med_levels_ZE1990 = med_timeseries_ZE1990.astype(float)
intQ_levels_ZE1990 = intQ_timeseries_ZE1990.astype(float)

    ## Ratio to the previous year, then dropna() removes the first year (no predecessor).
    ## NOTE(review): dropna() also removes any later year in which at least one zone
    ## is missing - confirm this is intended rather than dropping the first row only.
nUC_growthseries_ZE1990 = np.log((nUC_levels_ZE1990 / nUC_levels_ZE1990.shift(1)).dropna())
med_growthseries_ZE1990 = np.log((med_levels_ZE1990 / med_levels_ZE1990.shift(1)).dropna())
intQ_growthseries_ZE1990 = np.log((intQ_levels_ZE1990 / intQ_levels_ZE1990.shift(1)).dropna())

    ## Export
nUC_growthseries_ZE1990.to_json("Output/nUC_growthseries_ZE1990.json")
med_growthseries_ZE1990.to_json("Output/med_growthseries_ZE1990.json")
intQ_growthseries_ZE1990.to_json("Output/intQ_growthseries_ZE1990.json")



In [10]:
### Building & exporting growthseries from 2009 to 2016
### (year-on-year log growth, log(x_t / x_{t-1}), per zone)

    ## The level tables are cast to float first: they are created empty and filled
    ## row by row, so the vectorised np.log below needs a numeric dtype.
    ## This replaces the element-wise applymap (deprecated in recent pandas).
nUC_levels_ZE2010 = nUC_timeseries_ZE2010.astype(float)
med_levels_ZE2010 = med_timeseries_ZE2010.astype(float)
intQ_levels_ZE2010 = intQ_timeseries_ZE2010.astype(float)

    ## Ratio to the previous year, then dropna() removes the first year (no predecessor).
    ## NOTE(review): dropna() also removes any later year in which at least one zone
    ## is missing - confirm this is intended rather than dropping the first row only.
nUC_growthseries_ZE2010 = np.log((nUC_levels_ZE2010 / nUC_levels_ZE2010.shift(1)).dropna())
med_growthseries_ZE2010 = np.log((med_levels_ZE2010 / med_levels_ZE2010.shift(1)).dropna())
intQ_growthseries_ZE2010 = np.log((intQ_levels_ZE2010 / intQ_levels_ZE2010.shift(1)).dropna())

    ## Export
nUC_growthseries_ZE2010.to_json("Output/nUC_growthseries_ZE2010.json")
med_growthseries_ZE2010.to_json("Output/med_growthseries_ZE2010.json")
intQ_growthseries_ZE2010.to_json("Output/intQ_growthseries_ZE2010.json")