The purpose of this notebook is to expose the causal structure underlying territorial polarisation in 1980-2020 France through canonical regression methods. The causal models we will discover will help discriminating between numerous qualitative assesments of this dynamics.

It is essentially based on the statsmodels library (Seabold, S., & Perktold, J. (2010). Statsmodels: Econometric and Statistical Modeling with Python. Proceedings of the 9th Python in Science Conference, 2010.).

In [1]:
    ### Importing libraries ###

import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt

import geopandas as gpd
import statsmodels.api as sm
from statsmodels.graphics.regressionplots import plot_ccpr_grid


In [2]:
### Importing ZE1990 data ###

area_ZE1990 = pd.read_json("../Data processing/Output/area_ZE1990.json", typ="series")
dist_matrix_ZE1990 = pd.read_json("../Data processing/Output/dist_matrix_ZE1990.json")

dUC_timeseries_ZE1990 = pd.read_json("../Data processing/Output/dUC_timeseries_ZE1990.json")
d1_timeseries_ZE1990 = pd.read_json("../Data processing/Output/d1_timeseries_ZE1990.json")
med_timeseries_ZE1990 = pd.read_json("../Data processing/Output/med_timeseries_ZE1990.json")
d9_timeseries_ZE1990 = pd.read_json("../Data processing/Output/d9_timeseries_ZE1990.json")
moy_timeseries_ZE1990 = pd.read_json("../Data processing/Output/moy_timeseries_ZE1990.json")
intQ_timeseries_ZE1990 = pd.read_json("../Data processing/Output/intQ_timeseries_ZE1990.json")


ValueError: Expected object or value

In [None]:
### Importing ZE2010 data ###

area_ZE2010 = pd.read_json("../Data processing/Output/area_ZE2010.json", typ="series")
dist_matrix_ZE2010 = pd.read_json("../Data processing/Output/dist_matrix_ZE2010.json")

dUC_timeseries_ZE2010 = pd.read_json("../Data processing/Output/dUC_timeseries_ZE2010.json")
d1_timeseries_ZE2010 = pd.read_json("../Data processing/Output/d1_timeseries_ZE2010.json")
med_timeseries_ZE2010 = pd.read_json("../Data processing/Output/med_timeseries_ZE2010.json")
d9_timeseries_ZE2010 = pd.read_json("../Data processing/Output/d9_timeseries_ZE2010.json")
## moy_timeseries_ZE2010 = pd.read_json("../Data processing/Output/moy_timeseries_ZE2010.json")
intQ_timeseries_ZE2010 = pd.read_json("../Data processing/Output/intQ_timeseries_ZE2010.json")

tert_timeseries_ZE2010 = pd.read_json("../Data processing/Output/tert_timeseries_ZE2010.json")

In [None]:
### Importing other data

foreigndebt_timeseries = pd.read_json("../Data processing/Output/foreigndebt_timeseries.json")
foreigncapitals_timeseries = pd.read_json("../Data processing/Output/foreigncapitals_timeseries.json")

We will below implement a global prediction of polarisation levels from financiarisation and productivity disparities

In [None]:
### Extracting D1 and D9 index for ZE1990 and ZE2010 data

def D1D9_polarisation(med_timeseries, bottom_timeseries, top_timeseries, D1D9_number):
    top_index = med_timeseries.mean().nlargest(D1D9_number).index
    bottom_index = med_timeseries.mean().nsmallest(D1D9_number).index
    
    D1D9_pol = np.log(top_timeseries[top_index]).mean(axis=1) -  np.log(bottom_timeseries[bottom_index]).mean(axis=1)
    
    return(D1D9_pol)

D1D9_number = 25

In [None]:
### Regression on global polarisation (static)

    ## Defining exog (with centered regressors)
X = pd.DataFrame(index = range(1999, 2018))

X["Intercept"] = 1
X["Dette étrangère"] = np.log(foreigndebt_timeseries)
X["Capitaux étrangers"] = np.log(foreigncapitals_timeseries)
X["Polarisation de la composition sectorielle"] = D1D9_polarisation(med_timeseries_ZE2010, tert_timeseries_ZE2010, tert_timeseries_ZE2010, D1D9_number)

X[X.columns[1:]] = X[X.columns[1:]]-X[X.columns[1:]].mean() ### centering all columns but intercept

    ## Defining endog
y = pd.DataFrame(index = range(2001,2018))

y["Polarisation des revenus"] = pd.concat([D1D9_polarisation(med_timeseries_ZE1990, d1_timeseries_ZE1990, d9_timeseries_ZE1990, D1D9_number), D1D9_polarisation(med_timeseries_ZE2010, d1_timeseries_ZE2010, d9_timeseries_ZE2010, D1D9_number)])

    ## Regressing
results = sm.GLS(y, X.loc[y.index]).fit()

print(results.summary())

In [None]:
### Regression on global polarisation (dynamic)

    ## Defining exog
X = pd.DataFrame(index = range(1999, 2018))
X["Intercept"] = 1

X["Dette étrangère"] = np.log(foreigndebt_timeseries)
X["Capitaux étrangers"] = np.log(foreigncapitals_timeseries)
X["Polarisation de la composition sectorielle"] = D1D9_polarisation(med_timeseries_ZE2010, tert_timeseries_ZE2010, tert_timeseries_ZE2010, D1D9_number)
X["Polarisation des revenus"] = pd.concat([D1D9_polarisation(med_timeseries_ZE1990, d1_timeseries_ZE1990, d9_timeseries_ZE1990, D1D9_number), D1D9_polarisation(med_timeseries_ZE2010, d1_timeseries_ZE2010, d9_timeseries_ZE2010, D1D9_number)])

X = X.dropna()
X[X.columns[1:]] = X[X.columns[1:]]-X[X.columns[1:]].mean() ### centering all columns but intercept

    ## Defining endog
y = pd.DataFrame(index = range(1999,2018))

y["Variation annuelle de la polarisation des revenus"] = X["Polarisation des revenus"].diff().shift(-1)
y = y.dropna()

    ## Regressing
results = sm.GLS(y, X.loc[y.index]).fit()

print(results.summary())

In [None]:
### Drawing data

fig = plt.figure(figsize=(9,6))

X_plot = X

plt.plot(X_plot["Dette étrangère"], label="Montant de la dette d'État détenus par des capitaux étrangers")
plt.plot(X_plot["Capitaux étrangers"], label="Montant des parts des entreprises du CAC40 détenus par des capitaux étrangers")
plt.plot(X_plot["Polarisation de la composition sectorielle"], label="Polarisation entre la tertiarisation des " + str(D1D9_number) + " ZE les plus (resp. les moins) riches")
plt.plot(X_plot["Polarisation des revenus"], label="Polarisation entre le d9 (resp. le d1) des " + str(D1D9_number) + " ZE les plus (resp. les moins) riches")

plt.legend()

plt.savefig("Global regression variables.png")
plt.show()

In [None]:
### Defining data for regression on local polarisation proxies (ZE1990)

    ## Defining endog
X = pd.DataFrame(index=pd.MultiIndex.from_product([med_timeseries_ZE1990.columns, range(2001,2009)]))
X["Intercept"] = 1

X["UC density"] = np.log(dUC_timeseries_ZE1990).stack().swaplevel()
X["Median revenue"] = np.log(med_timeseries_ZE1990).stack().swaplevel()
X["Interdecile quotient"] = np.log(intQ_timeseries_ZE1990).stack().swaplevel()

X["Dette étrangère"] = np.log(foreigndebt_timeseries)
X["Capitaux étrangers"] = np.log(foreigncapitals_timeseries)

markpot_ZE1990 = (med_timeseries_ZE1990*dUC_timeseries_ZE1990*area_ZE1990).dot((1/dist_matrix_ZE1990).replace([np.inf, -np.inf], 1))
X["Market potential"] = np.log(markpot_ZE1990).stack().swaplevel()

X[X.columns[1:]] = X[X.columns[1:]]-X[X.columns[1:]].mean() ### centering all columns but intercept
X = X.dropna()

    
    ## Defining exog
y = pd.DataFrame(index=pd.MultiIndex.from_product([med_timeseries_ZE1990.columns, range(2001,2009)]))

y["UC density"] = np.log(dUC_timeseries_ZE1990).diff().shift(-1).stack().swaplevel()
y["Median revenue"] = np.log(med_timeseries_ZE1990).diff().shift(-1).stack().swaplevel()
y["Interdecile quotient"] = np.log(intQ_timeseries_ZE1990).diff().shift(-1).stack().swaplevel()

y = y.dropna()

In [None]:
    ## Regressing on UC density
results = sm.GLS(y["UC density"], X.loc[y.index]).fit()

print(results.summary())

In [None]:
    ## Regressing on med revenue
results = sm.GLS(y["Median revenue"], X.loc[y.index]).fit()

print(results.summary())

In [None]:
    ## Regressing on interdecile quotient
results = sm.GLS(y["Interdecile quotient"], X.loc[y.index]).fit()

print(results.summary())

In [None]:
### Defining data for regression on local polarisation proxies (ZE2010)

    ## Defining endog
X = pd.DataFrame(index=pd.MultiIndex.from_product([med_timeseries_ZE2010.columns, range(2009,2017)]))
X["Intercept"] = 1

X["UC density"] = np.log(dUC_timeseries_ZE2010).stack().swaplevel()
X["Median revenue"] = np.log(med_timeseries_ZE2010).stack().swaplevel()
X["Interdecile quotient"] = np.log(intQ_timeseries_ZE2010).stack().swaplevel()

markpot_ZE2010 = (med_timeseries_ZE2010*dUC_timeseries_ZE2010*area_ZE2010).dot((1/dist_matrix_ZE2010).replace([np.inf, -np.inf], 1))
X["Market potential"] = np.log(markpot_ZE2010).stack().swaplevel()

X[X.columns[1:]] = X[X.columns[1:]]-X[X.columns[1:]].mean() ### centering all columns but intercept
X = X.dropna()
    
    ## Defining exog
y = pd.DataFrame(index=pd.MultiIndex.from_product([med_timeseries_ZE2010.columns, range(2009,2017)]))

y["UC density"] = np.log(dUC_timeseries_ZE2010).diff().shift(-1).stack().swaplevel()
y["Median revenue"] = np.log(med_timeseries_ZE2010).diff().shift(-1).stack().swaplevel()
y["Interdecile quotient"] = np.log(intQ_timeseries_ZE2010).diff().shift(-1).stack().swaplevel()

y = y.dropna()

In [None]:
    ## Regressing on UC density
results = sm.GLS(y["UC density"], X.loc[y.index]).fit()

print(results.summary())

In [None]:
    ## Regressing on med revenue
results = sm.GLS(y["Median revenue"], X.loc[y.index]).fit()

print(results.summary())

In [None]:
    ## Regressing on interdecile quotient
results = sm.GLS(y["Interdecile quotient"], X.loc[y.index]).fit()

print(results.summary())