# Exploratory Data Analysis - Paris Quartiers (2013-2021)

**Objective**: Initial exploration of the merged Paris datasets to understand data structure, quality, distributions, and temporal patterns.

**Datasets**: 
- paris_merged_2013.parquet
- paris_merged_2017.parquet  
- paris_merged_2021.parquet

**Scope**: General data understanding (gentrification analysis will be in a separate notebook)

## 1. Setup & Data Loading

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
from pathlib import Path

# Plotting configuration
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')
%matplotlib inline

# Display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.precision', 2)

In [2]:
# Load the three datasets
data_path = Path('../datasets')

df_2013 = pd.read_parquet(data_path / 'paris_merged_2013.parquet')
df_2017 = pd.read_parquet(data_path / 'paris_merged_2017.parquet')
df_2021 = pd.read_parquet(data_path / 'paris_merged_2021.parquet')

# Combine all years for temporal analysis
df_all = pd.concat([df_2013, df_2017, df_2021], ignore_index=True)

print(f"Data loaded successfully:")
print(f"  2013: {len(df_2013)} quartiers")
print(f"  2017: {len(df_2017)} quartiers")
print(f"  2021: {len(df_2021)} quartiers")
print(f"  Combined: {len(df_all)} total observations")

Data loaded successfully:
  2013: 71 quartiers
  2017: 71 quartiers
  2021: 71 quartiers
  Combined: 213 total observations


## 2. Data Overview & Quality

In [3]:
# Dataset structure
print("Dataset structure:")
print(f"Shape: {df_all.shape}")
print(f"\nColumns ({len(df_all.columns)}):")
df_all.info()

Dataset structure:
Shape: (213, 25)

Columns (25):
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 213 entries, 0 to 212
Data columns (total 25 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   neighborhood       213 non-null    object 
 1   annee              213 non-null    int64  
 2   type_neighborhood  213 non-null    object 
 3   pct_H              213 non-null    float64
 4   n_iris_H           213 non-null    float64
 5   n_iris_D           213 non-null    float64
 6   n_iris_A           213 non-null    float64
 7   n_iris_total       213 non-null    float64
 8   median_uc          213 non-null    float64
 9   pop_bac_sup        213 non-null    float64
 10  pop_total          213 non-null    float64
 11  pop_cadres         213 non-null    float64
 12  pop_ouvriers       213 non-null    float64
 13  pop_25_39          213 non-null    float64
 14  pop_65plus         213 non-null    float64
 15  pop_immigres       213 

In [4]:
# First rows sample
print("Sample data (2021):")
df_2021.head()

Sample data (2021):


Unnamed: 0,neighborhood,annee,type_neighborhood,pct_H,n_iris_H,n_iris_D,n_iris_A,n_iris_total,median_uc,pop_bac_sup,pop_total,pop_cadres,pop_ouvriers,pop_25_39,pop_65plus,pop_immigres,pop_15plus,pct_cadres,pct_ouvriers,pct_25_39,pct_65plus,pct_immigres,prix_m2_mean,prix_m2_median,n_transactions
0,Amérique,2021,Habitation,96.0,24,0,1,25,24610.0,20017.89,57165.27,10445.82,3232.12,12611.23,9653.51,13455.97,48402.25,21.58,6.68,22.06,16.89,23.54,9072.85,9035.36,506.0
1,Arsenal,2021,Habitation,100.0,4,0,0,4,36735.0,4686.37,9042.63,2461.65,228.55,2329.56,1647.74,1580.96,7990.43,30.81,2.86,25.76,18.22,17.48,12977.36,13148.19,158.0
2,Arts et Métiers,2021,Habitation,100.0,5,0,0,5,33520.0,4989.5,8902.05,3221.35,253.19,3035.18,1062.57,1935.77,7943.02,40.56,3.19,34.1,11.94,21.75,11922.29,12039.75,186.0
3,Auteuil,2021,Habitation,96.77,30,0,1,31,40105.0,35716.4,70524.25,17317.73,1504.58,12028.79,16736.51,14273.95,61322.28,28.24,2.45,17.06,23.73,20.24,10949.35,10869.57,1051.0
4,Batignolles,2021,Habitation,100.0,16,0,0,16,37865.0,22329.67,42674.29,14339.18,1430.56,13955.7,5335.47,6803.27,37027.68,38.73,3.86,32.7,12.5,15.94,11546.05,11747.09,833.0


In [5]:
# Missing values analysis
missing = df_all.isnull().sum()
missing_pct = (missing / len(df_all)) * 100
missing_df = pd.DataFrame({
    'Missing_Count': missing,
    'Missing_Percent': missing_pct
}).sort_values('Missing_Count', ascending=False)

print("Missing values by column:")
print(missing_df[missing_df['Missing_Count'] > 0])

# Visualize missing values
if missing_df['Missing_Count'].sum() > 0:
    fig, ax = plt.subplots(figsize=(10, 6))
    missing_df[missing_df['Missing_Count'] > 0].plot(y='Missing_Percent', kind='barh', ax=ax)
    ax.set_xlabel('Missing %')
    ax.set_title('Missing Values by Variable')
    plt.tight_layout()
    plt.show()
else:
    print("\n✓ No missing values found!")

Missing values by column:
Empty DataFrame
Columns: [Missing_Count, Missing_Percent]
Index: []

✓ No missing values found!


In [6]:
# Check quartier consistency across years
quartiers_2013 = set(df_2013['quartier'])
quartiers_2017 = set(df_2017['quartier'])
quartiers_2021 = set(df_2021['quartier'])

print("Quartier consistency across years:")
print(f"  2013: {len(quartiers_2013)} quartiers")
print(f"  2017: {len(quartiers_2017)} quartiers")
print(f"  2021: {len(quartiers_2021)} quartiers")
print(f"  Common to all years: {len(quartiers_2013 & quartiers_2017 & quartiers_2021)} quartiers")

if quartiers_2013 != quartiers_2017 or quartiers_2013 != quartiers_2021:
    print("\nDifferences found:")
    only_2013 = quartiers_2013 - quartiers_2017 - quartiers_2021
    only_2017 = quartiers_2017 - quartiers_2013 - quartiers_2021
    only_2021 = quartiers_2021 - quartiers_2013 - quartiers_2017
    if only_2013:
        print(f"  Only in 2013: {only_2013}")
    if only_2017:
        print(f"  Only in 2017: {only_2017}")
    if only_2021:
        print(f"  Only in 2021: {only_2021}")

KeyError: 'quartier'

## 3. Descriptive Statistics

In [None]:
# Define variable groups
id_vars = ['quartier', 'annee', 'type_quartier', 'pct_H']
iris_vars = ['n_iris_H', 'n_iris_D', 'n_iris_A', 'n_iris_total']
income_vars = ['median_uc']
education_vars = ['pop_bac_sup']
pop_abs_vars = ['pop_total', 'pop_cadres', 'pop_ouvriers', 'pop_25_39', 'pop_65plus', 'pop_immigres', 'pop_15plus']
pop_pct_vars = ['pct_cadres', 'pct_ouvriers', 'pct_25_39', 'pct_65plus', 'pct_immigres']
price_vars = ['prix_m2_mean', 'prix_m2_median', 'n_transactions']

numeric_vars = iris_vars + income_vars + education_vars + pop_abs_vars + pop_pct_vars + price_vars

In [None]:
# Overall statistics by year
print("=" * 80)
print("DESCRIPTIVE STATISTICS BY YEAR")
print("=" * 80)

for year in [2013, 2017, 2021]:
    print(f"\n{'='*40}")
    print(f"Year {year}")
    print(f"{'='*40}")
    df_year = df_all[df_all['annee'] == year]
    print(df_year[numeric_vars].describe().T[['mean', 'std', 'min', '50%', 'max']])

In [None]:
# Key indicators summary table
key_indicators = ['median_uc', 'prix_m2_median', 'pct_cadres', 'pct_ouvriers', 'pct_immigres', 'pop_total']

summary_stats = []
for year in [2013, 2017, 2021]:
    df_year = df_all[df_all['annee'] == year]
    stats = df_year[key_indicators].describe().loc[['mean', 'std', 'min', 'max']].T
    stats['year'] = year
    summary_stats.append(stats)

summary_df = pd.concat(summary_stats)
print("\nKey Indicators Summary:")
summary_df

## 4. Distribution Analysis

### 4.1 Income Distribution

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Distribution by year
for year in [2013, 2017, 2021]:
    df_year = df_all[df_all['annee'] == year]
    axes[0].hist(df_year['median_uc'], alpha=0.5, bins=20, label=str(year))

axes[0].set_xlabel('Median Income per UC (€)')
axes[0].set_ylabel('Number of Quartiers')
axes[0].set_title('Income Distribution by Year')
axes[0].legend()

# Box plot comparison
df_all.boxplot(column='median_uc', by='annee', ax=axes[1])
axes[1].set_xlabel('Year')
axes[1].set_ylabel('Median Income per UC (€)')
axes[1].set_title('Income Distribution Comparison')
plt.suptitle('')

plt.tight_layout()
plt.show()

### 4.2 Real Estate Prices

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Price per m² distribution
for year in [2013, 2017, 2021]:
    df_year = df_all[df_all['annee'] == year]
    axes[0, 0].hist(df_year['prix_m2_median'], alpha=0.5, bins=20, label=str(year))

axes[0, 0].set_xlabel('Median Price per m² (€)')
axes[0, 0].set_ylabel('Number of Quartiers')
axes[0, 0].set_title('Price Distribution by Year')
axes[0, 0].legend()

# Box plot
df_all.boxplot(column='prix_m2_median', by='annee', ax=axes[0, 1])
axes[0, 1].set_xlabel('Year')
axes[0, 1].set_ylabel('Median Price per m² (€)')
axes[0, 1].set_title('Price Comparison')
plt.suptitle('')

# Number of transactions
df_all.boxplot(column='n_transactions', by='annee', ax=axes[1, 0])
axes[1, 0].set_xlabel('Year')
axes[1, 0].set_ylabel('Number of Transactions')
axes[1, 0].set_title('Transaction Volume Comparison')

# Mean vs Median price
for year in [2013, 2017, 2021]:
    df_year = df_all[df_all['annee'] == year]
    axes[1, 1].scatter(df_year['prix_m2_median'], df_year['prix_m2_mean'], alpha=0.5, label=str(year))

axes[1, 1].plot([0, df_all['prix_m2_median'].max()], [0, df_all['prix_m2_median'].max()], 'k--', alpha=0.3)
axes[1, 1].set_xlabel('Median Price per m² (€)')
axes[1, 1].set_ylabel('Mean Price per m² (€)')
axes[1, 1].set_title('Mean vs Median Prices')
axes[1, 1].legend()

plt.tight_layout()
plt.show()

### 4.3 Socio-Professional Categories

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Cadres distribution
for year in [2013, 2017, 2021]:
    df_year = df_all[df_all['annee'] == year]
    axes[0, 0].hist(df_year['pct_cadres'], alpha=0.5, bins=20, label=str(year))

axes[0, 0].set_xlabel('% Cadres')
axes[0, 0].set_ylabel('Number of Quartiers')
axes[0, 0].set_title('Cadres Distribution')
axes[0, 0].legend()

# Ouvriers distribution
for year in [2013, 2017, 2021]:
    df_year = df_all[df_all['annee'] == year]
    axes[0, 1].hist(df_year['pct_ouvriers'], alpha=0.5, bins=20, label=str(year))

axes[0, 1].set_xlabel('% Ouvriers')
axes[0, 1].set_ylabel('Number of Quartiers')
axes[0, 1].set_title('Ouvriers Distribution')
axes[0, 1].legend()

# Cadres vs Ouvriers (2021)
df_2021_plot = df_all[df_all['annee'] == 2021]
axes[1, 0].scatter(df_2021_plot['pct_ouvriers'], df_2021_plot['pct_cadres'], alpha=0.6)
axes[1, 0].set_xlabel('% Ouvriers')
axes[1, 0].set_ylabel('% Cadres')
axes[1, 0].set_title('Cadres vs Ouvriers (2021)')

# Box plots comparison
data_to_plot = [df_all[df_all['annee'] == year][['pct_cadres', 'pct_ouvriers']] for year in [2013, 2017, 2021]]
positions = [[1, 2], [4, 5], [7, 8]]
colors = ['lightblue', 'lightcoral']

for i, (data, pos) in enumerate(zip(data_to_plot, positions)):
    bp = axes[1, 1].boxplot([data['pct_cadres'], data['pct_ouvriers']], positions=pos, widths=0.6,
                             patch_artist=True, labels=['Cadres', 'Ouvriers'])
    for patch, color in zip(bp['boxes'], colors):
        patch.set_facecolor(color)

axes[1, 1].set_xticks([1.5, 4.5, 7.5])
axes[1, 1].set_xticklabels(['2013', '2017', '2021'])
axes[1, 1].set_ylabel('Percentage')
axes[1, 1].set_title('Socio-Professional Categories Evolution')

plt.tight_layout()
plt.show()

### 4.4 Demographics

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Population totale
df_all.boxplot(column='pop_total', by='annee', ax=axes[0, 0])
axes[0, 0].set_xlabel('Year')
axes[0, 0].set_ylabel('Total Population')
axes[0, 0].set_title('Total Population by Year')

# Age groups
for year in [2013, 2017, 2021]:
    df_year = df_all[df_all['annee'] == year]
    axes[0, 1].hist(df_year['pct_25_39'], alpha=0.5, bins=20, label=str(year))

axes[0, 1].set_xlabel('% Population 25-39 years')
axes[0, 1].set_ylabel('Number of Quartiers')
axes[0, 1].set_title('Young Adults Distribution (25-39)')
axes[0, 1].legend()

# Immigration
for year in [2013, 2017, 2021]:
    df_year = df_all[df_all['annee'] == year]
    axes[1, 0].hist(df_year['pct_immigres'], alpha=0.5, bins=20, label=str(year))

axes[1, 0].set_xlabel('% Immigrants')
axes[1, 0].set_ylabel('Number of Quartiers')
axes[1, 0].set_title('Immigrant Population Distribution')
axes[1, 0].legend()

# Education
df_all.boxplot(column='pop_bac_sup', by='annee', ax=axes[1, 1])
axes[1, 1].set_xlabel('Year')
axes[1, 1].set_ylabel('Population with Bac+2 or higher')
axes[1, 1].set_title('Education Level by Year')

plt.suptitle('')
plt.tight_layout()
plt.show()

## 5. Temporal Evolution (2013-2021)

In [None]:
# Calculate means by year for key indicators
temporal_means = df_all.groupby('annee')[key_indicators].mean()

fig, axes = plt.subplots(2, 3, figsize=(16, 10))
axes = axes.flatten()

for i, var in enumerate(key_indicators):
    axes[i].plot([2013, 2017, 2021], temporal_means[var], marker='o', linewidth=2, markersize=8)
    axes[i].set_xlabel('Year')
    axes[i].set_ylabel(var.replace('_', ' ').title())
    axes[i].set_title(f'Evolution: {var.replace("_", " ").title()}')
    axes[i].grid(True, alpha=0.3)
    axes[i].set_xticks([2013, 2017, 2021])

plt.suptitle('Temporal Evolution of Key Indicators (Mean Values)', fontsize=14, y=1.00)
plt.tight_layout()
plt.show()

print("\nMean values by year:")
print(temporal_means)

In [None]:
# Calculate percentage change 2013-2021
pct_change = ((temporal_means.loc[2021] - temporal_means.loc[2013]) / temporal_means.loc[2013] * 100).sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(10, 6))
colors = ['green' if x > 0 else 'red' for x in pct_change]
pct_change.plot(kind='barh', ax=ax, color=colors)
ax.set_xlabel('% Change 2013-2021')
ax.set_title('Percentage Change in Key Indicators (2013-2021)')
ax.axvline(x=0, color='black', linestyle='-', linewidth=0.5)
plt.tight_layout()
plt.show()

print("\nPercentage change (2013-2021):")
print(pct_change)

## 6. Correlation Analysis

In [None]:
# Focus on key socio-economic variables for correlation
corr_vars = ['median_uc', 'prix_m2_median', 'pct_cadres', 'pct_ouvriers', 
             'pct_immigres', 'pct_25_39', 'pop_bac_sup', 'pop_total']

# Calculate correlation for 2021
corr_2021 = df_2021[corr_vars].corr()

fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr_2021, annot=True, fmt='.2f', cmap='coolwarm', center=0,
            square=True, linewidths=1, cbar_kws={"shrink": 0.8}, ax=ax)
ax.set_title('Correlation Matrix - Key Variables (2021)', fontsize=14, pad=20)
plt.tight_layout()
plt.show()

In [None]:
# Key relationships scatter plots
fig, axes = plt.subplots(2, 2, figsize=(14, 12))

# Income vs Price
axes[0, 0].scatter(df_2021['median_uc'], df_2021['prix_m2_median'], alpha=0.6)
axes[0, 0].set_xlabel('Median Income per UC (€)')
axes[0, 0].set_ylabel('Median Price per m² (€)')
axes[0, 0].set_title('Income vs Real Estate Price (2021)')
z = np.polyfit(df_2021['median_uc'].dropna(), df_2021['prix_m2_median'].dropna(), 1)
p = np.poly1d(z)
axes[0, 0].plot(df_2021['median_uc'].sort_values(), p(df_2021['median_uc'].sort_values()), "r--", alpha=0.5)

# Cadres vs Price
axes[0, 1].scatter(df_2021['pct_cadres'], df_2021['prix_m2_median'], alpha=0.6)
axes[0, 1].set_xlabel('% Cadres')
axes[0, 1].set_ylabel('Median Price per m² (€)')
axes[0, 1].set_title('Cadres vs Real Estate Price (2021)')
z = np.polyfit(df_2021['pct_cadres'].dropna(), df_2021['prix_m2_median'].dropna(), 1)
p = np.poly1d(z)
axes[0, 1].plot(df_2021['pct_cadres'].sort_values(), p(df_2021['pct_cadres'].sort_values()), "r--", alpha=0.5)

# Cadres vs Ouvriers
axes[1, 0].scatter(df_2021['pct_ouvriers'], df_2021['pct_cadres'], alpha=0.6)
axes[1, 0].set_xlabel('% Ouvriers')
axes[1, 0].set_ylabel('% Cadres')
axes[1, 0].set_title('Cadres vs Ouvriers (2021)')
z = np.polyfit(df_2021['pct_ouvriers'].dropna(), df_2021['pct_cadres'].dropna(), 1)
p = np.poly1d(z)
axes[1, 0].plot(df_2021['pct_ouvriers'].sort_values(), p(df_2021['pct_ouvriers'].sort_values()), "r--", alpha=0.5)

# Education vs Income
axes[1, 1].scatter(df_2021['pop_bac_sup'], df_2021['median_uc'], alpha=0.6)
axes[1, 1].set_xlabel('Population with Bac+2 or higher')
axes[1, 1].set_ylabel('Median Income per UC (€)')
axes[1, 1].set_title('Education vs Income (2021)')
z = np.polyfit(df_2021['pop_bac_sup'].dropna(), df_2021['median_uc'].dropna(), 1)
p = np.poly1d(z)
axes[1, 1].plot(df_2021['pop_bac_sup'].sort_values(), p(df_2021['pop_bac_sup'].sort_values()), "r--", alpha=0.5)

plt.tight_layout()
plt.show()

## 7. Spatial Analysis

In [None]:
# Load geographic data
gdf = gpd.read_parquet(data_path / 'quartiers_paris_geo.parquet')

# Merge with 2021 data
gdf_2021 = gdf.merge(df_2021, on='quartier', how='left')

print(f"Loaded {len(gdf)} quartiers with geographic data")
print(f"Merged with {len(gdf_2021[gdf_2021['annee'].notna()])} quartiers having 2021 data")

In [None]:
# Create maps for key variables
fig, axes = plt.subplots(2, 2, figsize=(16, 14))

# Median income
gdf_2021.plot(column='median_uc', ax=axes[0, 0], legend=True, 
              cmap='YlGnBu', edgecolor='black', linewidth=0.5,
              missing_kwds={'color': 'lightgrey', 'hatch': '///', 'edgecolor': 'black'})
axes[0, 0].set_title('Median Income per UC (2021)', fontsize=12)
axes[0, 0].axis('off')

# Median price per m²
gdf_2021.plot(column='prix_m2_median', ax=axes[0, 1], legend=True,
              cmap='OrRd', edgecolor='black', linewidth=0.5,
              missing_kwds={'color': 'lightgrey', 'hatch': '///', 'edgecolor': 'black'})
axes[0, 1].set_title('Median Price per m² (2021)', fontsize=12)
axes[0, 1].axis('off')

# % Cadres
gdf_2021.plot(column='pct_cadres', ax=axes[1, 0], legend=True,
              cmap='Purples', edgecolor='black', linewidth=0.5,
              missing_kwds={'color': 'lightgrey', 'hatch': '///', 'edgecolor': 'black'})
axes[1, 0].set_title('% Cadres (2021)', fontsize=12)
axes[1, 0].axis('off')

# % Immigrants
gdf_2021.plot(column='pct_immigres', ax=axes[1, 1], legend=True,
              cmap='Greens', edgecolor='black', linewidth=0.5,
              missing_kwds={'color': 'lightgrey', 'hatch': '///', 'edgecolor': 'black'})
axes[1, 1].set_title('% Immigrants (2021)', fontsize=12)
axes[1, 1].axis('off')

plt.suptitle('Spatial Distribution of Key Indicators (2021)', fontsize=14, y=0.98)
plt.tight_layout()
plt.show()

In [None]:
# Map showing quartier types
fig, ax = plt.subplots(figsize=(12, 10))

# Define colors and hatches for different types
type_colors = {
    'Habitation': 'lightblue',
    'Activité': 'lightgrey',
    'Divers': 'lightgreen'
}

type_hatches = {
    'Habitation': '',
    'Activité': '///',
    'Divers': '\\\\\\'
}

for qtype in gdf['type_quartier'].unique():
    gdf[gdf['type_quartier'] == qtype].plot(
        ax=ax,
        color=type_colors.get(qtype, 'white'),
        edgecolor='black',
        linewidth=0.5,
        label=qtype,
        hatch=type_hatches.get(qtype, '')
    )

ax.set_title('Paris Quartiers by Type', fontsize=14)
ax.legend(loc='upper right')
ax.axis('off')
plt.tight_layout()
plt.show()

print("\nQuartier type distribution:")
print(gdf['type_quartier'].value_counts())

## 8. Summary & Key Insights

In [None]:
print("="*80)
print("SUMMARY OF KEY FINDINGS")
print("="*80)

print("\n1. DATA QUALITY")
print(f"   - Total quartiers per year: {len(df_2021)} (consistent across years)")
print(f"   - Complete data: {df_all.isnull().sum().sum() == 0}")
print(f"   - All quartiers are type 'Habitation' (>50% IRIS type H)")

print("\n2. TEMPORAL EVOLUTION (2013-2021)")
for var in key_indicators:
    change = ((temporal_means.loc[2021, var] - temporal_means.loc[2013, var]) / temporal_means.loc[2013, var]) * 100
    print(f"   - {var}: {change:+.1f}%")

print("\n3. KEY CORRELATIONS (2021)")
strong_corr = corr_2021.abs() > 0.7
for i in range(len(strong_corr)):
    for j in range(i+1, len(strong_corr)):
        if strong_corr.iloc[i, j]:
            print(f"   - {corr_2021.index[i]} <-> {corr_2021.columns[j]}: {corr_2021.iloc[i, j]:.2f}")

print("\n4. DISTRIBUTION CHARACTERISTICS (2021)")
print(f"   - Income range: €{df_2021['median_uc'].min():.0f} - €{df_2021['median_uc'].max():.0f}")
print(f"   - Price range: €{df_2021['prix_m2_median'].min():.0f} - €{df_2021['prix_m2_median'].max():.0f} per m²")
print(f"   - Cadres: {df_2021['pct_cadres'].min():.1f}% - {df_2021['pct_cadres'].max():.1f}%")
print(f"   - Ouvriers: {df_2021['pct_ouvriers'].min():.1f}% - {df_2021['pct_ouvriers'].max():.1f}%")

print("\n" + "="*80)