# Etude de la répartition public/privé des établissements scolaires français selon les résultats au brevet

### 1. Importing libraries

In [1]:
import time
startTime = time.time()

import pandas as pd
import plotly.express as px
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
import plotly
import plotly.graph_objects as go
import plotly.io as pio
from tabulate import tabulate
import os

### 2. Creating the relevant folders and paths

In [2]:
# Use the current working directory as the project root
dirname = os.getcwd()

# Location folder variables. Paths are built with os.path.join so the separator
# matches the host OS; the trailing "" keeps a trailing separator because later
# cells build file paths with `folder + file_name`.
data_in = os.path.join(dirname, "da_data_raw", "")  # raw data
data_out = os.path.join(dirname, "da_data_workfiles", "")  # clean data
graph_out = os.path.join(dirname, "graphs", "")  # graphs

# Graphs to be used for the html report. The default is the original local
# website folder; set the HTML_GRAPH_OUT environment variable to redirect the
# exports on another machine (a trailing separator is added if missing).
_html_graph_override = os.environ.get("HTML_GRAPH_OUT")
if _html_graph_override:
    html_graph_out = os.path.join(_html_graph_override, "")
else:
    html_graph_out = "C:\\Users\\33671\\Documents\\my-website\\html5up-massively\\images\\graphs\\"

# Create each project folder when it is missing and report what happened
for folder in (data_in, data_out, graph_out):
    if os.path.exists(folder):
        print(f"Already existing directory: {folder}")
    else:
        os.makedirs(folder)
        print(f"Directory Created: {folder}")

# Printing main directories we will work with
print(f"\nMain directory: {dirname}")
print(f"Raw data folder: {data_in}")
print(f"Clean data folder: {data_out}")

Already existing directory: c:\Users\33671\Documents\Python\IPS\da_data_raw\
Already existing directory: c:\Users\33671\Documents\Python\IPS\da_data_workfiles\
Already existing directory: c:\Users\33671\Documents\Python\IPS\graphs\

Main directory: c:\Users\33671\Documents\Python\IPS
Raw data folder: c:\Users\33671\Documents\Python\IPS\da_data_raw\
Clean data folder: c:\Users\33671\Documents\Python\IPS\da_data_workfiles\


### 3. Importing master file

In [3]:
# Load the master table. data_out already ends with a path separator, so the
# file name is joined onto it instead of embedding a backslash in the string
# ("\d" is an invalid escape sequence and triggers a warning).
df = pd.read_csv(os.path.join(data_out, "df_master.csv"))

  df = pd.read_csv(f"{data_out}\df_master.csv")


In [4]:
# Preview the first five rows of the master table
df.head(n=5)

Unnamed: 0,uai,nom_etablissment,type_etablissement,code_academie,academie,code_departement,departement,code_insee_commune,code_region,region,...,longitude,appariement,position,date_ouverture,niveau_de_vie_commune,niveau_de_vie_departement,libgeo,zonage_rur,zonage_rur_lib,zone_rur_simple
0,0640891w,ecole elementaire,ecole,4.0,bordeaux,64,pyrenees-atlantiques,64253,75.0,nouvelle-aquitaine,...,-0.754035,manuel,"{'lon': -0.7540354152972131, 'lat': 43.2865348...",1970-02-05,18538.5,20478.793359,gurs,2.0,rural autonome peu dense,rural
1,0640960w,ecole primaire publique,ecole,4.0,bordeaux,64,pyrenees-atlantiques,64359,75.0,nouvelle-aquitaine,...,-0.656443,manuel,"{'lon': -0.6564426369228661, 'lat': 43.2854034...",1970-02-04,19287.0,20478.793359,lucq-de-béarn,2.0,rural autonome peu dense,rural
2,0640962y,ecole primaire publique,ecole,4.0,bordeaux,64,pyrenees-atlantiques,64317,75.0,nouvelle-aquitaine,...,-1.437478,manuel,"{'lon': -1.4374780402733371, 'lat': 43.3722550...",1970-02-04,21374.0,20478.793359,larressore,4.0,rural sous forte influence d'un pole,urbain
3,0641065k,ecole elementaire publique,ecole,4.0,bordeaux,64,pyrenees-atlantiques,64459,75.0,nouvelle-aquitaine,...,-0.717289,manuel,"{'lon': -0.717288729824593, 'lat': 43.27619860...",1970-02-05,19684.761905,20478.793359,préchacq-navarrenx,2.0,rural autonome peu dense,rural
4,0641069p,ecole elementaire publique,ecole,4.0,bordeaux,64,pyrenees-atlantiques,64462,75.0,nouvelle-aquitaine,...,-0.897519,manuel,"{'lon': -0.8975190867459131, 'lat': 43.5235168...",1970-01-10,19058.666667,20478.793359,ramous,2.0,rural autonome peu dense,rural


### 4. DNB success rate violin distribution

In [5]:
# Start from matplotlib's default style before applying the overrides below
plt.style.use("default")

# Reference: https://matplotlib.org/stable/tutorials/introductory/customizing.html
# (the available keys can be listed with mpl.rcParams.keys())
mpl.rcParams.update({
    # Spines
    'axes.linewidth': 0.2,          # Spine edge line width (default = 0.8)
    'axes.spines.top': False,       # Top spine show (default = True)
    'axes.spines.left': True,       # Left spine show (default = True)
    'axes.spines.right': False,     # Right spine show (default = True)
    'axes.spines.bottom': True,     # Bottom spine show (default = True)

    # Grid
    'axes.grid': True,              # Adding grid (default = False)
    'axes.grid.axis': 'y',          # Grid on x or y or both axis (default = 'both')
    'grid.linewidth': 0.2,          # Grid line width (default = 0.8)
    'grid.color': '#DBDBD0',        # Grid line color (default = #b0b0b0)
    # Grid below patches (True), above all (False) or above patches but
    # below lines ('line') (default = 'line')
    'axes.axisbelow': 'line',

    # Figure size
    'figure.figsize': (8, 4.5),     # Figure size in inches (default = 6.4, 4.8)
    'figure.dpi': 100,              # Figure dots per inch (default = 100)

    # Figure spacing (all dimensions are a fraction of the figure width and height)
    'figure.subplot.left': 0.125,   # Left (default = 0.125)
    'figure.subplot.right': 0.90,   # Right (default = 0.9)
    'figure.subplot.bottom': 0.11,  # Bottom (default = 0.11)
    'figure.subplot.top': 0.88,     # Top (default = 0.88)

    # Legend
    'legend.frameon': False,        # Draw the legend on a background patch (default = True)
    'legend.framealpha': 0.8,       # Legend patch transparency (default = 0.8)
    'legend.fontsize': 'small',     # Legend font size (default = medium)
    'legend.loc': 'best',

    # Saving: high DPI for a good resolution, tight bounding box
    'savefig.dpi': 300,
    'savefig.bbox': 'tight',
    # 'savefig.pad_inches': 0.2,

    # Title
    'axes.titlelocation': 'left',   # Alignment of the title (default = center)
    'axes.titlepad': 20,            # Pad between axes and title (default = 6.0)
    'axes.titlesize': 'medium',     # (default = large)
    'axes.titleweight': 'bold',     # (default = normal)

    # Font sizes and tick spacing
    'axes.labelsize': 'small',
    'xtick.labelsize': 'x-small',
    'ytick.labelsize': 'x-small',
    'font.size': 10,
    'xtick.major.pad': 3.5,
    'ytick.major.pad': 3.5,
})

In [6]:
# pio.renderers.default = "notebook_connected"

# Violin plot of the 2021 DNB success rate, one violin per sector.
# Only rows with a success rate of at least 40 are plotted.
dnb_col = 'dnb_taux_de_reussite_2021'
violin_df = df[df[dnb_col] >= 40]  # filter once instead of once per trace argument

# (sector value in the data, legend label, line colour)
sector_styles = [
    ('public', 'public', '#E66360'),
    ('prive', 'privé', '#8BC6FC'),
]

fig = go.Figure()

for sector, label, colour in sector_styles:
    sector_df = violin_df[violin_df['secteur'] == sector]
    fig.add_trace(go.Violin(x=sector_df['secteur'],
                            y=sector_df[dnb_col],
                            legendgroup=label,
                            scalegroup=label,
                            name=label,
                            line_color=colour,
                            points='all',
                            )
    )

# Show the inner box and mean line, and shrink the individual points
fig.update_traces(box_visible=True,
                  meanline_visible=True,
                  marker=dict(size=0.5)
                  )

# Customize layout (no title)
fig.update_layout(
    width=900,
    height=600,
    title="",
    xaxis_title="",
    yaxis_title="Taux de réussite au brevet (%)",
    margin=dict(l=90, r=50, b=50, t=40, pad=4),
    legend=dict(orientation="h",
                yanchor="bottom",
                y=1.01,
                xanchor="left")
)

# Add source annotation     https://medium.com/nerd-for-tech/enriching-data-visualizations-with-annotations-in-plotly-using-python-6127ff6e0f80
fig.add_annotation(
    text="Source: Ministère de l'éducation nationale via https://data.education.gouv.fr",
    showarrow=False,
    x=-0.06,
    y=-0.1,
    xref='paper',
    yref='paper',
    xanchor='left',
    yanchor='bottom',
    xshift=-1,
    yshift=-5,
    font=dict(size=8, color="grey"),
    align="left"
)

# Add copyright annotation
fig.add_annotation(
    text="@crish1eev1",
    showarrow=False,
    x=0.92,
    y=-0.1,
    xref='paper',
    yref='paper',
    xanchor='left',
    yanchor='bottom',
    xshift=-1,
    yshift=-5,
    font=dict(size=10, color="grey"),
    align="left"
)

base_name = 'violin-distribution-prive-public-selon-dnd'

# Save no-title files (used in report).
# auto_open=False: write the HTML without opening a browser tab for every export.
fig.write_image(graph_out + base_name + '_no-title.png')
plotly.offline.plot(fig, filename=html_graph_out + base_name + '_no-title.html', auto_open=False)

# Add title and change margins
fig.update_layout(title="Distribution des collèges selon le taux de réussite au brevet 2021",
                  margin=dict(l=90, r=50, b=50, t=70, pad=4),
)

# Save titled files: PNG in the project graphs folder, HTML in both output folders
fig.write_image(graph_out + base_name + '.png')
for folder in (graph_out, html_graph_out):
    plotly.offline.plot(fig, filename=folder + base_name + '.html', auto_open=False)

# Show fig
fig.show()

In [7]:
# Descriptive statistics of the 2021 DNB success rate for public-sector schools
# (rows with a success rate of at least 1)
df.loc[(df['secteur'] == 'public') & (df['dnb_taux_de_reussite_2021'] >= 1),
       'dnb_taux_de_reussite_2021'].describe()

count    5283.000000
mean       86.297899
std         7.799217
min        25.000000
25%        81.600000
50%        87.400000
75%        91.900000
max       100.000000
Name: dnb_taux_de_reussite_2021, dtype: float64

In [8]:
# Share of public-sector schools with a 100 success rate, among public-sector
# schools whose success rate is above 1
public_rows = df[df['secteur'] == 'public']
public_rate = public_rows['dnb_taux_de_reussite_2021']
len(public_rows[public_rate == 100]) / len(public_rows[public_rate > 1])

0.014385765663448798

In [9]:
# Number of public-sector schools with a 100 success rate
int(((df['secteur'] == 'public') & (df['dnb_taux_de_reussite_2021'] == 100)).sum())

76

In [10]:
# Descriptive statistics of the 2021 DNB success rate for private-sector schools
# (rows with a success rate of at least 1)
df.loc[(df['secteur'] == 'prive') & (df['dnb_taux_de_reussite_2021'] >= 1),
       'dnb_taux_de_reussite_2021'].describe()

count    1643.000000
mean       95.677358
std         6.147771
min        30.000000
25%        94.100000
50%        97.700000
75%       100.000000
max       100.000000
Name: dnb_taux_de_reussite_2021, dtype: float64

In [11]:
# Number of private-sector schools with a 100 success rate
int(((df['secteur'] == 'prive') & (df['dnb_taux_de_reussite_2021'] == 100)).sum())

429

In [12]:
# Share of private-sector schools with a 100 success rate, among private-sector
# schools whose success rate is above 1
prive_rows = df[df['secteur'] == 'prive']
prive_rate = prive_rows['dnb_taux_de_reussite_2021']
len(prive_rows[prive_rate == 100]) / len(prive_rows[prive_rate > 1])

0.26110772976262936

In [13]:
# Scatter of "ips" (x) against the 2021 DNB success rate (y), one facet per
# sector, marker size driven by the 'très bien' mention rate, with an OLS
# trendline and a marginal histogram on x. Only rows with a success rate above
# 40 are plotted.

# Pin each sector to a colour and fix the facet/legend order. Without this,
# plotly express assigns colours and facets in order of first appearance in the
# data, so the result depends on row order and can disagree with the violin plot.
sector_colours = {'public': '#E66360', 'prive': '#8BC6FC'}

fig = px.scatter(df[(df['dnb_taux_de_reussite_2021'] > 40)],
                 x="ips", y="dnb_taux_de_reussite_2021",
                 color="secteur",
                 size='dnb_taux_de_mention_tb_2021',
                 size_max=15,
                 hover_name='nom_etablissment',
                 hover_data=['uai', 'academie', 'departement', 'commune', 'ips', 'dnb_taux_de_reussite_2021', 'dnb_taux_de_mention_tb_2021'],
                 color_discrete_map=sector_colours,
                 color_discrete_sequence=['#E66360', '#8BC6FC'],  # fallback for any other sector value
                 category_orders={'secteur': list(sector_colours)},
                 opacity=0.9,
                 marginal_x='histogram',
                 facet_col="secteur",
                 trendline="ols"
                 )

# Customize layout (no title)
fig.update_layout(
    width=1300,
    height=600,
    title="",
    yaxis_title="Taux de réussite au brevet (%)",
    margin=dict(l=90, r=50, b=70, t=60, pad=4),
)

# Subtitle (wording fixed: the original sentence repeated "l'établissement")
fig.add_annotation(
    text="Plus le point est large, plus le taux de mention 'très bien' de l'établissement est élevé",
    showarrow=False,
    x=0.00,
    y=0.04,
    xref='paper',
    yref='paper',
    xanchor='left',
    yanchor='bottom',
    xshift=-1,
    yshift=-5,
    font=dict(size=8, color="grey"),
    align="left"
)

# Add source annotation     https://medium.com/nerd-for-tech/enriching-data-visualizations-with-annotations-in-plotly-using-python-6127ff6e0f80
fig.add_annotation(
    text="Source: Ministère de l'éducation nationale via https://data.education.gouv.fr",
    showarrow=False,
    x=-0.07,
    y=-0.20,
    xref='paper',
    yref='paper',
    xanchor='left',
    yanchor='bottom',
    xshift=-1,
    yshift=-5,
    font=dict(size=8, color="grey"),
    align="left"
)

# Add copyright annotation
fig.add_annotation(
    text="@crish1eev1",
    showarrow=False,
    x=1.02,
    y=-0.20,
    xref='paper',
    yref='paper',
    xanchor='left',
    yanchor='bottom',
    xshift=-1,
    yshift=-5,
    font=dict(size=10, color="grey"),
    align="left"
)

base_name = 'scatter-distribution-prive-public-selon-ips-et-dnb'

# Save no-title files (used in report).
# auto_open=False: write the HTML without opening a browser tab for every export.
fig.write_image(graph_out + base_name + '_no-title.png')
plotly.offline.plot(fig, filename=html_graph_out + base_name + '_no-title.html', auto_open=False)

# Add title and change margins
fig.update_layout(title="Distribution des collèges selon taux de réussite au brevet et indice de position sociale",
                  margin=dict(l=90, r=50, b=100, t=90, pad=4),
)

# Save titled files: PNG in the project graphs folder, HTML in both output folders
fig.write_image(graph_out + base_name + '.png')
for folder in (graph_out, html_graph_out):
    plotly.offline.plot(fig, filename=folder + base_name + '.html', auto_open=False)

fig.show()

In [14]:
# Order rows by region; the next cell animates the scatter by region
# (presumably so the animation frames follow this order — TODO confirm)
df = df.sort_values(by='region')

In [15]:
# Animated scatter of "ips" (x) against the 2021 DNB success rate (y), one
# facet per sector and one animation frame per region. Only rows with a success
# rate above 40 are plotted.

# Pin each sector to a colour and fix the facet/legend order. Without this,
# plotly express assigns colours and facets in order of first appearance in the
# data, so the result changes with the row order (df is re-sorted between cells).
sector_colours = {'public': '#E66360', 'prive': '#8BC6FC'}

fig = px.scatter(df[(df['dnb_taux_de_reussite_2021'] > 40)],
                 x="ips", y="dnb_taux_de_reussite_2021",
                 color="secteur",
                 size='dnb_taux_de_mention_tb_2021',
                 size_max=15,
                 hover_name='nom_etablissment',
                 hover_data=['uai', 'academie', 'departement', 'commune', 'ips', 'dnb_taux_de_reussite_2021', 'dnb_taux_de_mention_tb_2021'],
                 animation_frame='region',
                 color_discrete_map=sector_colours,
                 color_discrete_sequence=['#E66360', '#8BC6FC'],  # fallback for any other sector value
                 category_orders={'secteur': list(sector_colours)},
                 opacity=0.9,
                 facet_col="secteur",
                 )

# Fixed axis ranges so every animation frame shares the same scale
fig.update_yaxes(range=[38, 104])
fig.update_xaxes(range=[24, 166])

# Customize layout (no title)
fig.update_layout(
    width=1300,
    height=600,
    title="",
    yaxis_title="Taux de réussite au brevet (%)",
    margin=dict(l=90, r=50, b=70, t=60, pad=4),
)

# Subtitle (wording fixed: the original sentence repeated "l'établissement")
fig.add_annotation(
    text="Plus le point est large, plus le taux de mention 'très bien' de l'établissement est élevé",
    showarrow=False,
    x=0.00,
    y=0.04,
    xref='paper',
    yref='paper',
    xanchor='left',
    yanchor='bottom',
    xshift=-1,
    yshift=-5,
    font=dict(size=8, color="grey"),
    align="left"
)

# NOTE: the "Source: ..." annotation used on the static charts was disabled
# (commented out) in this animated version, so it is not added here.

# Add copyright annotation
fig.add_annotation(
    text="@crish1eev1",
    showarrow=False,
    x=1.02,
    y=-0.41,
    xref='paper',
    yref='paper',
    xanchor='left',
    yanchor='bottom',
    xshift=-1,
    yshift=-5,
    font=dict(size=10, color="grey"),
    align="left"
)

base_name = 'scatter-distribution-prive-public-selon-ips-et-dnb-par-region'

# Save no-title files (used in report).
# auto_open=False: write the HTML without opening a browser tab for every export.
fig.write_image(graph_out + base_name + '_no-title.png')
plotly.offline.plot(fig, filename=html_graph_out + base_name + '_no-title.html', auto_open=False)

# Add title and change margins
fig.update_layout(title="Distribution des collèges selon taux de réussite au brevet et indice de position sociale",
                  margin=dict(l=90, r=50, b=70, t=90, pad=4),
)

# Save titled files: PNG in the project graphs folder, HTML in both output folders
fig.write_image(graph_out + base_name + '.png')
for folder in (graph_out, html_graph_out):
    plotly.offline.plot(fig, filename=folder + base_name + '.html', auto_open=False)

fig.show()

In [16]:
# Order rows by departement; the next cell animates the scatter by departement
# (presumably so the animation frames follow this order — TODO confirm)
df = df.sort_values(by='departement')

In [17]:
# Animated scatter of "ips" (x) against the 2021 DNB success rate (y), one
# facet per sector and one animation frame per departement. Only rows with a
# success rate above 40 are plotted.

# Pin each sector to a colour and fix the facet/legend order. Without this,
# plotly express assigns colours and facets in order of first appearance in the
# data, so the result changes with the row order (df is re-sorted between cells).
sector_colours = {'public': '#E66360', 'prive': '#8BC6FC'}

fig = px.scatter(df[(df['dnb_taux_de_reussite_2021'] > 40)],
                 x="ips", y="dnb_taux_de_reussite_2021",
                 color="secteur",
                 size='dnb_taux_de_mention_tb_2021',
                 size_max=15,
                 hover_name='nom_etablissment',
                 hover_data=['uai', 'academie', 'departement', 'commune', 'ips', 'dnb_taux_de_reussite_2021', 'dnb_taux_de_mention_tb_2021'],
                 animation_frame='departement',
                 color_discrete_map=sector_colours,
                 color_discrete_sequence=['#E66360', '#8BC6FC'],  # fallback for any other sector value
                 category_orders={'secteur': list(sector_colours)},
                 opacity=0.9,
                 facet_col="secteur",
                 )

# Fixed axis ranges so every animation frame shares the same scale
fig.update_yaxes(range=[38, 104])
fig.update_xaxes(range=[24, 166])

# Customize layout (no title)
fig.update_layout(
    width=1300,
    height=600,
    title="",
    yaxis_title="Taux de réussite au brevet (%)",
    margin=dict(l=90, r=50, b=70, t=60, pad=4),
)

# NOTE: the "Source: ..." annotation used on the static charts was disabled
# (commented out) in this animated version, so it is not added here.

# Add copyright annotation
fig.add_annotation(
    text="@crish1eev1",
    showarrow=False,
    x=1.02,
    y=-0.41,
    xref='paper',
    yref='paper',
    xanchor='left',
    yanchor='bottom',
    xshift=-1,
    yshift=-5,
    font=dict(size=10, color="grey"),
    align="left"
)

base_name = 'scatter-distribution-prive-public-selon-ips-et-dnb-par-departement'

# Save no-title files (used in report).
# auto_open=False: write the HTML without opening a browser tab for every export.
fig.write_image(graph_out + base_name + '_no-title.png')
plotly.offline.plot(fig, filename=html_graph_out + base_name + '_no-title.html', auto_open=False)

# Add title and change margins
fig.update_layout(title="Distribution des collèges selon taux de réussite au brevet et indice de position sociale",
                  margin=dict(l=90, r=50, b=70, t=90, pad=4),
)

# Save titled files: PNG in the project graphs folder, HTML in both output folders
fig.write_image(graph_out + base_name + '.png')
for folder in (graph_out, html_graph_out):
    plotly.offline.plot(fig, filename=folder + base_name + '.html', auto_open=False)

fig.show()

In [18]:
# Wall-clock time elapsed since startTime was recorded in the first cell
executionTime = time.time() - startTime
print(f'Execution time in seconds: {executionTime}')

Execution time in seconds: 55.910146951675415
