### Bayesian network construction

In [24]:
import pandas as pd
import numpy as np
from PrivBayes import greedy_bayes
from utils import preprocessing, encoding, get_school_list, decoding, display_bayesian_network
import plotly.express as px

In [25]:
# Notebook-wide pandas display settings: `None` removes the limit, so wide
# frames show every column and long cell values are not truncated.
# `display.max_rows` is intentionally not changed here (pandas default applies).
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None

In [4]:
# Read both sheets of the 2021 workbook in a single call: passing a list to
# `sheet_name` makes pandas return a dict of DataFrames keyed by sheet name,
# so the file is opened and parsed once instead of once per sheet.
workbook_sheets = pd.read_excel('data/2021.xlsx', sheet_name=['Resultaten', 'Klassen'])

# School list derived from the 'Klassen' sheet; it is reused later to encode
# the synthetic datasets as well.
schools_df = workbook_sheets['Klassen']
schools = get_school_list(schools_df)

# Preprocess the 'Resultaten' sheet, then encode it against the school list.
choices_df = preprocessing(workbook_sheets['Resultaten'])
encoded_choices_df = encoding(choices_df, schools)

# Distinct 'Basisschool advies' values, in order of first appearance.
edu_types = encoded_choices_df['Basisschool advies'].unique().tolist()
# Every column whose name contains 'Voorkeur'.
choice_cols = [col for col in encoded_choices_df.columns if 'Voorkeur' in col]

In [35]:
# Number of leading preference columns ('Voorkeur 1' .. 'Voorkeur k') handed to
# the network construction. This is distinct from the `k=0` keyword argument
# passed to greedy_bayes below, which is a literal and does not use this value.
k = 5
chosen_cols = ['Voorkeur {}'.format(rank) for rank in range(1, k + 1)]

# Construct and display one Bayesian network per 'Basisschool advies' value.
for advice in edu_types:
    print(advice, ":")
    advice_mask = encoded_choices_df['Basisschool advies'] == advice
    advice_choices = encoded_choices_df[advice_mask][chosen_cols]

    bn = greedy_bayes(advice_choices, k=0, epsilon=0.1 / 2, seed=0)
    display_bayesian_network(bn)

havo :
Constructed Bayesian network:
    Voorkeur 3 has parents ['Voorkeur 4'].
    Voorkeur 5 has parents ['Voorkeur 3', 'Voorkeur 4'].
    Voorkeur 2 has parents ['Voorkeur 3', 'Voorkeur 5', 'Voorkeur 4'].
    Voorkeur 1 has parents ['Voorkeur 5', 'Voorkeur 2', 'Voorkeur 4'].
havo/vwo :
Constructed Bayesian network:
    Voorkeur 3 has parents ['Voorkeur 4'].
    Voorkeur 5 has parents ['Voorkeur 3', 'Voorkeur 4'].
    Voorkeur 2 has parents ['Voorkeur 3', 'Voorkeur 5', 'Voorkeur 4'].
    Voorkeur 1 has parents ['Voorkeur 5', 'Voorkeur 2', 'Voorkeur 4'].
vwo :
Constructed Bayesian network:
    Voorkeur 3 has parents ['Voorkeur 4'].
    Voorkeur 5 has parents ['Voorkeur 4'].
    Voorkeur 2 has parents ['Voorkeur 4'].
    Voorkeur 1 has parents ['Voorkeur 5'].
vmbo-b/k :


### Graphical comparison

In [6]:
# Load the synthetic datasets for model alpha and model beta from their CSV files.
alpha_synthetic = pd.read_csv('synthetic_datasets/model_alpha_synthetic.csv')
beta_synthetic = pd.read_csv('synthetic_datasets/model_beta_synthetic.csv')

In [7]:
# Encode the alpha synthetic data with the same `encoding` helper and school
# list that were applied to the real data, then preview the first rows.
encoded_alpha_synthetic = encoding(alpha_synthetic, schools)
encoded_alpha_synthetic.head()

Unnamed: 0,Basisschool advies,Voorkeur 1,Voorkeur 2,Voorkeur 3,Voorkeur 4,Voorkeur 5,Voorkeur 6,Voorkeur 7,Voorkeur 8,Voorkeur 9,...,Voorkeur 13,Voorkeur 14,Voorkeur 15,Voorkeur 16,Voorkeur 17,Voorkeur 18,Voorkeur 19,Voorkeur 20,Voorkeur 21,Voorkeur 22
0,havo,68,99,81,102,20,132,137,42,30,...,181,181,181,181,181,181,181,181,181,181
1,havo,138,104,56,28,37,73,45,96,153,...,181,181,181,181,181,181,181,181,181,181
2,havo,30,20,13,127,137,5,40,109,139,...,181,181,181,181,181,181,181,181,181,181
3,havo,104,37,95,42,73,11,35,131,62,...,181,181,181,181,181,181,181,181,181,181
4,havo,129,11,162,40,45,113,151,2,135,...,181,181,181,181,181,181,181,181,181,181


In [8]:
# Encode the beta synthetic data with the same `encoding` helper and school
# list that were applied to the real data, then preview the first rows.
encoded_beta_synthetic = encoding(beta_synthetic, schools)
encoded_beta_synthetic.head()

Unnamed: 0,Basisschool advies,Voorkeur 1,Voorkeur 2,Voorkeur 3,Voorkeur 4,Voorkeur 5,Voorkeur 6,Voorkeur 7,Voorkeur 8,Voorkeur 9,...,Voorkeur 13,Voorkeur 14,Voorkeur 15,Voorkeur 16,Voorkeur 17,Voorkeur 18,Voorkeur 19,Voorkeur 20,Voorkeur 21,Voorkeur 22
0,havo,37,20,113,52,81,127,42,180,46,...,181,181,181,181,181,181,181,181,181,181
1,havo,81,40,129,102,29,115,180,48,68,...,181,181,181,181,181,181,181,181,181,181
2,havo,121,138,137,45,44,88,110,22,99,...,181,181,181,181,181,181,181,181,181,181
3,havo,127,64,109,135,5,35,142,104,73,...,181,181,181,181,181,181,181,181,181,181
4,havo,104,99,126,135,142,87,30,7,72,...,181,181,181,181,181,181,181,181,181,181


In [None]:
# Count how often each 'Voorkeur 1' value occurs in the real data and in both
# synthetic datasets.
# All three counts are taken from the *encoded* frames so that the Series share
# the same kind of index and line up row by row when concatenated; mixing the
# encoded real frame with the raw synthetic frames would compare values that
# have not gone through the same `encoding` step.
real_choice_col_bins = encoded_choices_df['Voorkeur 1'].value_counts().rename('real')
alpha_synthetic_choice_col_bins = (
    encoded_alpha_synthetic['Voorkeur 1'].value_counts().rename('alpha_synthetic')
)
beta_synthetic_choice_col_bins = (
    encoded_beta_synthetic['Voorkeur 1'].value_counts().rename('beta_synthetic')
)

# Outer-join the three count Series on their index (one row per value).
# A value that never occurs in one dataset has no entry in that dataset's
# counts, so the join leaves NaN there; that really means a count of zero.
compare_choice_col_bins = (
    pd.concat(
        [
            real_choice_col_bins,
            alpha_synthetic_choice_col_bins,
            beta_synthetic_choice_col_bins,
        ],
        axis=1,
    )
    .fillna(0)
    .astype(float)
)

In [29]:
# Preview the first rows of the real vs. synthetic 'Voorkeur 1' count table.
compare_choice_col_bins.head()

Unnamed: 0,real,alpha_synthetic,beta_synthetic
0,11.0,10.0,15.0
1,39.0,51.0,36.0
2,30.0,30.0,30.0
3,235.0,243.0,263.0
4,77.0,79.0,76.0


In [33]:
import plotly.graph_objects as go

# x positions: one per distinct 'Voorkeur 1' value in the comparison table.
indices = compare_choice_col_bins.index

# (column in compare_choice_col_bins, legend label, bar colour) per dataset.
trace_specs = [
    ('real', 'Real', 'indianred'),
    ('alpha_synthetic', 'Alpha Synthetic', 'lightsalmon'),
    ('beta_synthetic', 'Beta Synthetic', 'lightskyblue'),
]

fig = go.Figure()
for column, label, colour in trace_specs:
    # Bracket access is used instead of attribute access so a column name can
    # never be shadowed by a DataFrame attribute of the same name.
    fig.add_trace(go.Bar(
        x=indices,
        y=compare_choice_col_bins[column],
        name=label,
        marker_color=colour,
    ))

# Draw the datasets side by side for each x value and label the figure so it
# can be read on its own.
fig.update_layout(
    barmode='group',
    title="'Voorkeur 1' value counts: real vs. synthetic data",
    xaxis_title="'Voorkeur 1' value",
    yaxis_title='Count',
)
fig.show()