In [443]:
# DEVOIR 2
# PARTIE V: MODÈLE GRAPHIQUE PROBABILISTE
# Note: Dans ce document, les parties A à C sont pour construire le réseau (2.5a)
# La partie D est pour faire les prédictions (2.5b)
import numpy as np
import pandas as pd
import json
from pomegranate import DiscreteDistribution, ConditionalProbabilityTable
from pomegranate import Node, BayesianNetwork
from itertools import combinations
from sklearn.metrics import accuracy_score

In [292]:
# Load the dataset from dataC.csv, using the first CSV column as the row index.
# NOTE(review): the preview suggests one row per country and 0/1-valued feature
# columns -- confirm against the CSV itself.
dataC = pd.read_csv('dataC.csv', index_col=0)
# Last expression of the cell: show the first rows of the frame.
dataC.head()

Unnamed: 0_level_0,GDP,Internet Speed,Consumption of Pure Alcohol,Intentional Homicide Victims,Military Expenditures,Human Development Index,Democracy Index,Tertiary Education,Importance of Religion (in %),% of Christians,...,Incarceration Rate (per 100 000),Literacy Rate,Age at First Marriage,Spending on Education (in %),Homeless Population (per 10 000),Milk Consumption,Num. of Scientific and Technical Journals Articles,Books Published,Kilocalories,Avg. Yearly Temperature (in Celsius)
COUNTRY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,1,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Albania,0,0,1,0,1,1,1,0,0,0,...,1,1,0,0,0,1,0,0,1,0
Algeria,0,0,0,0,0,0,0,0,1,0,...,1,0,1,0,1,1,0,1,1,1
Angola,0,1,1,1,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,1
Argentina,1,0,1,1,0,1,1,0,0,1,...,1,1,1,1,1,1,1,1,1,0


In [293]:
# PARTIE A: DÉTERMINER LES PARENTS ET ENFANTS DU GRAPHE

In [294]:
# Greedy structure selection: walk the feature pairs from the strongest to the
# weakest absolute correlation and keep an edge whenever the child still has a
# free parent slot, until N_EDGES edges have been accepted.
MAX_PARENTS = 4   # maximum number of parents per node
N_EDGES = 100     # number of edges the network must contain

# 1. Load corr.json, which holds the correlation between every pair of features.
with open('corr.json', 'r') as f:
    corr_list = json.load(f)

# Correlation-strength matrix: only the magnitude matters, not the sign.
corr_matrix = np.abs(np.array(corr_list))
n_features = corr_matrix.shape[0]

# Every unordered pair of features (780 pairs for 40 features).
pairs = list(combinations(np.arange(n_features), 2))

# 2. Correlation strength of each pair, in the same order as `pairs`.
corr_pairs = np.array([corr_matrix[first, second] for first, second in pairs], dtype=float)

# 3. Indices of the pairs sorted by decreasing correlation strength.
corr_index = np.argsort(-1*corr_pairs)

# 4. Parent table: row = child feature, columns = its parents, -1 = empty slot.
# E.g. parents[0] = [2, 4, -1, -1] means feature 0 has exactly two parents, 2 and 4.
parents = np.full((n_features, MAX_PARENTS), -1, dtype=int)
# Number of parents currently assigned to each child.
count_parents = np.zeros(n_features, dtype=int)

# 5. Load ordre.json: the feature appearing first in this list is the parent.
with open('ordre.json', 'r') as f:
    order_list = json.load(f)

order = np.array(order_list)

# Position of each feature in the ordering, computed once instead of calling
# list(order).index(...) twice per loop iteration. setdefault keeps the first
# occurrence, exactly like list.index does.
position = {}
for rank, feature in enumerate(order.tolist()):
    position.setdefault(feature, rank)

nb_edges = 0    # edges accepted so far
corr_iter = 0   # cursor in corr_index (next strongest pair to examine)

while nb_edges < N_EDGES:
    if corr_iter >= len(corr_index):
        # Every pair has been examined: fail loudly instead of indexing past the end.
        raise RuntimeError(
            f'Seulement {nb_edges} arêtes acceptées sur {N_EDGES}: toutes les paires ont été examinées'
        )

    # Start with the most strongly correlated pairs.
    chosen_pair = pairs[corr_index[corr_iter]]
    print(f'Corrélation pour paire {chosen_pair}: {corr_pairs[corr_index[corr_iter]]}')

    # The arrow goes from the feature that comes first in the ordering to the other one.
    if position[chosen_pair[0]] < position[chosen_pair[1]]:
        parent, child = chosen_pair
    else:
        child, parent = chosen_pair

    print(f'Enfant {child} et parent {parent}')
    if count_parents[child] < MAX_PARENTS:
        parents[child][count_parents[child]] = parent
        count_parents[child] += 1
        nb_edges += 1
        print('Paire acceptée \n')
    else:
        # The child already has MAX_PARENTS parents: drop this edge.
        print('Paire refusée \n')

    corr_iter += 1

Corrélation pour paire (0, 15): 0.9158065192304322
Enfant 0 et parent 15
Paire acceptée 

Corrélation pour paire (15, 36): 0.9078890563646058
Enfant 15 et parent 36
Paire acceptée 

Corrélation pour paire (5, 28): 0.8967764268508257
Enfant 28 et parent 5
Paire acceptée 

Corrélation pour paire (5, 24): 0.8934695681526009
Enfant 24 et parent 5
Paire acceptée 

Corrélation pour paire (5, 23): 0.8928422991811447
Enfant 23 et parent 5
Paire acceptée 

Corrélation pour paire (13, 28): 0.887896200940255
Enfant 13 et parent 28
Paire acceptée 

Corrélation pour paire (5, 13): 0.8864878038787631
Enfant 13 et parent 5
Paire acceptée 

Corrélation pour paire (20, 24): 0.8849722125977192
Enfant 20 et parent 24
Paire acceptée 

Corrélation pour paire (13, 20): 0.8845540280638439
Enfant 13 et parent 20
Paire acceptée 

Corrélation pour paire (0, 36): 0.864531309299911
Enfant 0 et parent 36
Paire acceptée 

Corrélation pour paire (24, 27): 0.8639757858079944
Enfant 27 et parent 24
Paire acceptée 

Co

In [295]:
# Print how many parents were assigned to each feature...
print(count_parents)
# ...and display the parent table itself (row = child, -1 = unused slot).
parents

[4 4 4 1 2 0 4 0 4 2 4 0 0 4 0 4 0 4 4 0 4 3 4 3 1 4 1 4 2 4 0 4 4 0 0 4 4
 1 4 4]


array([[15, 36, 29,  5],
       [25, 36,  0, 15],
       [ 8, 31, 24, 27],
       [17, -1, -1, -1],
       [34,  7, -1, -1],
       [-1, -1, -1, -1],
       [36, 15,  5, 32],
       [-1, -1, -1, -1],
       [24, 31, 36,  5],
       [10,  2, -1, -1],
       [ 2,  6, 31, 27],
       [-1, -1, -1, -1],
       [-1, -1, -1, -1],
       [28,  5, 20, 24],
       [-1, -1, -1, -1],
       [36, 29,  5, 28],
       [-1, -1, -1, -1],
       [39, 35, 24, 38],
       [ 0, 15, 36, 29],
       [-1, -1, -1, -1],
       [24,  5, 28, 23],
       [27, 20, 39, -1],
       [38,  5, 13, 32],
       [ 5, 28, 24, -1],
       [ 5, -1, -1, -1],
       [36, 15,  0,  5],
       [ 0, -1, -1, -1],
       [24, 20, 31, 13],
       [ 5, 24, -1, -1],
       [ 5, 32, 36, 28],
       [-1, -1, -1, -1],
       [24,  5, 13, 20],
       [ 5, 24, 28, 23],
       [-1, -1, -1, -1],
       [-1, -1, -1, -1],
       [ 5, 24, 38, 36],
       [ 5, 32, 28, 24],
       [18, -1, -1, -1],
       [ 5, 23, 24, 28],
       [ 8, 35, 31, 24]])

In [296]:
# Commentaires: on remarque que, même avec une corrélation de 0.40 par exemple, l'algorithme accepte quand même la paire,
# pour deux raisons:
# 1. Le nombre de flèches doit être 100.
# 2. Plusieurs flèches sont rejetées (même avec une force très élevée), car le nombre de parents dépasserait 4.

In [297]:
# PARTIE B. CALCULER LES PROBABILITÉS CONDITIONNELLES

In [298]:
'''
CALCULER PROBABILITÉ CONDITIONNELLE
2. Créer une variable qui contient 40 DiscreteDistributions et ConditionalDistribution et la remplir
3. Convertir toutes les variables en noeuds
'''

'\nCALCULER PROBABILITÉ CONDITIONNELLE\n2. Créer une variable qui contient 40 DiscreteDistributions et ConditionalDistribution et la remplir\n3. Convertir toutes les variables en noeuds\n'

In [299]:
def dist_no_parent(feature):
    """Return the marginal distribution of a node that has no parent.

    Parameters
    ----------
    feature : int
        Positional index of the column of ``dataC`` to describe.

    Returns
    -------
    DiscreteDistribution
        Distribution over ``{False, True}`` whose probabilities are the
        fractions of rows of ``dataC`` where the column equals 0 and 1.
    """
    # Select the column by position explicitly: indexing the label-indexed
    # result of .count() with an integer relies on a deprecated fallback.
    column = dataC.iloc[:, feature]
    n_rows = dataC.shape[0]

    prob_false = (column == 0).sum() / n_rows
    prob_true = (column == 1).sum() / n_rows

    return DiscreteDistribution({False: prob_false, True: prob_true})

In [327]:
def calculate_cond_prob(feature, array_bool):
    """Estimate P(child = c | parents = p) from the rows of ``dataC``.

    The estimate is the conditional-probability formula applied to counts:
    (rows where the parents AND the child take the requested values) divided
    by (rows where the parents take the requested values).

    Example: with ``array_bool = [0, 1]`` the node has a single parent; the
    result is the number of rows with parent == 0 and child == 1 divided by
    the number of rows with parent == 0.

    Parameters
    ----------
    feature : int
        Positional index of the child column in ``dataC``; its parents are
        read from ``parents[feature]``.
    array_bool : sequence of 0/1
        Requested values of the first ``len(array_bool) - 1`` parents, in the
        order they appear in ``parents[feature]``, followed by the requested
        value of the child.

    Returns
    -------
    float
        The estimated conditional probability. When the parent configuration
        never occurs in ``dataC`` the probability is undefined; 0.5 is
        returned so that the two child values still form a valid distribution
        (returning 0.0 for both would give a table row that sums to 0).

    Raises
    ------
    ValueError
        If ``array_bool`` holds no parent value, or asks for more parents
        than ``parents[feature]`` actually contains.
    """
    values = list(array_bool)
    n_parents = len(values) - 1
    if n_parents < 1:
        raise ValueError('array_bool must hold at least one parent value followed by the child value')

    parent_ids = [int(parent) for parent in parents[feature][:n_parents]]
    if len(parent_ids) != n_parents or any(parent < 0 for parent in parent_ids):
        # -1 marks an empty slot; using it as an index would silently select the last column.
        raise ValueError(f'feature {feature} does not have {n_parents} parent(s)')

    # Rows where every parent takes its requested value.
    parent_mask = np.ones(dataC.shape[0], dtype=bool)
    for parent, value in zip(parent_ids, values[:-1]):
        parent_mask &= (dataC.iloc[:, parent] == value).to_numpy()

    n_parent_rows = int(parent_mask.sum())
    if n_parent_rows == 0:
        # Parent configuration never observed: fall back to a uniform estimate
        # (this also avoids the division by zero).
        return 0.5

    child_mask = (dataC.iloc[:, feature] == values[-1]).to_numpy()
    n_joint_rows = int((parent_mask & child_mask).sum())

    return n_joint_rows / n_parent_rows

In [337]:
def dist_one_parent(feature, variables):
    """Return the conditional distribution of a node that has exactly one parent.

    Each table row is ``[parent value, child value, probability]``, where the
    probability comes from ``calculate_cond_prob``. Rows are listed with the
    parent value varying slowest and the child value varying fastest.

    Parameters
    ----------
    feature : int
        Index of the child feature.
    variables : sequence
        Distributions already built, indexed by feature; the entry of the
        parent is passed to the table as its parent distribution.
    """
    states = (False, True)
    tableau = [
        [parent_state, child_state,
         calculate_cond_prob(feature, [int(parent_state), int(child_state)])]
        for parent_state in states
        for child_state in states
    ]

    parent_distribution = variables[parents[feature][0]]

    return ConditionalProbabilityTable(tableau, [parent_distribution])

In [338]:
def dist_two_parents(feature, variables):
    """Return the conditional distribution of a node that has exactly two parents.

    The table has one row per combination of (parent 1, parent 2, child)
    values, each followed by the probability given by ``calculate_cond_prob``.
    Rows are generated rather than written by hand; the first parent varies
    slowest and the child varies fastest.

    Parameters
    ----------
    feature : int
        Index of the child feature.
    variables : sequence
        Distributions already built, indexed by feature; the entries of the
        two parents are passed to the table as its parent distributions.
    """
    from itertools import product

    n_parents = 2
    tableau = [
        [*states, calculate_cond_prob(feature, [int(state) for state in states])]
        for states in product((False, True), repeat=n_parents + 1)
    ]

    parent_distributions = [variables[parent] for parent in parents[feature][:n_parents]]

    return ConditionalProbabilityTable(tableau, parent_distributions)

In [339]:
def dist_three_parents(feature, variables):
    """Return the conditional distribution of a node that has exactly three parents.

    The table has one row per combination of (parent 1, parent 2, parent 3,
    child) values, each followed by the probability given by
    ``calculate_cond_prob``. The 16 rows are generated rather than written by
    hand; the first parent varies slowest and the child varies fastest.

    Parameters
    ----------
    feature : int
        Index of the child feature.
    variables : sequence
        Distributions already built, indexed by feature; the entries of the
        three parents are passed to the table as its parent distributions.
    """
    from itertools import product

    n_parents = 3
    tableau = [
        [*states, calculate_cond_prob(feature, [int(state) for state in states])]
        for states in product((False, True), repeat=n_parents + 1)
    ]

    parent_distributions = [variables[parent] for parent in parents[feature][:n_parents]]

    return ConditionalProbabilityTable(tableau, parent_distributions)

In [340]:
def dist_four_parents(feature, variables):
    """Return the conditional distribution of a node that has exactly four parents.

    The table has one row per combination of (parent 1, ..., parent 4, child)
    values, each followed by the probability given by ``calculate_cond_prob``.
    The 32 rows are generated rather than written by hand; the first parent
    varies slowest and the child varies fastest.

    Parameters
    ----------
    feature : int
        Index of the child feature.
    variables : sequence
        Distributions already built, indexed by feature; the entries of the
        four parents are passed to the table as its parent distributions.
    """
    from itertools import product

    n_parents = 4
    tableau = [
        [*states, calculate_cond_prob(feature, [int(state) for state in states])]
        for states in product((False, True), repeat=n_parents + 1)
    ]

    parent_distributions = [variables[parent] for parent in parents[feature][:n_parents]]

    return ConditionalProbabilityTable(tableau, parent_distributions)

In [341]:
# For each possible number of parents (0 through 4), print the indices of the
# features that have exactly that many parents.
for n_parents in range(5):
    print(np.where(count_parents == n_parents))

(array([ 5,  7, 11, 12, 14, 16, 19, 30, 33, 34], dtype=int64),)
(array([ 3, 24, 26, 37], dtype=int64),)
(array([ 4,  9, 28], dtype=int64),)
(array([21, 23], dtype=int64),)
(array([ 0,  1,  2,  6,  8, 10, 13, 15, 17, 18, 20, 22, 25, 27, 29, 31, 32,
       35, 36, 38, 39], dtype=int64),)


In [352]:
# Build one distribution per feature. A conditional table needs the
# distributions of its parents, so the features are swept repeatedly (in index
# order) and a feature is built as soon as all of its parents are available.

# Table builder to use for each number of parents.
builders = {
    1: dist_one_parent,
    2: dist_two_parents,
    3: dist_three_parents,
    4: dist_four_parents,
}

variables = np.zeros(len(count_parents), dtype=object)
pending = set(range(len(count_parents)))   # features whose distribution is not built yet

while pending:
    progressed = False

    # sorted() takes a snapshot of the pending features, so a feature built
    # during this sweep is immediately visible to the features that follow it.
    for feature in sorted(pending):
        n_parents = int(count_parents[feature])

        if n_parents == 0:
            # Root node: marginal distribution only.
            variables[feature] = dist_no_parent(feature)
        else:
            if n_parents not in builders:
                raise ValueError(f'feature {feature} has an unsupported number of parents: {n_parents}')

            parent_ids = parents[feature][:n_parents]
            if any(int(parent) in pending for parent in parent_ids):
                # At least one parent is not built yet: retry on a later sweep.
                continue

            variables[feature] = builders[n_parents](feature, variables)

        pending.discard(feature)
        progressed = True

    if not progressed:
        # A whole sweep built nothing: the remaining features wait on each
        # other, so another sweep would never terminate.
        raise RuntimeError(f'cannot build the distributions of features {sorted(pending)}')

In [361]:
# Wrap every distribution in a Node named after the dataC column that has the
# same position.
column_names = dataC.iloc[0, :].index
nodes = np.zeros(variables.shape[0], dtype=object)

for position, distribution in enumerate(variables):
    nodes[position] = Node(distribution, name=column_names[position])

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False])

In [387]:
# PARTIE C: CONSTRUCTION DU RÉSEAU BAYÉSIEN

In [416]:
# Assemble the Bayesian network: register every node, then add one edge per
# (parent, child) relation recorded in the `parents` table.
bayesnet = BayesianNetwork("Réseau")
bayesnet.add_nodes(*nodes)

# add_edge(PARENT, CHILD)
# Row `child` of `parents` lists the parent feature indices of that child;
# -1 marks an unused slot. The value stored in the slot -- not the slot's
# position in the row -- identifies the parent node.
for child in range(nodes.shape[0]):
    for parent in parents[child]:
        if parent != -1:
            bayesnet.add_edge(nodes[parent], nodes[child])

bayesnet.bake()

In [386]:
# Save the network to reseau.json in its JSON form.
with open('reseau.json', mode='w') as json_file:
    serialized_network = bayesnet.to_json()
    json_file.write(serialized_network)

In [394]:
# PARTIE D: PRÉDICTIONS

In [403]:
# Display the dataset again before running the predictions.
dataC

Unnamed: 0_level_0,GDP,Internet Speed,Consumption of Pure Alcohol,Intentional Homicide Victims,Military Expenditures,Human Development Index,Democracy Index,Tertiary Education,Importance of Religion (in %),% of Christians,...,Incarceration Rate (per 100 000),Literacy Rate,Age at First Marriage,Spending on Education (in %),Homeless Population (per 10 000),Milk Consumption,Num. of Scientific and Technical Journals Articles,Books Published,Kilocalories,Avg. Yearly Temperature (in Celsius)
COUNTRY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,0,0,0,1,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Albania,0,0,1,0,1,1,1,0,0,0,...,1,1,0,0,0,1,0,0,1,0
Algeria,0,0,0,0,0,0,0,0,1,0,...,1,0,1,0,1,1,0,1,1,1
Angola,0,1,1,1,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,1
Argentina,1,0,1,1,0,1,1,0,0,1,...,1,1,1,1,1,1,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Venezuela,0,0,0,1,1,0,0,0,0,1,...,1,1,0,0,1,1,0,0,0,1
Vietnam,0,0,1,0,0,0,0,0,0,0,...,1,1,0,0,0,0,0,1,0,1
Yemen,0,1,0,1,1,0,0,0,1,0,...,0,0,0,1,1,0,0,0,0,1
Zambia,0,0,0,1,0,0,0,0,1,1,...,0,0,0,0,0,0,0,1,0,1


In [420]:
# Rebuild the Bayesian network from the JSON stored in reseau.json.
with open('reseau.json', mode='r') as json_file:
    serialized_network = json_file.read()
    reseau = BayesianNetwork.from_json(serialized_network)

In [482]:
def get_mean_error(input_features, predicted_table, n_features=40):
    """Return the mean misclassification rate over the predicted features.

    Parameters
    ----------
    input_features : array-like of int
        Indices of the features that were given to the network as
        observations; they are excluded from the error.
    predicted_table : array-like, shape (n_rows, n_features)
        Predicted value of every feature for every row of ``dataC``.
    n_features : int, optional
        Total number of features (columns). Defaults to 40.

    Returns
    -------
    float
        Average, over the features that are not in ``input_features``, of
        ``1 - accuracy`` between the predicted column and the matching column
        of ``dataC``.
    """
    predicted_features = np.delete(np.arange(n_features), input_features)
    predictions = np.asarray(predicted_table)
    truth = dataC.values

    total_error = 0.0
    # Index the columns by feature id, not by loop position: positions 0..k-1
    # would score the observed inputs and skip the last predicted columns.
    for feature in predicted_features:
        total_error += 1 - accuracy_score(truth[:, feature], predictions[:, feature])

    return total_error / predicted_features.shape[0]

In [471]:
# Placeholders for the search of the best pair of observed features (see TODO below).
opt_index = 0
opt_pairs = np.zeros(2, dtype=int)

pairs = list(combinations(np.arange(40), 2))
risk_pairs = np.zeros(len(pairs))

y_predict = np.zeros(dataC.shape[0], dtype = int)

# Only the first pair, pairs[0] == (0, 1), is evaluated for now.
input_pair = pairs[0]

# Collect the predicted rows in a list and build the array once at the end:
# stacking inside the loop copies the whole table at every iteration.
predicted_rows = []

for i in range(dataC.shape[0]):
    # Fresh observation vector for this row: None = value to be predicted,
    # only the two features of the pair are observed.
    entry_list = [None] * 40
    for feature in input_pair:
        entry_list[feature] = bool(dataC.iloc[i, feature])

    if i % 10 == 0:
        print(f'Itération {i} complétée')

    predicted_array = reseau.predict([entry_list])[0]
    predicted_rows.append([int(val) for val in predicted_array])

# One row per row of dataC, one column per feature.
predicted_table = np.array(predicted_rows)

risk_pairs[0] = get_mean_error(np.array(input_pair), predicted_table)
# TODO: for loop for all the pairs. Find the pairs with the smallest risk (optimal pairs)
# TODO: for loop for all the pairs. Find the pairs with the smallest risk (optimal pairs)

Itération 0 complétée
Itération 10 complétée
Itération 20 complétée
Itération 30 complétée
Itération 40 complétée
Itération 50 complétée
Itération 60 complétée
Itération 70 complétée
Itération 80 complétée
Itération 90 complétée
Itération 100 complétée
Itération 110 complétée
Itération 120 complétée
Itération 130 complétée
Itération 140 complétée
Itération 150 complétée
