### 2γ) Bayesian Networks

In [57]:
import pgmpy
import pandas as pd
import numpy as np
import os
pgmpy.__version__


'0.1.25'

In [58]:
CSV_PATH = os.path.join('harth')
df6 = pd.read_csv(CSV_PATH + "/S006.csv")
df20 = pd.read_csv(CSV_PATH + "/S020.csv")
df28 = pd.read_csv(CSV_PATH + "/S028.csv")
df15 = pd.read_csv(CSV_PATH + "/S015.csv")
ultimate_df = pd.concat([df6, df20, df28, df15])

data_bayes_train = ultimate_df[['back_x', 'back_y', 'back_z', 'thigh_x', 'thigh_y', 'thigh_z', 'label']]
data_bayes_train.dropna()
data_bayes_train.drop_duplicates()

Unnamed: 0,back_x,back_y,back_z,thigh_x,thigh_y,thigh_z,label
0,-0.760242,0.299570,0.468570,-5.092732,-0.298644,0.709439,6
1,-0.530138,0.281880,0.319987,0.900547,0.286944,0.340309,6
2,-1.170922,0.186353,-0.167010,-0.035442,-0.078423,-0.515212,6
3,-0.648772,0.016579,-0.054284,-1.554248,-0.950978,-0.221140,6
4,-0.355071,-0.051831,-0.113419,-0.547471,0.140903,-0.653782,6
...,...,...,...,...,...,...,...
418387,-0.995198,0.003628,-0.013364,-0.948044,0.011695,-0.318949,6
418388,-0.989947,0.011355,-0.012790,-0.949454,0.013267,-0.322742,6
418389,-0.998318,-0.001806,-0.013925,-0.947970,0.004826,-0.318672,6
418390,-0.987699,0.010354,-0.011789,-0.949501,-0.006672,-0.323341,6


Δημιουργούμε το Μοντέλο

In [65]:
from pgmpy.models import BayesianNetwork
bayesian = BayesianNetwork([('back_x', 'label'), 
                            ('back_y', 'label'), 
                            ('back_z', 'label'), 
                            ('thigh_x', 'label'),
                            ('thigh_y', 'label'),
                            ('thigh_z', 'label'), 
                            #('thigh_x','thigh_y','thigh_z'),
                            #('back_x','back_y','back_z')
                            ])


In [66]:
from pgmpy.inference import VariableElimination
from sklearn.metrics import precision_score, recall_score, f1_score

def evaluate_bn(df, bn):
    predictor = VariableElimination(bn)
    results = []
    classes = {}
    i = 0
    # Output Normalization
    for index, c in enumerate(df['label']):
        if c not in classes.values():
            classes[i] = c
            i+=1

    for index, row in df.iterrows():
        predicted_distribution = predictor.query(variables=['label'], evidence=None)
        predicted_label = predicted_distribution.values.argmax()
        results.append(classes[predicted_label])
    
    precision = precision_score(df['label'], results, average='weighted')
    recall = recall_score(df['label'], results, average='weighted')
    f1 = f1_score(df['label'], results, average='weighted')

    print(F"Precision: {precision:.4f}, Recall: {recall:.4f}, f1: {f1:.4f}")

    return precision, recall, f1
    #return results

In [67]:
from sklearn.model_selection import train_test_split

train_bayes, test_bayes = train_test_split(data_bayes_train, test_size=0.3, random_state=42)
train_bayes = train_bayes.sample(500, random_state=42)
test_bayes = test_bayes.sample(500, random_state=42)
samples = len(train_bayes) // 10

In [68]:
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator
sampled_data = train_bayes[0:10]
cpd_back_x =  BayesianEstimator(bayesian, sampled_data).estimate_cpd('back_x')
cpd_back_y =  BayesianEstimator(bayesian, sampled_data).estimate_cpd('back_y')
cpd_back_z =  BayesianEstimator(bayesian, sampled_data).estimate_cpd('back_z')
cpd_thigh_x = BayesianEstimator(bayesian, sampled_data).estimate_cpd('thigh_x')
cpd_thigh_y = BayesianEstimator(bayesian, sampled_data).estimate_cpd('thigh_y')
cpd_thigh_z = BayesianEstimator(bayesian, sampled_data).estimate_cpd('thigh_z')
cpd_label =   BayesianEstimator(bayesian, sampled_data).estimate_cpd('label')

bayesian.add_cpds(cpd_back_x,cpd_back_y,cpd_back_z,cpd_thigh_x,cpd_thigh_y,cpd_thigh_z,cpd_label)
bayesian.check_model()

True

In [None]:
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator
precision_metrics = []
recall_metrics = []
f1_metrics = []

for i in range(10, samples):
    sampled_data = train_bayes[i:i+10]
    sampled_test = test_bayes[i:i+10]
    cpd_back_x =  BayesianEstimator(bayesian, sampled_data).estimate_cpd('back_x')
    cpd_back_y =  BayesianEstimator(bayesian, sampled_data).estimate_cpd('back_y')
    cpd_back_z =  BayesianEstimator(bayesian, sampled_data).estimate_cpd('back_z')
    cpd_thigh_x = BayesianEstimator(bayesian, sampled_data).estimate_cpd('thigh_x')
    cpd_thigh_y = BayesianEstimator(bayesian, sampled_data).estimate_cpd('thigh_y')
    cpd_thigh_z = BayesianEstimator(bayesian, sampled_data).estimate_cpd('thigh_z')
    cpd_label =   BayesianEstimator(bayesian, sampled_data).estimate_cpd('label')
    bayesian.add_cpds(cpd_back_x,cpd_back_y,cpd_back_z,cpd_thigh_x,cpd_thigh_y,cpd_thigh_z,cpd_label)
    prec_m, recl_m, f1_m = evaluate_bn(test_bayes, bayesian)
    precision_metrics.append(prec_m)
    recall_metrics.append(recl_m)
    f1_metrics.append(f1_m)
    
bayesian.check_model()

In [70]:
avg_precision = np.average(precision_metrics)
max_precision = np.max(precision_metrics)
avg_recall = np.average(recall_metrics)
max_recall = np.max(recall_metrics)
avg_f1 = np.average(f1_metrics)
max_f1 = np.max(f1_metrics)

print(f'Average Precision: {avg_precision:.4f} and Max: {max_precision:.4f}')
print(f'Average Recall: {avg_recall:.4f} and Max: {max_recall:.4f}')
print(f'Average f1: {avg_f1:.4f} and Max: {max_f1:.4f}')


Average Precision: 0.0294 and Max: 0.2560
Average Recall: 0.1442 and Max: 0.5060
Average f1: 0.0459 and Max: 0.3400
