In [13]:
import matplotlib.pyplot as plt
import nltk 
import numpy as np
import pandas as pd 
import seaborn as sns

from sklearn.metrics import accuracy_score

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.feature_extraction.text import TfidfVectorizer

from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination
from pgmpy.estimators import MaximumLikelihoodEstimator

In [None]:
#!pip install pgmpy # https://pgmpy.org/detailed_notebooks/10.%20Learning%20Bayesian%20Networks%20from%20Data.html
# https://pgmpy.org/metrics/metrics.html

In [8]:
df = pd.read_csv("transport.csv")
df = df.drop(columns="Unnamed: 0")
df

Unnamed: 0,Age,Income,Education,Residency,Transport
0,middle,low,high,big,train
1,young,low,low,big,car
2,middle,low,low,small,car
3,old,low,high,big,car
4,middle,medium,high,small,car
...,...,...,...,...,...
995,middle,medium,low,small,car
996,middle,high,high,big,car
997,middle,medium,high,small,car
998,middle,medium,high,big,car


In [12]:
from pgmpy.factors.discrete.CPD import TabularCPD

def print_full(cpd):
    backup = TabularCPD._truncate_strtable
    TabularCPD._truncate_strtable = lambda self, x: x
    print(cpd)
    TabularCPD._truncate_strtable = backup

In [68]:
def do_bayesian_network(network):
    model = BayesianNetwork(network)
    model.fit(df, estimator=MaximumLikelihoodEstimator)

    # Check the learned CPDs
    print(model.get_cpds())
    
    model.check_model()
    print_full(model)
    print_mle(model)

def print_mle(model):
    mle = MaximumLikelihoodEstimator(model, df)

    print(mle.estimate_cpd("Age"))  # unconditional
    print(mle.estimate_cpd("Residency"))  # conditional
    print(mle.estimate_cpd("Education"))  # conditional
    print(mle.estimate_cpd("Income"))  # conditional
    print(mle.estimate_cpd("Transport"))  # conditional

In [69]:
do_bayesian_network([("Age", "Income"), ("Age", "Education"), ("Income", "Transport"), ("Income", "Residency")])

[<TabularCPD representing P(Age:3) at 0x19f6de65240>, <TabularCPD representing P(Income:3 | Age:3) at 0x19f6de666b0>, <TabularCPD representing P(Education:2 | Age:3) at 0x19f6de64bb0>, <TabularCPD representing P(Transport:2 | Income:3) at 0x19f6de66b90>, <TabularCPD representing P(Residency:2 | Income:3) at 0x19f6e01c790>]
BayesianNetwork with 5 nodes and 4 edges
+-------------+-------+
| Age(middle) | 0.538 |
+-------------+-------+
| Age(old)    | 0.2   |
+-------------+-------+
| Age(young)  | 0.262 |
+-------------+-------+
+------------------+-----+---------------------+
| Income           | ... | Income(medium)      |
+------------------+-----+---------------------+
| Residency(big)   | ... | 0.5674740484429066  |
+------------------+-----+---------------------+
| Residency(small) | ... | 0.43252595155709345 |
+------------------+-----+---------------------+
+-----------------+---------------------+----------+---------------------+
| Age             | Age(middle)         | Age(ol