In [1]:
import pandas as pd

In [2]:
import numpy as np
from pgmpy.models import MarkovNetwork
from pgmpy.factors.discrete import DiscreteFactor

In [3]:
data = pd.read_csv('./Diet_R.csv')
# clean the data from incomplete entries  
data = data.replace(' ', np.nan)    
data.dropna(inplace = True)

In [4]:
G=MarkovNetwork()

In [5]:
df=pd.DataFrame(data, columns=[ 'gender', 'Age', 'Height', 'pre.weight', 'Diet', 'weight6weeks'])

In [6]:
# replace range of the values to one value for deal with memory issues
gender = list(map(int, df['gender'].values.tolist()))
Age=pd.cut(df['Age'], bins = [20,25,30,35,40,45,50,55,60,65],labels=[20,25,30,35,40,45,50,55,60])
Height=pd.cut(df['Height'], bins = [150,160,170,180,190,200],labels=[150,160,170,180,190])
preWeight=pd.cut(df['pre.weight'], bins = [50,60,70,80,90,100],labels=[50,60,70,80,90])
Diet=df['Diet'].values.tolist()
weight6weeks=pd.cut(df['weight6weeks'], bins = [50,60,70,80,90,100],labels=[50,60,70,80,90])

In [7]:
def unique(list1): 
    x = np.array(list1) 
    return np.unique(x)

def two_param(list1, list2): 
    a = unique(list1) 
    b = unique(list2) 
    values_for_v1_v2 = [] 
    for aa  in a: 
        for bb in b:
            values_for_v1_v2.append(((list1==aa)&(list2==bb)).sum())
    return values_for_v1_v2

def three_param(list1, list2, list3): 
    a = unique(list1) 
    b = unique(list2) 
    c = unique(list3)
    values_for_v1_v2_v3 = [] 
    for aa  in a: 
        for bb in b:
            for cc in c:
                values_for_v1_v2_v3.append(((list1==aa)&(list2==bb)&(list3==cc)).sum())
    return values_for_v1_v2_v3

In [8]:
value1 = two_param(gender,Age)
value2 = three_param(gender,Age,Height)
value3 = three_param(Height, preWeight, Diet)
value4 = two_param(Age, weight6weeks)
value5 = three_param(gender,Diet,weight6weeks)

In [9]:
G.add_nodes_from([ 'gender', 'Age', 'Height', 'pre.weight', 'Diet', 'weight6weeks'])

In [10]:
G.add_edges_from([('gender', 'Age'), ('gender','Age','Height'), ('Height', 'pre.weight', 'Diet'), ('Age', 'weight6weeks'), ('gender','Diet','weight6weeks')])

In [11]:
factor1 = DiscreteFactor(['gender', 'Age'], cardinality=[2, 9], values=value1)
factor2 = DiscreteFactor(['gender','Age','Height'], cardinality=[2,9,6], values=value2)
factor3 = DiscreteFactor(['Height', 'pre.weight', 'Diet'], cardinality=[6,4,3], values=value3)
factor4 = DiscreteFactor(['Age', 'weight6weeks'], cardinality=[9, 4], values=value4)
factor5 = DiscreteFactor(['gender','Diet','weight6weeks'], cardinality=[2, 3, 4], values=value5)

In [12]:
G.add_factors(factor1)
G.add_factors(factor2)
G.add_factors(factor3)
G.add_factors(factor4)
G.add_factors(factor5)

In [13]:
G.get_factors()

[<DiscreteFactor representing phi(gender:2, Age:9) at 0x2402166b250>,
 <DiscreteFactor representing phi(gender:2, Age:9, Height:6) at 0x2407d9cda20>,
 <DiscreteFactor representing phi(Height:6, pre.weight:4, Diet:3) at 0x2402166b880>,
 <DiscreteFactor representing phi(Age:9, weight6weeks:4) at 0x240216b4fd0>,
 <DiscreteFactor representing phi(gender:2, Diet:3, weight6weeks:4) at 0x2407d97a800>]

In [14]:
G.get_local_independencies()

(gender ⟂ weight6weeks, pre.weight, Height | Diet, Age)
(Age ⟂ pre.weight, Diet, Height | weight6weeks, gender)
(Height ⟂ weight6weeks, Diet, Age, gender | pre.weight)
(pre.weight ⟂ weight6weeks, Diet, Age, gender | Height)
(Diet ⟂ weight6weeks, pre.weight, Age, Height | gender)
(weight6weeks ⟂ pre.weight, Diet, gender, Height | Age)

In [15]:
# initial data before normalization
Ptilda=factor1*factor2*factor3*factor4*factor5

In [16]:
P=Ptilda

In [17]:
print(P.marginalize([   'Age', 'Height', 'pre.weight', 'Diet', 'weight6weeks'], inplace=False))
print(P.marginalize([  'gender', 'Height', 'pre.weight', 'Diet', 'weight6weeks'], inplace=False))
print(P.marginalize([  'gender', 'Age', 'pre.weight', 'Diet', 'weight6weeks'], inplace=False))
print(P.marginalize([  'gender', 'Age', 'Height', 'Diet', 'weight6weeks'], inplace=False))
print(P.marginalize([  'gender', 'Age', 'Height', 'pre.weight', 'weight6weeks'], inplace=False))
print(P.marginalize([  'gender', 'Age', 'Height', 'pre.weight', 'Diet'], inplace=False))

+-----------+---------------+
| gender    |   phi(gender) |
| gender(0) |   251219.0000 |
+-----------+---------------+
| gender(1) |   275573.0000 |
+-----------+---------------+
+--------+-------------+
| Age    |    phi(Age) |
| Age(0) |   3070.0000 |
+--------+-------------+
| Age(1) |  23708.0000 |
+--------+-------------+
| Age(2) |  57795.0000 |
+--------+-------------+
| Age(3) | 289799.0000 |
+--------+-------------+
| Age(4) |  70551.0000 |
+--------+-------------+
| Age(5) |  57017.0000 |
+--------+-------------+
| Age(6) |  22344.0000 |
+--------+-------------+
| Age(7) |   2508.0000 |
+--------+-------------+
| Age(8) |      0.0000 |
+--------+-------------+
+-----------+---------------+
| Height    |   phi(Height) |
| Height(0) |    39093.0000 |
+-----------+---------------+
| Height(1) |   278631.0000 |
+-----------+---------------+
| Height(2) |   181515.0000 |
+-----------+---------------+
| Height(3) |     6714.0000 |
+-----------+---------------+
| Height(4) |    208

In [18]:
#normilization
P.normalize()

In [19]:
P_gender=P.marginalize([  'Age', 'Height', 'pre.weight', 'Diet', 'weight6weeks'], inplace=False)
print(P_gender)

+-----------+---------------+
| gender    |   phi(gender) |
| gender(0) |        0.4769 |
+-----------+---------------+
| gender(1) |        0.5231 |
+-----------+---------------+


In [20]:
P_Age = P.marginalize([  'gender', 'Height', 'pre.weight', 'Diet', 'weight6weeks'], inplace=False)
print(P_Age)

+--------+------------+
| Age    |   phi(Age) |
| Age(0) |     0.0058 |
+--------+------------+
| Age(1) |     0.0450 |
+--------+------------+
| Age(2) |     0.1097 |
+--------+------------+
| Age(3) |     0.5501 |
+--------+------------+
| Age(4) |     0.1339 |
+--------+------------+
| Age(5) |     0.1082 |
+--------+------------+
| Age(6) |     0.0424 |
+--------+------------+
| Age(7) |     0.0048 |
+--------+------------+
| Age(8) |     0.0000 |
+--------+------------+


In [21]:
P_Height = P.marginalize([  'gender', 'Age', 'pre.weight', 'Diet', 'weight6weeks'], inplace=False)
print(P_Height)

+-----------+---------------+
| Height    |   phi(Height) |
| Height(0) |        0.0742 |
+-----------+---------------+
| Height(1) |        0.5289 |
+-----------+---------------+
| Height(2) |        0.3446 |
+-----------+---------------+
| Height(3) |        0.0127 |
+-----------+---------------+
| Height(4) |        0.0396 |
+-----------+---------------+
| Height(5) |        0.0000 |
+-----------+---------------+


In [22]:
P_preWeight = P.marginalize([  'gender', 'Age', 'Height', 'Diet', 'weight6weeks'], inplace=False)
print(P_preWeight)

+---------------+-------------------+
| pre.weight    |   phi(pre.weight) |
| pre.weight(0) |            0.0782 |
+---------------+-------------------+
| pre.weight(1) |            0.3667 |
+---------------+-------------------+
| pre.weight(2) |            0.4155 |
+---------------+-------------------+
| pre.weight(3) |            0.1396 |
+---------------+-------------------+


In [23]:
P_Diet = P.marginalize([  'gender', 'Age', 'Height', 'pre.weight', 'weight6weeks'], inplace=False)
print(P_Diet)

+---------+-------------+
| Diet    |   phi(Diet) |
| Diet(0) |      0.2984 |
+---------+-------------+
| Diet(1) |      0.3211 |
+---------+-------------+
| Diet(2) |      0.3805 |
+---------+-------------+


In [24]:
P_weight6weeks = P.marginalize([  'gender', 'Age', 'Height', 'pre.weight', 'Diet'], inplace=False)
print(P_weight6weeks)

+-----------------+---------------------+
| weight6weeks    |   phi(weight6weeks) |
| weight6weeks(0) |              0.0484 |
+-----------------+---------------------+
| weight6weeks(1) |              0.4223 |
+-----------------+---------------------+
| weight6weeks(2) |              0.4768 |
+-----------------+---------------------+
| weight6weeks(3) |              0.0525 |
+-----------------+---------------------+


In [None]:
# Comparing to the value before normalization the normalized values is less 