## Training Bayesian Network

### Get updated precision matrix from data

In [1]:
# Import libraries 
import pandas as pd 
import numpy as np

In [2]:
# Load CO2 and Ethylene gas mixture file
header_names = ["Time", "CO2", "Ethylene", "Sensor1", "Sensor2", "Sensor3", "Sensor4", "Sensor5", "Sensor6", "Sensor7", "Sensor8", "Sensor9", "Sensor10", "Sensor11", "Sensor12", "Sensor13", "Sensor14", "Sensor15", "Sensor16"]
df = pd.read_csv("data/gas-mixture/ethylene_CO.txt", delim_whitespace=True, skiprows=[0], header=None, names=header_names)

# Set time column as index
df = df.set_index("Time")

In [3]:
# Check data frame
df.head()

Unnamed: 0_level_0,CO2,Ethylene,Sensor1,Sensor2,Sensor3,Sensor4,Sensor5,Sensor6,Sensor7,Sensor8,Sensor9,Sensor10,Sensor11,Sensor12,Sensor13,Sensor14,Sensor15,Sensor16
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0.0,0.0,0.0,-50.85,-1.95,-41.82,1.3,-4.07,-28.73,-13.49,-3.25,55139.95,50669.5,9626.26,9762.62,24544.02,21420.68,7650.61,6928.42
0.01,0.0,0.0,-49.4,-5.53,-42.78,0.49,3.58,-34.55,-9.59,5.37,54395.77,50046.91,9433.2,9591.21,24137.13,20930.33,7498.79,6800.66
0.01,0.0,0.0,-40.04,-16.09,-27.59,0.0,-7.16,-42.14,-12.52,-5.86,53960.02,49299.3,9324.4,9449.81,23628.9,20504.94,7369.67,6697.47
0.03,0.0,0.0,-47.14,-10.57,-32.28,4.4,-11.22,-37.94,-7.16,-1.14,53047.71,48907.0,9170.64,9305.58,23101.66,20101.42,7285.13,6578.52
0.04,0.0,0.0,-33.58,-20.79,-33.25,6.03,3.42,-34.22,-14.46,8.31,52700.28,48330.96,9073.64,9163.47,22689.54,19694.07,7156.74,6468.32


In [4]:
# Transform data frame in numpy matrix
# data = (df.head(1000)).values
data = df.values

In [5]:
# Calculate mean of each column
x_mean = data.mean(0)

In [6]:
# Calculate s
s = data-x_mean
s = (s.T).dot(s)

In [7]:
# Calculate M
M = data.size

In [8]:
# Assuming prior ignorance. v = 0 and beta = 0
beta_update = s
mi_update = x_mean
v_update = M
alpha_update = M-1

In [9]:
# number of random variables
n = 18 

# Calculate Phi_Update
phi_update = (v_update+1)/(v_update*(alpha_update-n+1))
phi_update *= beta_update

In [10]:
# Calculate Precision Update
precision_update = np.linalg.inv(phi_update)

### Calculate Symbolic Precision Matrix

In [11]:
import sympy as sp

In [12]:
# Startup and configuration
n = 18         # 18 random variables
b = [None]*n   # list to initialize bij symbols
v = [None]*n   # list to initialize vi  symbols (variance)

In [13]:
# Create all bij symbols (from b11 to b1818)
for i in range(n):
    symbol_string = ''
    for j in range(i+1):
        symbol_string += 'b' + str(i+1) + str(j+1) + ' '
    b[i] = sp.symbols(symbol_string)

In [14]:
# Start B matrix using bij symbols
B = sp.zeros(n,n)
B[0, 0] = b[0]       # first symbols is exception not being a list of list
for i in range(1,n):
    for j in range(i+1):
        B[i,j] = b[i][j]

In [15]:
# Create all v, variance symbols (from v1 to v18)
for i in range(n):
    v[i] = sp.symbols(r'\sigma' + str(i+1))

In [16]:
T = sp.zeros(1,1)
T[0,0] = (1/v[0])**2
for i in range (1, 18):
    # Initialize ti
#    t = (1/v[i])**2
    t = (1/v[i]) # For simplicity variance will be represented without the square
    tt = sp.zeros(1,1)
    tt[0,0] = t
    
    # Initialize  
    TMP = sp.zeros(i+1,i+1)
    
    ## Populate                                                ## row, col
    TMP[0:i,   0:i]   = T + t*sp.Transpose(B[i,0:i])*B[i,0:i]  ## [0 , 0]
    TMP[0:i,   i:i+1] = -sp.Transpose(t*B[i,0:i])              ## [0 , 1]
    TMP[i:i+1, 0:i]   = -t*B[i,0:i]                            ## [1 , 0]
    TMP[i:i+1, i:i+1] = tt                                     ## [1 , 1]
    
    
    T = TMP
# T

### Solve Precision Matrix (get parameters $\sigma_{ij}^*$ and $b_{ij}$)

In [37]:
# Start Equation list 
EQ = sp.zeros(n*n)
for i in range(0,n):
    for j in range(0,n):
        # EQ[(i-1)*n + j] = T[i][j]-precision_update[i][j]        
        EQ[i*n + j] = T[i, j]-precision_update[i, j]

In [None]:
sp.solve(EQ, dict=True)

-0.0024307339661103276

-2 + 1/\sigma18

b93**2/\sigma9 + b83**2/\sigma8 + b73**2/\sigma7 + b63**2/\sigma6 + b53**2/\sigma5 + b43**2/\sigma4 + 1/\sigma3 + b183**2/\sigma18 + b173**2/\sigma17 + b163**2/\sigma16 + b153**2/\sigma15 + b143**2/\sigma14 + b133**2/\sigma13 + b123**2/\sigma12 + b113**2/\sigma11 + b103**2/\sigma10