Chapter 2 - Gaussian Bayesian Networks - probabilistic reasoning

The crop data <br>
G = genetic potential <br>
E = environmental potential <br>
V = vegetative mass <br>
N = number of seeds (@ flowering time) <br>
W = average weight (later in the plant's life) <br>
C = crop <br>

$G\sim N(50, 10^2)$ <br>
$E \sim N(50, 10^2)$ <br>
$V \mid G = g,E=e \sim N(-10.35 + 0.5g+0.70e, 5^2)$ <br>
$N \mid V = v \sim N(45 + 0.1v, 9.95^2)$<br>
$W \mid V = v \sim N(15 + 0.7v, 7.14^2)$<br>
$C \mid N = n, W = w \sim N(0.3n + 0.7w, 6.25^2)$<br>

In [None]:
# Install bnlearn from CRAN only if it is missing, so that re-running this
# cell does not download and rebuild the package every time.
if (!requireNamespace("bnlearn", quietly = TRUE)) {
  install.packages(
    "bnlearn",
    repos = "https://cloud.r-project.org",  # CRAN mirror
    # TRUE also installs the packages listed under Suggests
    # (igraph, gRain, etc.), not only the hard dependencies.
    dependencies = TRUE
  )
}

In [None]:
# Install rbmn from CRAN only if it is missing, so that re-running this
# cell does not download and rebuild the package every time.
if (!requireNamespace("rbmn", quietly = TRUE)) {
  install.packages(
    "rbmn",
    repos = "https://cloud.r-project.org",  # CRAN mirror
    # TRUE also installs the packages rbmn lists under Suggests,
    # not only its hard dependencies.
    dependencies = TRUE
  )
}

In [None]:
# Install gRain from CRAN only if it is missing, so that re-running this
# cell does not download and rebuild the package every time.
if (!requireNamespace("gRain", quietly = TRUE)) {
  install.packages(
    "gRain",
    repos = "https://cloud.r-project.org",  # CRAN mirror
    # TRUE also installs the packages gRain lists under Suggests,
    # not only its hard dependencies.
    dependencies = TRUE
  )
}

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘cowplot’, ‘Deriv’, ‘doBy’, ‘litedown’, ‘gRim’, ‘markdown’, ‘microbenchmark’


Loading required package: gRbase


Attaching package: ‘gRbase’


The following objects are masked from ‘package:bnlearn’:

    ancestors, children, nodes, parents




In [104]:
library(bnlearn)
library(gRain)
library(rbmn)

In [105]:
# Parameterisation carried over from Lab 7.
# Network structure, written as a model string: G and E have no parents,
# V depends on G and E, N and W depend on V, and C depends on N and W.
dag.bnlearn <- model2network("[G][E][V|G:E][N|V][W|V][C|N:W]")

# Local distribution of every node: the regression coefficients on its
# parents (with an intercept) and the residual standard deviation.
# Root nodes carry an intercept only.
disG <- list(coef = c("(Intercept)" = 50), sd = 10)
disE <- list(coef = c("(Intercept)" = 50), sd = 10)
disV <- list(
  coef = c("(Intercept)" = -10.35, E = 0.7, G = 0.5),
  sd = 5
)
disN <- list(
  coef = c("(Intercept)" = 45, V = 0.1),
  sd = 9.95
)
disW <- list(
  coef = c("(Intercept)" = 15, V = 0.7),
  sd = 7.14
)
disC <- list(
  coef = c("(Intercept)" = 0, N = 0.3, W = 0.7),
  sd = 6.25
)

# Collect the local distributions, keyed by node name.
dis.list <- list(
  E = disE,
  G = disG,
  V = disV,
  N = disN,
  W = disW,
  C = disC
)

# Attach the parameters to the structure to obtain the fitted network.
gbn.bnlearn <- custom.fit(dag.bnlearn, dist = dis.list)

### Here, we'll only focus on Approximate Inference so we rely on Simulation

In [106]:
# Basic simulation: draw a handful of observations from the fitted network.
# The seed is fixed immediately before sampling so the draw is reproducible.
set.seed(123)
sim <- rbn(gbn.bnlearn, n = 5) # 5 random observations from the network
# Display the first rows of the draw (at most 6, so all 5 are shown).
head(sim)

Unnamed: 0_level_0,C,E,G,N,V,W
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,47.24133,67.15065,44.39524,69.27713,64.97349,52.85718
2,57.4656,54.60916,47.69823,55.30607,53.5246,50.91088
3,40.26808,37.34939,65.58708,30.49136,50.59197,43.08871
4,39.12615,43.13147,50.70508,56.55329,45.74799,41.81931
5,50.38787,45.54338,51.29288,44.73549,44.3976,41.61554


In [107]:
# Show only the V, N and C columns of the simulated sample, in that order.
sim[c("V", "N", "C")]

V,N,C
<dbl>,<dbl>,<dbl>
64.97349,69.27713,47.24133
53.5246,55.30607,57.4656
50.59197,30.49136,40.26808
45.74799,56.55329,39.12615
44.3976,44.73549,50.38787


In [108]:
# Two larger samples from the same network (200 and 2000 observations).
# No seed is set in this cell, so the draws continue from the current RNG
# state and depend on the order in which the two lines are run.
cropdata1 <- rbn(gbn.bnlearn, n = 200)
cropdata2 <- rbn(gbn.bnlearn, n = 2000) # nothing new, simulating from a BN

In [109]:
# Simulations that mimic questions/queries of interest.
# Query: how do N and C behave when V is "small"?  Here "small" is encoded
# as the interval event V < 35 (not the point value V = 35).
# Run ?cpdist for the full argument list, including how n relates to the
# number of rows returned (checked with dim() below).
query1 <- cpdist(
  gbn.bnlearn,
  nodes = c("N", "C", "V"),
  evidence = (V < 35),
  n = 1000
)
head(query1)
dim(query1)

Unnamed: 0_level_0,N,C,V
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>
1,40.99868,32.75259,23.5742
2,28.87883,35.63964,34.73576
3,44.57809,43.18525,33.94617
4,54.32541,52.12159,32.9963
5,54.01725,51.72006,34.68447
6,56.31346,38.14457,30.29418


In [110]:
# A different kind of query: V given the point evidence G = 10 and E = 90.
# Evidence that fixes nodes to exact values is passed as a named list and
# sampled with likelihood weighting (method = "lw").
query2 <- cpdist(
  gbn.bnlearn,
  nodes = "V",
  evidence = list(G = 10, E = 90),
  method = "lw",
  n = 100
)
head(query2)
dim(query2)

Unnamed: 0_level_0,V
Unnamed: 0_level_1,<dbl>
1,55.89894
2,59.27849
3,54.48378
4,51.84093
5,59.35543
6,65.11452


In [111]:
# Estimate P(V > 70 | G = 10, E = 90).  The event is an inequality; it is
# the evidence that fixes G and E to exact values, so likelihood weighting
# is used here as well.
query3 <- cpquery(
  gbn.bnlearn,
  event = c(V > 70),
  evidence = list(G = 10, E = 90),
  method = "lw"
)
query3