# Dirichlet prior as database

BNLearner gives access to many priors for parameter and structure learning. One of them is the Dirichlet prior, which needs a prior for every possible parameter in a BN. aGrUM/pyAgrum allows a database to be used as the source of a Dirichlet prior.

In [1]:
%matplotlib inline
from pylab import *
import matplotlib.pyplot as plt

import os

import pyAgrum as gum
import pyAgrum.lib.notebook as gnb

# Number of cases generated for the Dirichlet-prior database.
sizePrior = 10_000
# Number of cases generated for the learning database.
sizeData = 15_000

## Generating databases for the Dirichlet prior and for learning

In [2]:
# Prior network: A, B and C with no arc between them; sampled into the
# CSV file later used as the Dirichlet prior.
bnPrior = gum.fastBN("A;B;C")
gum.generateCSV(bnPrior, "dirichlet.csv", sizePrior, with_labels=True)

# Data network: the chain A -> B -> C; sampled into the learning database.
bnData = gum.fastBN("A->B->C")
gum.generateCSV(bnData, "database.csv", sizeData, with_labels=True)

# Show the two generating networks next to each other.
gnb.sideBySide(
    bnData,
    bnPrior,
    captions=[
        f"Database ({sizeData} cases)",
        f"Prior ({sizePrior} cases)",
    ],
)

0,1
G A A B B A->B C C B->C,G A A B B C C
Database (15000 cases),Prior (10000 cases)


## Learning databases

In [3]:
# Both learners take their variables and domains from bnPrior.
learnerData = gum.BNLearner("database.csv", bnPrior)
learnerData.useScoreBIC()

learnerPrior = gum.BNLearner("dirichlet.csv", bnPrior)
learnerPrior.useScoreBIC()

# Compare the structure learned from each CSV file on its own.
gnb.sideBySide(
    learnerData.learnBN(),
    learnerPrior.learnBN(),
    captions=["Learning from Data", "Learning from Prior"],
)

0,1
G A A B B A->B C C B->C,G A A B B C C
Learning from Data,Learning from Prior


## Learning with Dirichlet prior

In [8]:
def learnWithRatio(ratio):
    """Learn a BN from "database.csv" using "dirichlet.csv" as Dirichlet prior.

    The variables and their domains are taken from ``bnPrior``.

    Args:
        ratio: share given to the prior. The prior weight is set to
            ``ratio * sizePrior`` and the database weight to ``1 - ratio``.

    Returns:
        The network returned by the learner's ``learnBN()``.
    """
    prior_weight = ratio * sizePrior
    # NOTE(review): the database weight is not scaled by sizeData, whereas
    # the captions of the ratio comparison report
    # ratio*sizePrior + (1-ratio)*sizeData as the data size -- confirm which
    # of the two is intended for the pyAgrum version in use.
    data_weight = 1 - ratio

    bn_learner = gum.BNLearner("database.csv", bnPrior)
    bn_learner.useAprioriDirichlet("dirichlet.csv")
    bn_learner.setAprioriWeight(prior_weight)
    bn_learner.setDatabaseWeight(data_weight)
    # BIC, or any other score that does not already include a prior.
    bn_learner.useScoreBIC()
    return bn_learner.learnBN()

# Prior ratios to compare: 0.0 gives the prior a zero weight, 1.0 gives the
# database a zero weight.
ratios = [0.0, 0.2, 0.5, 0.8, 0.9, 0.95, 1.0]
gnb.sideBySide(
    *[learnWithRatio(r) for r in ratios],
    captions=[
        f"with ratio {r}<br/> [datasize : {r*sizePrior+(1-r)*sizeData}]"
        for r in ratios
    ],
)

0,1,2,3,4,5,6
G A A B B A->B C C B->C,G A A C C A->C B B B->A B->C,G A A C C A->C B B B->A B->C,G A A B B B->A C C B->C C->A,G A A B B B->A C C C->A C->B,G A A B B B->A C C C->A,G A A B B C C
with ratio 0.0  [datasize : 15000.0],with ratio 0.2  [datasize : 14000.0],with ratio 0.5  [datasize : 12500.0],with ratio 0.8  [datasize : 11000.0],with ratio 0.9  [datasize : 10500.0],with ratio 0.95  [datasize : 10250.0],with ratio 1.0  [datasize : 10000.0]
