In [1]:
"""
@Author: Joris van Vugt, Moira Berens

Representation of a Bayesian network for AISPAML assignment 3

"""

from itertools import islice
import numpy as np
import pandas as pd

class BayesNet(object):
    """
    This class represents a Bayesian network.
    It can read files in a .bif format (if the formatting is
    along the lines of http://www.bnlearn.com/bnrepository/)

    Uses pandas DataFrames for representing conditional probability tables

    Attributes (created per instance, so separate networks never share state):
        name:          Text between 'network' and the last token of the
                       'network' line, or None if the file has no such line
        values:        {variable: list of its possible values}
        probabilities: {variable: DataFrame with one row per value combination
                        and a final 'prob' column}
        parents:       {variable: list of the names of its parents}
    """

    def __init__(self, filename):
        """
        Construct a bayesian network from a .bif file

        Input:
            filename: Path of the .bif file to parse
        """
        self.name = None

        # Possible values per variable
        self.values = {}

        # Probability distributions per variable
        self.probabilities = {}

        # Parents per variable
        self.parents = {}

        # Lines of every file read so far, so each file is opened only once
        self._line_cache = {}

        for line_number, line in enumerate(self._read_lines(filename)):
            if line.startswith('network'):
                self.name = ' '.join(line.split()[1:-1])
            elif line.startswith('variable'):
                self.parse_variable(line_number, filename)
            elif line.startswith('probability'):
                self.parse_probability(line_number, filename)

    def _read_lines(self, filename):
        """
        Return the lines of `filename`, reading the file at most once
        per instance and always closing the file handle.
        """
        # setdefault keeps the parse_* methods usable on an instance whose
        # __init__ did not run (e.g. created through __new__)
        cache = self.__dict__.setdefault('_line_cache', {})
        if filename not in cache:
            with open(filename, 'r') as handle:
                cache[filename] = handle.readlines()
        return cache[filename]

    def parse_probability(self, line_number, filename):
        """
        Parse the probability distribution that starts at `line_number`
        and store it as a DataFrame in self.probabilities

        Input:
            line_number: 0-based index of the 'probability' line
            filename:    Path of the .bif file

        Requires the values of the variable to be parsed already
        (self.values[variable] is read here).
        """
        lines = self._read_lines(filename)

        # Find out what variable(s) we are talking about
        variable, parents = self.parse_parents(lines[line_number])
        next_line = lines[line_number + 1].strip()

        # If a variable has no parents, its probabilities start with table
        if next_line.startswith('table'):
            comma_sep_probs = next_line.split('table')[1].split(';')[0].strip()
            probs = [float(p) for p in comma_sep_probs.split(',')]
            records = [[value, p]
                       for value, p in zip(self.values[variable], probs)]
            columns = [variable, 'prob']
        else:
            columns = [variable] + parents + ['prob']
            records = []

            # Loop over the lines until a line contains "}"
            for row in lines[line_number + 1:]:
                if '}' in row:
                    # Done reading this probability distribution
                    break
                if not row.strip():
                    # Blank lines carry no probabilities
                    continue

                # Get the values for the parents
                comma_sep_values = row.split('(')[1].split(')')[0]
                parent_values = [v.strip() for v in comma_sep_values.split(',')]

                # Get the probabilities for the variable
                comma_sep_probs = row.split(')')[1].split(';')[0].strip()
                probs = [float(p) for p in comma_sep_probs.split(',')]

                # Create a row for each value combination
                for value, p in zip(self.values[variable], probs):
                    records.append([value] + parent_values + [p])

        # Build the table in one go instead of growing it row by row
        self.probabilities[variable] = pd.DataFrame(records, columns=columns)

    def parse_variable(self, line_number, filename):
        """
        Parse the name of a variable and its possible values

        Input:
            line_number: 0-based index of the 'variable' line; the values
                         are read from between '{' and '}' on the next line
            filename:    Path of the .bif file
        """
        lines = self._read_lines(filename)
        variable = lines[line_number].split()[1]
        line = lines[line_number + 1]
        start = line.find('{') + 1
        end = line.find('}')
        values = [value.strip() for value in line[start:end].split(',')]
        self.values[variable] = values

    def parse_parents(self, line):
        """
        Find out what variables are the parents
        Returns the variable and its parents

        The part of `line` between '(' and ')' is split on '|': the left
        side is the variable, the comma-separated right side its parents.
        The parents are also recorded in self.parents.
        """
        start = line.find('(') + 1
        end = line.find(')')
        variables = line[start:end].strip().split('|')
        variable = variables[0].strip()
        if len(variables) > 1:
            parents = variables[1]
            self.parents[variable] = [v.strip() for v in parents.split(',')]
        else:
            self.parents[variable] = []
        return variable, self.parents[variable]

    @property
    def nodes(self):
        """Returns the names of the variables in the network"""
        return list(self.values.keys())

In [2]:
"""
@Author: Joris van Vugt, Moira Berens

Implementation of the variable elimination algorithm for AISPAML assignment 3

"""

class VariableElimination():
    """
    Skeleton for running variable elimination on a Bayesian network.
    """

    def __init__(self, network):
        """
        Store the network and start both step tallies at zero.
        """
        self.multiplication_steps = 0
        self.addition_steps = 0
        self.network = network

    def run(self, query, observed, elim_order):
        """
        Compute the probability distribution of the query variable given
        the observed variables by means of variable elimination.

        Input:
            query:      Name of the variable whose distribution is wanted
            observed:   Evidence as a dictionary {variable: value}
            elim_order: The elimination ordering, given either as a list
                        or as a function that derives an ordering from
                        the network while the algorithm runs

        Output: A variable holding the probability distribution
                for the query variable

        Not implemented yet: the method currently returns None.
        """
        return None

In [7]:
"""
@Author: Joris van Vugt, Moira Berens

Entry point for testing the variable elimination algorithm

"""
#from read_bayesnet import BayesNet
#from variable_elim import *

if __name__ == '__main__':
    # The class BayesNet reads a Bayesian network from a .bif file and
    # exposes it through the attributes printed below.
    # NOTE: 'earthquake.bif' must exist in the working directory, otherwise
    # open() fails with "No such file or directory".
    net = BayesNet('earthquake.bif')

    # These are the variables that should be used for variable elimination.
    # '%s' formatting prints the same text on Python 2 and Python 3; the
    # Python 2 statement form `print a, b` is a SyntaxError on Python 3.
    print('values %s' % (net.values,))
    print('probabilities %s' % (net.probabilities,))
    print('parents %s' % (net.parents,))
    print('nodes %s' % (net.nodes,))

    # Write your variable elimination code in a separate file: 'variable_elim'.
    # You can use it as follows:
    #ve = VariableElimination(net)

    # If variables are known beforehand, you can represent them in the following way:
    # evidence = {'Burglary': 'True'}

    # Determine your heuristic before you call the run function. This can be
    # done in this file or in a separate file.
    # The heuristic either specifies the elimination ordering (a list) or is a
    # function that determines the elimination ordering given the network.
    # A simple example is:
    # elim_order = net.nodes

    # Call the run function, for example as follows:
    #ve.run('Alarm', evidence, elim_order)



IOError: [Errno 2] No such file or directory: 'earthquake.bif'