Skip to content

Commit

Permalink
ENH documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
jmschrei committed Mar 24, 2016
1 parent 5f1d3e1 commit 925aa98
Show file tree
Hide file tree
Showing 9 changed files with 607 additions and 171 deletions.
68 changes: 50 additions & 18 deletions pomegranate/BayesianNetwork.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,19 @@ cdef class BayesianNetwork( Model ):
represent conditional dependencies of the children on their parents, and the
lack of an edge represents a conditional independence.
Parameters
----------
name : str, optional
The name of the model. Default is None
Attributes
----------
states : list, shape (n_states,)
A list of all the state objects in the model
graph : networkx.DiGraph
The underlying graph object.
Example
-------
>>> from pomegranate import *
Expand Down Expand Up @@ -61,11 +74,22 @@ cdef class BayesianNetwork( Model ):
[['B', 'A']]
"""

def bake( self, verbose=False ):
"""
The Bayesian Network is going to be mostly a wrapper for the Factor
Graph, as probabilities, inference, and training can be done more
efficiently on them.
def bake( self ):
"""Finalize the topology of the model.
Assign a numerical index to every state and create the underlying arrays
corresponding to the states and edges between the states. This method
must be called before any of the probability-calculating methods. This
includes converting conditional probability tables into joint probability
tables and creating a list of both marginal and table nodes.
Parameters
----------
None
Returns
-------
None
"""

# Initialize the factor graph
Expand Down Expand Up @@ -205,17 +229,19 @@ cdef class BayesianNetwork( Model ):
array with the values being ordered according to the nodes incorporation
in the graph (the order fed into .add_states/add_nodes) and None for
variables which are unknown. If nothing is fed in then calculate the
marginal of the graph.
marginal of the graph. Default is {}.
max_iterations : int, optional
The number of iterations with which to do loopy belief propagation.
Usually requires only 1.
Usually requires only 1. Default is 100.
check_input : bool, optional
Check to make sure that the observed symbol is a valid symbol for that
distribution to produce.
distribution to produce. Default is True.
Returns
-------
probabilitie : array-like, shape (n_nodes)
probabilities : array-like, shape (n_nodes)
An array of univariate distribution objects showing the probabilities
of each variable.
"""
Expand All @@ -239,17 +265,19 @@ cdef class BayesianNetwork( Model ):
array with the values being ordered according to the nodes incorporation
in the graph (the order fed into .add_states/add_nodes) and None for
variables which are unknown. If nothing is fed in then calculate the
marginal of the graph.
marginal of the graph. Default is {}.
max_iterations : int, optional
The number of iterations with which to do loopy belief propagation.
Usually requires only 1.
Usually requires only 1. Default is 100.
check_input : bool, optional
Check to make sure that the observed symbol is a valid symbol for that
distribution to produce.
distribution to produce. Default is True.
Returns
-------
probabilitie : array-like, shape (n_nodes)
probabilities : array-like, shape (n_nodes)
An array of univariate distribution objects showing the probabilities
of each variable.
"""
Expand Down Expand Up @@ -278,11 +306,13 @@ cdef class BayesianNetwork( Model ):
items : array-like, shape (n_samples, n_nodes)
The data to train on, where each row is a sample and each column
corresponds to the associated variable.
weights : array-like, shape (n_samples,), optional
The weight of each sample as a positive double
The weight of each sample as a positive double. Default is None.
inertia : double, optional
The inertia for updating the distributions, passed along to the
distribution method.
distribution method. Default is 0.0.
Returns
-------
Expand All @@ -298,9 +328,9 @@ cdef class BayesianNetwork( Model ):
if isinstance( state.distribution, ConditionalProbabilityTable ):
idx = [ indices[ dist ] for dist in state.distribution.parameters[1] ] + [i]
data = [ [ item[i] for i in idx ] for item in items ]
state.distribution.from_sample( data, weights, inertia )
state.distribution.fit( data, weights, inertia )
else:
state.distribution.from_sample( [ item[i] for item in items ], weights, inertia )
state.distribution.fit( [ item[i] for item in items ], weights, inertia )

self.bake()
return self
Expand All @@ -319,8 +349,10 @@ cdef class BayesianNetwork( Model ):
Data matrix to impute. Missing values must be either None (if lists)
or np.nan (if numpy.ndarray). Will fill in these values with the
maximally likely ones.
max_iterations : int, optional
Number of iterations to run loopy belief propogation for.
Number of iterations to run loopy belief propagation for. Default
is 100.
Returns
-------
Expand Down
106 changes: 84 additions & 22 deletions pomegranate/FactorGraph.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,16 @@ if sys.version_info[0] > 2:
xrange = range

cdef class FactorGraph( Model ):
"""
A biparte graph between factors and conditional probability
distributions.
"""A Factor Graph model.
A bipartite graph where conditional probability tables are on one side,
and marginals for each of the variables involved are on the other
side.
Parameters
----------
name : str, optional
The name of the model. Default is None.
"""

cdef numpy.ndarray transitions, edge_count, marginals
Expand All @@ -27,27 +34,44 @@ cdef class FactorGraph( Model ):
the model when output. Name may not contain spaces or newlines.
"""

# Save the name or make up a name.
self.name = name or str( id(self) )
self.states = []
self.edges = []

def add_node( self, n ):
"""
Add a node to the graph.
"""Add a node to the given model.
The node must not already be in the model, nor may it be part of any
other model that will eventually be combined with this one.
Parameters
----------
n : Node
A node object to be added to the model.
Returns
-------
None
"""

self.states.append( n )

def bake( self, verbose=False ):
"""
Finalize the topology of the model, and assign a numerical index to
every node. This method must be called before any of the probability-
calculating or sampling methods.
This fills in self.states (a list of all states in order), the sparse
matrices of transitions and their weights, and also will merge silent
states.
def bake( self ):
"""Finalize the topology of the model.
Assign a numerical index to every state and create the underlying arrays
corresponding to the states and edges between the states. This method
must be called before any of the probability-calculating methods. This
is the same as the HMM bake, except that at the end it sets current
state information.
Parameters
----------
None
Returns
-------
None
"""

n, m = len(self.states), len(self.edges)
Expand Down Expand Up @@ -127,17 +151,55 @@ cdef class FactorGraph( Model ):
self.edges = []

def marginal( self ):
"""
Return the marginal of the graph.
"""Return the marginal probabilities of each variable in the graph.
This is equivalent to a pass of belief propagation on a graph where
no data has been given. This will calculate the probability of each
variable being in each possible emission when nothing is known.
Parameters
----------
None
Returns
-------
marginals : array-like, shape (n_nodes)
An array of univariate distribution objects showing the marginal
probabilities of that variable.
"""

return self.forward_backward( {} )

def forward_backward( self, data, max_iterations=10, verbose=False ):
"""
Perform the sum-product algorithm. The term 'marginal node' and 'variable node'
are used interchangably as I wrote this method while very excited over the course
of several days.
"""Returns the probabilities of each variable in the graph given evidence.
This calculates the marginal probability distributions for each state given
the evidence provided through loopy belief propagation. Loopy belief
propagation is an approximate algorithm which is exact for certain graph
structures.
Parameters
----------
data : dict or array-like, shape <= n_nodes, optional
The evidence supplied to the graph. This can either be a dictionary
with keys being state names and values being the observed values
(either the emissions or a distribution over the emissions) or an
array with the values being ordered according to the nodes incorporation
in the graph (the order fed into .add_states/add_nodes) and None for
variables which are unknown. If nothing is fed in then calculate the
marginal of the graph.
max_iterations : int, optional
The number of iterations with which to do loopy belief propagation.
Usually requires only 1. Default is 10.
check_input : bool, optional
Check to make sure that the observed symbol is a valid symbol for that
distribution to produce.
Returns
-------
probabilities : array-like, shape (n_nodes)
An array of univariate distribution objects showing the probabilities
of each variable.
"""

n, m = len( self.states ), len( self.transitions )
Expand Down Expand Up @@ -288,4 +350,4 @@ cdef class FactorGraph( Model ):

# We've already computed the current belief about the marginals, so
# we can just return that.
return current_distributions[ numpy.where( self.marginals == 1 ) ]
return current_distributions[ numpy.where( self.marginals == 1 ) ]
57 changes: 53 additions & 4 deletions pomegranate/MarkovChain.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
# Contact: Jacob Schreiber <jmschreiber91@gmail.com>

import numpy
import json

from .distributions import Distribution

cdef class MarkovChain(object):
"""A Markov Chain.
Expand Down Expand Up @@ -89,13 +92,13 @@ cdef class MarkovChain(object):
weights : array-like, shape (n_samples,), optional
The initial weights of each sample. If nothing is passed in then
each sample is assumed to be the same weight.
each sample is assumed to be the same weight. Default is None.
inertia : double, optional
The weight of the previous parameters of the model. The new
parameters will roughly be old_param*inertia + new_param*(1-inertia),
so an inertia of 0 means ignore the old parameters, whereas an
inertia of 1 means ignore the new parameters.
inertia of 1 means ignore the new parameters. Default is 0.0.
Returns
-------
Expand All @@ -119,7 +122,7 @@ cdef class MarkovChain(object):
weights : array-like, shape (n_samples,), optional
The initial weights of each sample. If nothing is passed in then
each sample is assumed to be the same weight.
each sample is assumed to be the same weight. Default is None.
Returns
-------
Expand Down Expand Up @@ -159,7 +162,7 @@ cdef class MarkovChain(object):
The weight of the previous parameters of the model. The new
parameters will roughly be old_param*inertia + new_param*(1-inertia),
so an inertia of 0 means ignore the old parameters, whereas an
inertia of 1 means ignore the new parameters.
inertia of 1 means ignore the new parameters. Default is 0.0.
Returns
-------
Expand All @@ -168,3 +171,49 @@ cdef class MarkovChain(object):

for i in range(self.k+1):
self.distributions[i].from_summaries( inertia=inertia )

def to_json( self, separators=(',', ' : '), indent=4 ):
    """Serialize the Markov chain to a JSON string.

    Parameters
    ----------
    separators : tuple, optional
        The two separators forwarded to json.dumps for formatting.
        Default is (',', ' : ').

    indent : int, optional
        The indentation to use at each nesting level. Forwarded to
        json.dumps for formatting. Default is 4.

    Returns
    -------
    json : str
        A JSON string with a 'class' entry and a 'distributions' list
        holding one entry per distribution in the chain.
    """

    # Every distribution serializes itself to a JSON string; parse each one
    # back into plain objects so it nests inside the model document instead
    # of being embedded as an escaped string.
    serialized = []
    for distribution in self.distributions:
        serialized.append( json.loads( distribution.to_json() ) )

    payload = { 'class' : 'MarkovChain', 'distributions' : serialized }
    return json.dumps( payload, separators=separators, indent=indent )

@classmethod
def from_json( cls, s ):
    """Read in a serialized Markov chain and return the rebuilt model.

    Parameters
    ----------
    s : str
        A JSON formatted string containing the serialized model, with a
        'distributions' list holding one serialized distribution per entry.

    Returns
    -------
    model : object
        An instance of the class this method was called on, constructed
        from the deserialized distributions.
    """

    d = json.loads( s )

    # Distribution.from_json takes a JSON string, so each nested entry is
    # re-serialized before being handed over.
    distributions = [ Distribution.from_json( json.dumps(j) ) for j in d['distributions'] ]

    # Build through cls rather than a hard-coded MarkovChain so that calling
    # this on a subclass returns an instance of that subclass.
    return cls( distributions )

0 comments on commit 925aa98

Please sign in to comment.