In [1]:
from collections import defaultdict, Counter
from copy import deepcopy
import logging as log

In [2]:
log.basicConfig(
            format="%(levelname)s: %(message)s",
            level=log.DEBUG
                        )

In [3]:
def read_input(filename):
    with open(filename, 'r') as f:
        lines = f.readlines()
    return lines

In [12]:
class Node:
    """Class Node represents a node in the graph input"""

    def __init__(self, name):
        self.name = name
        self.node_type = None
        self.reward = None
        self.value = 0
        self.edges = []
        self.probs = []
        self.current_policy = name
        
    def __repr__(self,):
        stats = {
            "name": self.name,
            "node_type": self.node_type,
            "edges": self.edges,
            "probs": self.probs,
            "value": self.value,
            "reward": self.reward,
            "current_policy": self.current_policy
        }
        repr_ = str(stats)
        return repr_
    
    def __str__(self,):
        return f"({self.name}, {self.node_type})"
    
    def add_to_edges(self, edge):
        self.edges.append(edge)
        
    def add_to_probs(self, prob):
        self.probs.append(prob)
        
    def sum_probs(self,):
        return sum(self.probs)
    
    def print_policy(self,):
        print(f"{self.name} -> {self.current_policy}")

In [13]:
def tokenize_line(line):
    if "[" in line:
        line = line.replace("[", " [ ")
    if "]" in line:
        line = line.replace("]", " ] ")
    if ":" in line:
        line = line.replace(":", " : ")
    if "%" in line:
        line = line.replace("%", " % ")
    if "=" in line:
        line = line.replace("=", " = ")
    if "," in line:
        line = line.replace(",", " ")
    return line

In [14]:
def parse_input(filename):
    '''
    Tokenize line, convert tokens to numbers
    '''
    # Read and clean input lines
    lines = read_input(filename=filename)
    lines = [l.strip('\n') for l in lines if l != '\n']  #remove extra spaces from input
    lines = [l for l in lines if not "#" in l]    
    # tokenize line
    lines = [tokenize_line(l) for l in lines]
    lines = [l.strip().split() for l in lines]
    # convert numeric tokens to numerics
    lines_ = []
    for line in lines:
        # for float values
        line = [float(item) if item.replace('.','',1).isdigit() == True else item for item in line]
        # for negative values
        line = [float(item) if (type(item)==str and item.replace('-','',1).isdigit() == True) else item for item in line]
        lines_.append(line)
    return lines_


In [15]:
def create_nodes_from_input(lines):
    name2nodes = {}
    for line in lines:
        name = line[0]
        if name not in name2nodes:
            name2nodes[name] = Node(name=name)
            
    return name2nodes

In [19]:
def assign_rewards_probs_edges(lines, name2nodes):
    for line in lines:
        # Reward/cost line
        if "=" in line:
            name, reward = line[0], line[-1]
            name2nodes[name].reward = reward
        # Probabilities line
        if "%" in line: 
            name, probs = line[0], line[line.index('%')+1:]
            for prob_val in probs:
                name2nodes[name].add_to_probs(prob_val)
        # Edges line
        if all(x in line for x in ['[', ']']): 
            name, edges = line[0], line[line.index('[')+1:line.index(']')]
            for edge in edges:
                name2nodes[name].add_to_edges(edge)
    return name2nodes
            

In [20]:
def assign_nodetype(name2nodes):
    for name in name2nodes:
        node = name2nodes[name]
        
        # If a node has edges but no probability entry, it is assumed to be a decision node with p=1
        if len(node.edges)!=0:
            if len(node.probs)==0:
                node.add_probs(1.0)
            if len(node.probs)==1:
                node.node_type = 'Decision'
        
        # If a node has edges but no reward entry, it is assumed to have a reward of 0
        if len(node.edges)!=0 and node.reward is None:
            node.reward = 0
        
        # If a node has no edges it is terminal. A probability entry for such a node is an error.
        if len(node.edges)==0:
            if len(node.probs)!=0:
                print("probability entry for Terminal node found, exiting... ")
                exit(0)
            else:
                node.node_type = 'Terminal'
        
        # A node with the same number of probabilities as edges is a chance node, with synchronized positions.        
        if len(node.edges)>0 and len(node.edges)==len(node.probs):
            node.node_type = 'Chance'
        
        # TODO
        # If a node has a single edge it always transitions there. (this is useful for capturing some reward on the way)
        # A node referenced as an edge must separately have one of the three entries to be valid
        # Therefore to create a 0 value terminal node you must do 'name = 0'
        
    
    for name in name2nodes:
        node = name2nodes[name]
        if node.node_type=='Chance':
            try:
                assert node.sum_probs()==1.0
            except:
                print("Chance node probabilities do not sum to 1.0, exiting...")
                exit(0)
    
    return name2nodes
        
            
            
        

In [33]:
def compute_new_value(node, arg_df):
    """
    Using Bellman update equation
    """
    new_value = 0.0
    if node.node_type=='Terminal':
        return node.reward
    else:
        new_value, edge_value_sum = 0.0, 0.0
        if node.node_type=='Chance':
            for idx, edge in enumerate(node.edges):
                edge_value_sum += node.probs[idx]*name2nodes[edge].value
        elif node.node_type=='Decision':
            policy, main_prob = node.current_policy, node.probs[0]
            rem_prob = (1-main_prob)/(len(node.edges) - 1) if len(node.edges)!=1 else 0.0
            for _, edge in enumerate(node.edges):
                if policy==edge:
                    edge_value_sum += main_prob*name2nodes[edge].value
                else:
                    edge_value_sum += rem_prob*name2nodes[edge].value
        new_value = node.reward + (arg_df * edge_value_sum)
        return new_value


def value_iteration_step(name2nodes, arg_df):
    max_delta = 0.0
    for name in name2nodes:
        node, value_old = name2nodes[name], name2nodes[name].value
        value_new = compute_new_value(node, arg_df=arg_df)
        delta = abs(value_new - value_old)
        if delta > max_delta:
            max_delta = delta
    return max_delta
        

def value_iteration(name2nodes, arg_df, arg_tol, arg_iter):
    for _ in range(arg_iter):
        delta = value_iteration_step(name2nodes, arg_df)
        if arg_tol >= delta:
            break
            

def compute_new_policy(node, name2nodes, arg_min):
    return node.current_policy
    
    
def greedy_policy_computation(name2nodes, arg_min):
    policy_change = False
    for name in name2nodes:
        node = name2nodes[name]
        if node.node_type=='Decision':
            old_policy = node.current_policy
            new_policy = compute_new_policy(node, name2nodes, arg_min)
            if old_policy != new_policy:
                policy_change = True
            node.current_policy = new_policy
        else:
            continue
    return policy_change
    
    
def markov_process_solver(name2nodes, arg_df, arg_min, arg_tol, arg_iter):
    # Define initial arbitrary policy
    for name in name2nodes:
        if name2nodes[name].node_type!='Terminal':
            name2nodes[name].current_policy = name2nodes[name].edges[0]
    while True:
        value_iteration(name2nodes=name2nodes, arg_df=arg_df, arg_tol=arg_tol, arg_iter=arg_iter)
        if not greedy_policy_computation(name2nodes=name2nodes, arg_min=arg_min):
            break

In [34]:
def main():
    
    arg_df, arg_min, arg_tol, arg_iter = 1.0, True, 0.001, 100
    
    EXAMPLES_DIR = "./"
    input_filename = "publish.txt"
    parsed_lines = parse_input(input_filename)
    name2nodes = create_nodes_from_input(parsed_lines)
    # print(parsed_lines, name2nodes)
    name2nodes = assign_rewards_probs_edges(lines=parsed_lines, name2nodes=name2nodes)
    name2nodes = assign_nodetype(name2nodes=name2nodes)
    #print(name2nodes)
    
    markov_process_solver(name2nodes=name2nodes, arg_df=arg_df, arg_min=arg_min, arg_tol=arg_tol, arg_iter=arg_iter)
    
    # print results
    for name in name2nodes:
        if name2nodes[name].node_type=='Decision' and len(name2nodes[name].edges) > 1:
            name2nodes[name].print_policy()
    print()
    for name in name2nodes:
        print(name,"=",name2nodes[name].value, end = " ")
    print()

In [35]:
main()

S -> Reject

S = 0 Reject = 0 Publish = 0 Success = 0 Failure = 0 Consult = 0 Against = 0 For = 0 
