In [36]:
import numpy as np
in_files = ("alarm.bif", "records.dat")

In [37]:
def read_records(filename = "records.dat"):
    # read records from file
    with open(filename, 'r') as f:
        records = f.readlines()

    # convert records to numpy array of columns
    records = np.array([record.split() for record in records]).T

    return records

records = read_records(in_files[1])
records

array([['"False"', '"False"', '"False"', ..., '"False"', '"False"',
        '"False"'],
       ['"Normal"', '"Normal"', '"Normal"', ..., '"High"', '"Normal"',
        '"Normal"'],
       ['"False"', '"?"', '"False"', ..., '"False"', '"False"',
        '"False"'],
       ...,
       ['"Normal"', '"Normal"', '"Normal"', ..., '"Low"', '"Normal"',
        '"Normal"'],
       ['"Normal"', '"Normal"', '"Normal"', ..., '"Normal"', '"Normal"',
        '"Normal"'],
       ['"Normal"', '"Normal"', '"Normal"', ..., '"OneSided"',
        '"Normal"', '"Normal"']], dtype='<U12')

In [55]:
variable_mapping = {}
variable_mapping_inv = {}
def parse_bif_variables(file_path):
    """
    Parses the .bif file to extract variable names and their possible values.
    
    Args:
    - file_path (str): Path to the .bif file.
    
    Returns:
    - dict: A dictionary where keys are variable names and values are lists of possible values.
    """
    global variable_mapping, variable_mapping_inv
    variables = {}
    with open(file_path, "r") as file:
        lines = file.readlines()
        
    # Flags to determine if we are within a variable block
    inside_variable_block = False
    current_variable = None
    
    counter = 0
    for line in lines:
        line = line.strip()  # Remove leading and trailing whitespace
        if line.startswith("variable"):
            # Extract variable name
            current_variable = line.split("\"")[1]
            
            variable_mapping[current_variable] = counter
            variable_mapping_inv[counter] = current_variable
            counter += 1
            
            inside_variable_block = True
        elif inside_variable_block and line.startswith("type discrete"):
            # Extract possible values for the variable
            values = line.split("{")[1].split("}")[0].split()
            values = [v.strip() for v in values]
            variables[current_variable] = values
        elif inside_variable_block and line == "}":
            # End of the variable block
            inside_variable_block = False
            current_variable = None
            
    return variables

variables = parse_bif_variables(in_files[0])
variables, variable_mapping

({'Hypovolemia': ['"True"', '"False"'],
  'StrokeVolume': ['"Low"', '"Normal"', '"High"'],
  'LVFailure': ['"True"', '"False"'],
  'LVEDVolume': ['"Low"', '"Normal"', '"High"'],
  'PCWP': ['"Low"', '"Normal"', '"High"'],
  'CVP': ['"Low"', '"Normal"', '"High"'],
  'History': ['"True"', '"False"'],
  'MinVolSet': ['"Low"', '"Normal"', '"High"'],
  'VentMach': ['"Zero"', '"Low"', '"Normal"', '"High"'],
  'Disconnect': ['"True"', '"False"'],
  'VentTube': ['"Zero"', '"Low"', '"Normal"', '"High"'],
  'KinkedTube': ['"True"', '"False"'],
  'Press': ['"Zero"', '"Low"', '"Normal"', '"High"'],
  'ErrLowOutput': ['"True"', '"False"'],
  'HRBP': ['"Low"', '"Normal"', '"High"'],
  'ErrCauter': ['"True"', '"False"'],
  'HREKG': ['"Low"', '"Normal"', '"High"'],
  'HRSat': ['"Low"', '"Normal"', '"High"'],
  'BP': ['"Low"', '"Normal"', '"High"'],
  'CO': ['"Low"', '"Normal"', '"High"'],
  'HR': ['"Low"', '"Normal"', '"High"'],
  'TPR': ['"Low"', '"Normal"', '"High"'],
  'Anaphylaxis': ['"True"', '"Fa

In [39]:
records_weight_column = np.array([1.0]*len(records[0]))
probs = {}
L = 1
for key in variables:
    L *= len(variables[key])
def get_prob(var_names, permutation):
    if len(var_names) == 0:
        return 1.0
    
    if (str(var_names) + str(permutation)) in probs:
        return probs[str(var_names) + str(permutation)]
    
    count = np.array([True]*len(records[0]))
    
    for i in range(len(var_names)):
        count = count & (records[variable_mapping[var_names[i]]] == permutation[i])
    prob = (np.sum(records_weight_column[count])+1)/(np.sum(records_weight_column))
    
    prob = prob / get_prob(var_names[1:], permutation[1:])
    
    probs[str(var_names) + str(permutation)] = prob
    
    return round(prob, 4)

In [40]:
import itertools

def process_probablity_line(line):
    # print(line)
    extract = line.split('-1')
    var_names = [i for i in extract[0].replace(" ", '')
                  .split('(')[1].split(')')[0].split('"') if len(i)!=0]
    permutations = list(itertools.product(*[variables[var_name] for var_name in var_names]))
    # print(var_names, permutations)
    prob = [get_prob(var_names, permutation) for permutation in permutations]
    # print(prob)
    
    return extract[0] + " ".join([str(i) for i in prob]) + extract[-1]

In [41]:
def update_bif(in_filename=in_files[0], out_filename=f"solved_{in_files[0]}"):
    with open(in_filename, 'r') as f_in:
        with open(out_filename, 'w') as f_out:
            while True:
                line = f_in.readline()
                if not line:
                    break
                if "probability" in line:
                    while '}' not in line:
                        line += f_in.readline()
                    line = process_probablity_line(line)
                f_out.write(line)

In [42]:
update_bif()

In [52]:
def e_step():
    predicted_records = []
    predicted_records_weights = []
    
    missing_indexes = np.where(records.T == '"?"')
    
    
    predicted_records = np.array(predicted_records)
    predicted_records_weights = np.array(predicted_records_weights)
    
    return predicted_records, predicted_records_weights
e_step()

37 37


In [56]:
predicted_records = []
predicted_records_weights = []
for row in records.T:
    missing_index = np.where(row == '"?"')
    if len(missing_index) > 0:
        print("found missing value")
        idx = missing_index[0][0]
        for value in variables[variable_mapping_inv[
            idx
        ]]:
            temp_row = row.copy()
            temp_row[idx] = value
            predicted_records.append(temp_row)
            
            
            predicted_records_weights.append(
               get_prob([variable_mapping_inv[i] for i in range(len(row))], temp_row)/get_prob([variable_mapping_inv[i] for i in range(len(row)) if i!=idx], [temp_row[i] for i in range(len(row))if i!=idx]) 
            )
    else:
        predicted_records.append(row)
        predicted_records_weights.append(1.0)
predicted_records, predicted_records_weights

found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value
found missing value


KeyboardInterrupt: 

In [58]:
product = 1
for key in variables:
    print(len(variables[key]))
    product *= len(variables[key])
product

2
3
2
3
3
3
2
3
4
2
4
2
4
2
3
2
3
3
3
3
3
3
2
2
3
2
2
2
3
2
3
4
4
3
4
4
3


17332899271409664