In [1]:
import json
import pandas as pd
import numpy as np
from fractions import Fraction


In [2]:
# Fixed parameters. Urn and colour indices embedded in the probability keys are 1-based.
balls = 10
name_cols = ['B', 'W', 'P', 'G']
name_urns = ['A', 'B', 'C', 'D']

# Experimental design, written as one probability matrix per
# (number of urns, number of colours) pair: each row is an urn, and each entry
# of a row is the probability of drawing the corresponding colour from that urn.
# Designs are defined for 2-4 urns and 2-4 colours.
_urn_colour_probs = {
    (2, 2): [[0.7, 0.3],
             [0.3, 0.7]],
    (2, 3): [[0.7, 0.2, 0.1],
             [0.2, 0.1, 0.7]],
    (2, 4): [[0.4, 0.3, 0.2, 0.1],
             [0.1, 0.2, 0.3, 0.4]],
    (3, 2): [[0.7, 0.3],
             [0.5, 0.5],
             [0.3, 0.7]],
    (3, 3): [[0.7, 0.2, 0.1],
             [0.1, 0.7, 0.2],
             [0.2, 0.1, 0.7]],
    (3, 4): [[0.4, 0.3, 0.2, 0.1],
             [0.3, 0.2, 0.2, 0.3],
             [0.1, 0.2, 0.3, 0.4]],
    (4, 2): [[0.7, 0.3],
             [0.6, 0.4],
             [0.4, 0.6],
             [0.3, 0.7]],
    (4, 3): [[0.7, 0.2, 0.1],
             [0.1, 0.7, 0.2],
             [0.2, 0.1, 0.7],
             [0.3, 0.4, 0.3]],
    (4, 4): [[0.4, 0.3, 0.2, 0.1],
             [0.3, 0.4, 0.1, 0.2],
             [0.2, 0.1, 0.4, 0.3],
             [0.1, 0.2, 0.3, 0.4]],
}

# Flatten each matrix into the 'p_col<colour>_u<urn>' lookup used by the rest
# of the notebook (keys are emitted urn by urn, colour by colour).
ball_comp = {
    design: {
        f"p_col{col_idx}_u{urn_idx}": prob
        for urn_idx, row in enumerate(rows, start=1)
        for col_idx, prob in enumerate(row, start=1)
    }
    for design, rows in _urn_colour_probs.items()
}

# Floating parameters
max_urns = 4
max_colours = 4
max_draws = 2
num_trials_per_comp = 2


In [3]:
def simulate_ball_draws(k, j, n, ball_comp, name_cols, name_urns, rng=None):
    """
    Pick one urn uniformly at random and draw n balls from it with replacement.

    Parameters:
        k (int): Number of urns in play; the first k entries of name_urns are used.
        j (int): Number of colours in play; the first j entries of name_cols are used.
        n (int): Length of the sequence of draws.
        ball_comp (dict): Dict of dicts. The outer key is the tuple
            (number of urns, number of colours); the inner keys have the form
            'p_col<colour>_u<urn>' (both indices start at 1) and map to the
            probability of drawing that colour from that urn.
        name_cols (list): Names of the colours.
        name_urns (list): Names of the urns.
        rng (optional): Object exposing a NumPy-style ``choice`` method, e.g.
            ``np.random.default_rng(seed)``. When omitted, NumPy's global
            ``np.random`` state is used, exactly as before this parameter existed.

    Returns:
        tuple: (chosen_urn, draws) where chosen_urn is the name of the selected
        urn and draws is a NumPy array of n colour names.

    Raises:
        KeyError: If ball_comp has no design for (k, j) or lacks a probability
            key for the chosen urn.
    """
    # Default to the legacy global generator so existing callers are unaffected;
    # an explicit generator makes a simulation reproducible.
    source = np.random if rng is None else rng

    # Restrict the urn and colour names to the ones used by this design
    urns = name_urns[:k]
    colors = name_cols[:j]

    # Randomly choose the urn that all n balls are drawn from
    chosen_urn = source.choice(urns)
    chosen_urn_index = urns.index(chosen_urn)

    # Colour probabilities for the chosen urn, ordered col1, col2, ...
    # (keys use 1-based indices, hence the +1)
    design = ball_comp[(k, j)]
    probabilities = [
        design[f"p_col{col}_u{chosen_urn_index + 1}"] for col in range(1, j + 1)
    ]
    return (chosen_urn, source.choice(colors, size=n, p=probabilities))

In [4]:
# Simulate num_trials_per_comp trials for every (urns, colours) design,
# iterating urns in the outer loop and colours in the inner loop.
simulated_trials = [
    simulate_ball_draws(
        k=n_urns,
        j=n_colours,
        n=max_draws,
        ball_comp=ball_comp,
        name_cols=name_cols,
        name_urns=name_urns,
    )
    for n_urns in range(2, max_urns + 1)
    for n_colours in range(2, max_colours + 1)
    for _ in range(num_trials_per_comp)
]

# Split the (chosen urn, draws) pairs into two parallel lists indexed by trial
chosen_urn_lis = [urn_name for urn_name, _ in simulated_trials]
ball_draw_lis = [draws for _, draws in simulated_trials]

In [5]:
def create_urnTable_json(trial, urns, colours, balls, ball_comp, name_cols, name_urns,
                         chosen_urns=None, ball_draws=None):
    """
    Build the JSON-serialisable description of one trial: the urn table
    (one entry per urn plus a 'Total' entry), the chosen urn and the ball draws.
    An uninformative prior (1 / number of urns) is assigned to every urn.

    trial: zero-based trial number; the result is keyed 'BU<trial + 1>'
    urns: number of urns
    colours: number of colours
    balls: number of balls per urn
    ball_comp: dictionary with the ball composition, keyed by (urns, colours),
        each value mapping 'p_col<colour>_u<urn>' (1-based) to a probability
    name_cols: list with the names of the colours
    name_urns: list with the names of the urns
    chosen_urns: optional sequence holding the chosen urn of every trial;
        defaults to the notebook-level ``chosen_urn_lis``
    ball_draws: optional sequence holding the draws of every trial;
        defaults to the notebook-level ``ball_draw_lis``

    Returns a dict of the form
        {'BU<n>': {'urnInfo': [...], 'chosenUrn': ..., 'ballDraws': [...]}}

    Ball counts are obtained by rounding probability * balls to the nearest
    integer, so compositions should multiply out to whole numbers of balls.
    """
    # Keep the old call signature working: fall back to the lists produced by
    # the simulation cell when the caller does not pass them explicitly.
    if chosen_urns is None:
        chosen_urns = chosen_urn_lis
    if ball_draws is None:
        ball_draws = ball_draw_lis

    design = ball_comp[(urns, colours)]

    # Uniform prior expressed as a fraction string, e.g. '1/3'
    prior = str(Fraction(1, urns))

    # counts[i][j] = number of balls of colour j in urn i.
    # round() rather than int(): a product such as 0.29 * 100 evaluates to
    # 28.999999999999996 and would otherwise be truncated to 28.
    counts = [
        [int(round(design[f"p_col{col}_u{urn}"] * balls)) for col in range(1, colours + 1)]
        for urn in range(1, urns + 1)
    ]

    # One entry per urn
    urn_entries = [
        {
            'urnName': name_urns[i],
            'prior': prior,
            'composition': [f"{count}{name_cols[j]}" for j, count in enumerate(row)],
            'balls': balls,
        }
        for i, row in enumerate(counts)
    ]

    # Summary entry: per-colour totals across all urns
    urn_entries.append({
        'urnName': 'Total',
        'prior': '1',
        'composition': [
            f"{sum(row[j] for row in counts)}{name_cols[j]}" for j in range(colours)
        ],
        'balls': balls * urns,
    })

    # Convert to plain str so NumPy string scalars do not leak into the output
    return {
        f"BU{trial + 1}": {
            'urnInfo': urn_entries,
            'chosenUrn': str(chosen_urns[trial]),
            'ballDraws': [str(ball) for ball in ball_draws[trial]],
        }
    }



In [6]:
# Preview the structure generated for the second simulated trial (index 1)
# using the 2-urn / 2-colour design.
create_urnTable_json(
    trial=1,
    urns=2,
    colours=2,
    balls=10,
    ball_comp=ball_comp,
    name_cols=name_cols,
    name_urns=name_urns,
)

{'BU2': {'urnInfo': [{'urnName': 'A',
    'prior': '1/2',
    'composition': ['7B', '3W'],
    'balls': 10},
   {'urnName': 'B', 'prior': '1/2', 'composition': ['3B', '7W'], 'balls': 10},
   {'urnName': 'Total',
    'prior': '1',
    'composition': ['10B', '10W'],
    'balls': 20}],
  'chosenUrn': 'B',
  'ballDraws': ['B', 'W']}}

In [7]:
# Build one urn-table entry per simulated trial, walking the designs in the
# same order as the simulation cell so trial indices line up.
trials_lis = []
for n_urns in range(2, max_urns + 1):
    for n_colours in range(2, max_colours + 1):
        for _ in range(num_trials_per_comp):
            # The next trial index is simply the number of entries built so far
            trials_lis.append(
                create_urnTable_json(
                    trial=len(trials_lis),
                    urns=n_urns,
                    colours=n_colours,
                    balls=balls,
                    ball_comp=ball_comp,
                    name_cols=name_cols,
                    name_urns=name_urns,
                )
            )
trial_num = len(trials_lis)

# Serialise the list of trials to a JSON string
urn_entries_json = json.dumps(trials_lis, indent=4)

# Write the JSON string to a file
filename = 'input.json'
with open(filename, 'w') as json_file:
    json_file.write(urn_entries_json)