In [1]:
import numpy as np
import pandas as pd
import datetime as dt
from matplotlib import pyplot as plt

from tqdm import tqdm

In [2]:
# import datetime as dt
# import pandas as pd

def save_as_csv(input_df, output_folder, output_filename_prefix = 'output'):
    """Write ``input_df`` to a timestamped CSV file inside ``output_folder``.

    The file is named ``<output_filename_prefix>_<YYYYMMDD-HHMM-SS>.csv`` using
    the local time at the moment of the call, so repeated calls do not
    overwrite each other unless they happen within the same second.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Frame to export; written with the ``to_csv`` defaults (index included).
    output_folder : str
        Destination directory. It is created when it does not exist yet.
    output_filename_prefix : str, optional
        Text placed in front of the timestamp in the filename.

    Returns
    -------
    str
        Full path of the CSV file that was written.
    """
    import os  # local import so this cell does not depend on the import cell

    # strftime zero-pads every field, including the seconds.
    now_str = dt.datetime.now().strftime('%Y%m%d-%H%M-%S')
    output_filename = f'{output_filename_prefix}_{now_str}.csv'

    # os.path.join uses the separator of the running platform instead of a
    # hard-coded backslash, which only forms a valid path on Windows.
    os.makedirs(output_folder, exist_ok=True)
    output_link = os.path.join(output_folder, output_filename)

    input_df.to_csv(output_link)
    print(f"Output file is saved to \n{output_folder}\nFilename: {output_filename}")
    return output_link

In [3]:
# # Version 2
# # @jit(target_backend='cuda') 
# def get_cell_surroundings(idx_i, idx_j, env_arr):
#     check_inp_type(inp = env_arr, expect_type = type(np.array([])))
    
#     # ................................................
#     # get range of idx_i and idx_j
#     # note: the max values are to be expected to be reachable. Contrast index properties in lists or tuples.
#     idx_i_range = [idx_i-1, idx_i+1]
#     idx_j_range = [idx_j-1, idx_j+1]
#     i_pos, j_pos = 0, 0
    
#     # ................................................
#     # check and modify the min values of idx_i, idx_j
#     # update i_pos and j_pos; 0 if in the middle, -1 if leftmost or topmost, 1 if rightmost or bottommost
#     if idx_i_range[0] < 0:
#         i_pos = -1    # leftmost
#         idx_i_range[0] = idx_i
        
#     if idx_j_range[0] < 0:
#         j_pos = -1    # topmost
#         idx_j_range[0] = idx_j
    
#     # ................................................
#     # check and modify the max values of idx_i, idx_j
    
#     if idx_i_range[1]+1 > env_arr.shape[0]:
#         i_pos = 1    # rightmost
#         idx_i_range[1] = idx_i
        
#     if idx_j_range[1]+1 > env_arr.shape[1]:
#         j_pos = 1    # bottommost
#         idx_j_range[1] = idx_j
        
#     # ................................................
#     # SUPERSEDED
# #     idx_i_range[0] = idx_i if idx_i_range[0] < 0 else idx_i_range[0]
# #     idx_j_range[0] = idx_j if idx_j_range[0] < 0 else idx_j_range[0]
# #     idx_i_range[1] = idx_i if idx_i_range[1]+1 > env_arr.shape[0] else idx_i_range[1]
# #     idx_j_range[1] = idx_j if idx_j_range[1]+1 > env_arr.shape[1] else idx_j_range[1]
        
#     # ................................................
#     # get cell surroundings
#     target_arr = env_arr[idx_i_range[0]:idx_i_range[1]+1, idx_j_range[0]:idx_j_range[1]+1]
#     surroundings_arr = np.delete(target_arr[idx_i], [idx_j])
    
#     return target_arr
# #     return surroundings_arr

In [4]:
# # Version 2
# # @jit(target_backend='cuda') 

# def cell_decide(cell_val, surroundings_arr):
#     # cell_val: int; 0 or 1 ONLY
#     # surroundings_arr: numpy.array; elements = 0 or 1 ONLY
#     # import numpy as np
#     check_inp_type(inp = surroundings_arr, expect_type = type(np.array([])))
    
#     one_counts = np.count_nonzero(surroundings_arr==1)
#     zero_counts = np.count_nonzero(surroundings_arr==0)
    
#     assert one_counts + zero_counts == len(surroundings_arr), "Elements in surroundings_arr should be 0 or 1 ONLY."
    
#     ret = 0 if zero_counts > one_counts else 1 if one_counts > zero_counts else cell_val
    
#     return ret

In [5]:
def check_inp_type(inp, expect_type):
    """Check that ``inp`` has exactly the type ``expect_type``.

    When a ``numpy.ndarray`` is expected and ``inp`` is something else, ``inp``
    is first passed through ``np.array``; the check is then applied to the
    converted value. For every other expected type no conversion is attempted,
    so the error message reports the type the caller actually supplied.

    Parameters
    ----------
    inp : object
        Value to validate.
    expect_type : type
        Required type, compared with ``type(inp) == expect_type`` (subclasses
        are not accepted).

    Returns
    -------
    object
        ``inp`` itself, or its ``numpy.ndarray`` conversion when one was made.

    Raises
    ------
    AssertionError
        If the (possibly converted) value is not of type ``expect_type``.
    """
    if type(inp) != expect_type and expect_type is np.ndarray:
        try:
            inp = np.array(inp)
        except Exception:
            # Conversion is best-effort; the check below reports the mismatch.
            pass

    # Raised explicitly so the validation also runs under ``python -O``.
    if type(inp) != expect_type:
        raise AssertionError('Input variable type error: expect type {} but now it is type {}'.format(expect_type, type(inp)))
    return inp

In [6]:
def cell_decide(cell_val, target_arr, is_stochastic = False, seed = None):
    """Decide the next value of a cell from the 0/1 values around it.

    ``target_arr`` is the window that contains the cell itself together with
    its neighbours. The cell's own value is removed from the counts, so only
    the neighbours vote (the "bandwagon effect").

    Parameters
    ----------
    cell_val : int
        Current value of the cell; expected to be 0 or 1.
    target_arr : array-like
        Window of 0/1 values including the cell itself. Converted with
        ``np.asarray``, so nested lists are accepted.
    is_stochastic : bool, optional
        If False, the cell takes the majority value of its neighbours and keeps
        ``cell_val`` on a tie. If True, the new value is drawn from ``[0, 1]``
        with probabilities proportional to the neighbour counts.
    seed : int or None, optional
        When given (and ``is_stochastic`` is True), ``np.random.seed(seed)`` is
        called before the draw. This resets NumPy's global RNG on every call.

    Returns
    -------
    int
        The new cell value (0 or 1), or ``cell_val`` unchanged on a tie or when
        the cell has no neighbours.

    Raises
    ------
    AssertionError
        If ``target_arr`` contains anything other than 0 and 1.
    """
    target_arr = np.asarray(target_arr)

    one_counts = np.count_nonzero(target_arr == 1)
    zero_counts = np.count_nonzero(target_arr == 0)

    # ``ndarray.size`` replaces ``np.product(shape)``, which NumPy 2.0 removed.
    assert one_counts + zero_counts == target_arr.size, "Elements in target_arr should be 0 or 1 ONLY."

    # ......................................
    # offset for the cell_val: the cell must not vote for itself
    if cell_val == 0:
        zero_counts -= 1
    elif cell_val == 1:
        one_counts -= 1

    # ......................................
    # Bandwagon Effect
    # Follow the trend, and assume no change if the trend cannot be determined
    if is_stochastic:
        if seed is not None:
            np.random.seed(seed)
        num_count = zero_counts + one_counts
        if num_count == 0:
            # No neighbours (e.g. a 1x1 environment): nothing to follow, and the
            # probabilities below would be 0/0.
            return cell_val
        ret = np.random.choice([0,1], p=[zero_counts/num_count, one_counts/num_count])
    elif zero_counts > one_counts:
        ret = 0
    elif one_counts > zero_counts:
        ret = 1
    else:
        ret = cell_val
    return ret

In [7]:
def get_target_arr(idx_i, idx_j, env_arr):
    """Return the window of ``env_arr`` centred on cell ``(idx_i, idx_j)``.

    The window spans one cell in every direction around the given position and
    is cut off at the array border, so it is 3x3 in the interior and smaller
    along edges and in corners. The cell itself is part of the window.

    Parameters
    ----------
    idx_i, idx_j : int
        Index of the cell along the first and second axis of ``env_arr``.
    env_arr : numpy.ndarray
        Two-dimensional environment array.

    Returns
    -------
    numpy.ndarray
        Basic slice of ``env_arr`` covering the window.
    """
    check_inp_type(inp = env_arr, expect_type = type(np.array([])))

    n_first_axis, n_second_axis = env_arr.shape[0], env_arr.shape[1]

    # Inclusive lower bounds: stay on the cell when stepping back would leave the array.
    if idx_i - 1 < 0:
        first_i = idx_i
    else:
        first_i = idx_i - 1
    if idx_j - 1 < 0:
        first_j = idx_j
    else:
        first_j = idx_j - 1

    # Inclusive upper bounds: stay on the cell when stepping forward would leave the array.
    if idx_i + 2 > n_first_axis:
        last_i = idx_i
    else:
        last_i = idx_i + 1
    if idx_j + 2 > n_second_axis:
        last_j = idx_j
    else:
        last_j = idx_j + 1

    # Slice stops are exclusive, hence the +1 on the inclusive upper bounds.
    return env_arr[first_i:last_i + 1, first_j:last_j + 1]

In [8]:
def _count_zeros_ones(arr):
    """Return ``(zero_counts, one_counts, trend_conv_pct)`` for a 0/1 array."""
    zero_counts = np.count_nonzero(arr == 0)
    one_counts = np.count_nonzero(arr == 1)
    trend_conv_pct = max(zero_counts, one_counts)/(zero_counts + one_counts)
    return zero_counts, one_counts, trend_conv_pct


def main(env_arr, maxiter, trend_conv_pct_lim = 0.95, unchanged_cells_thr = None, is_stochastic = False, seed = None, print_process = True):
    """Run the bandwagon-effect simulation on a 2-D grid of 0/1 cells.

    In every iteration each cell is updated from the previous grid via
    ``get_target_arr`` and ``cell_decide``. Iteration stops as soon as one of
    the following holds: ``maxiter`` iterations were done, the number of cells
    that did not change in the last iteration reached ``unchanged_cells_thr``,
    or the share of the dominant value reached ``trend_conv_pct_lim``.

    Parameters
    ----------
    env_arr : numpy.ndarray
        Initial 2-D environment of 0/1 values. It is not modified.
    maxiter : int
        Maximum number of iterations.
    trend_conv_pct_lim : float, optional
        Share of the dominant value at which the run counts as converged.
    unchanged_cells_thr : int or None, optional
        Stop once this many cells stayed unchanged in an iteration. ``None``
        means the total number of cells (i.e. stop when nothing changed).
    is_stochastic : bool, optional
        Forwarded to ``cell_decide``.
    seed : int or None, optional
        Seed for NumPy's global RNG, applied once at the start of a stochastic
        run. It is deliberately not forwarded to ``cell_decide``: reseeding
        before every single draw would give every cell the same random number.
    print_process : bool, optional
        Print one progress line per iteration.

    Returns
    -------
    dict
        ``iters``: iterations performed; ``maxiter``: the limit passed in;
        ``unchanged_cells_count``: unchanged cells in the last iteration;
        ``win_num``: ``None`` if ``trend_conv_pct_lim`` was not reached, else 0
        when zeros outnumber ones and 1 otherwise; ``hist``: list of
        ``(iteration, trend_conv_pct, zero_counts, one_counts)`` tuples
        starting with iteration 0; ``env_arr``: the input grid;
        ``new_env_arr``: the final grid as a float array (a float copy of the
        input when no iteration was run).
    """
    # ................................................
    # inputs checking (before any attribute of env_arr is used)
    check_inp_type(inp = env_arr, expect_type = type(np.array([])))
    check_inp_type(inp = maxiter, expect_type = type(1))
    check_inp_type(inp = trend_conv_pct_lim, expect_type = type(0.1))

    # ................................................
    # inputs update
    # for terminating when the number of unchanged cells >= unchanged_cells_thr
    # (``ndarray.size`` replaces ``np.product(shape)``, which NumPy 2.0 removed)
    if unchanged_cells_thr is None:
        unchanged_cells_thr = env_arr.size

    if is_stochastic and seed is not None:
        np.random.seed(seed)

    # ................................................
    # main
    env_arr_shape = env_arr.shape
    i_max, j_max = env_arr_shape[0], env_arr_shape[1]    # exclusive upper bounds
    prev_env_arr = env_arr
    # Start from a float copy of the input so that neither the first progress
    # print nor the returned grid ever exposes uninitialised memory.
    new_env_arr = env_arr.astype(float)

    hist = []
    itercounter = 0
    unchanged_cells_count = 0
    zero_counts, one_counts, trend_conv_pct = _count_zeros_ones(env_arr)

    # print progress
    hist.append((itercounter,trend_conv_pct,zero_counts,one_counts))
    if print_process:
        print(f'''--------------------------------
{itercounter}: trend_conv_pct = {trend_conv_pct}, zero_counts = {zero_counts}, one_counts = {one_counts}
{env_arr}
--------------------------------
        ''')

    while itercounter < maxiter and unchanged_cells_count < unchanged_cells_thr and trend_conv_pct < trend_conv_pct_lim:
        new_env_arr = np.empty(env_arr_shape)

        # Every cell is decided from the previous grid only, so the update
        # order inside an iteration does not influence the neighbour counts.
        for i in range(i_max):
            for j in range(j_max):
                prev_target_arr = get_target_arr(idx_i = i, idx_j = j, env_arr = prev_env_arr)
                new_env_arr[i, j] = cell_decide(cell_val = prev_env_arr[i, j],
                                                target_arr = prev_target_arr,
                                                is_stochastic = is_stochastic,
                                                seed = None)

        # update variables for loop conditions for the next loop
        unchanged_cells_count = np.sum(new_env_arr == prev_env_arr)
        itercounter += 1
        zero_counts, one_counts, trend_conv_pct = _count_zeros_ones(new_env_arr)

        # update prev_env_arr for the next loop
        prev_env_arr = new_env_arr

        # print progress
        hist.append((itercounter,trend_conv_pct,zero_counts,one_counts))
        if print_process:
            print(f'{itercounter}: trend_conv_pct = {trend_conv_pct}, zero_counts = {zero_counts}, one_counts = {one_counts}')

    if trend_conv_pct < trend_conv_pct_lim:
        win_num = None
    elif zero_counts > one_counts:
        win_num = 0
    else:
        win_num = 1
    return {'iters': itercounter, 'maxiter': maxiter, 'unchanged_cells_count': unchanged_cells_count, 'win_num': win_num,
            'hist' : hist,
            'env_arr': env_arr, 'new_env_arr': new_env_arr}

In [None]:
# inputs
output_folder = r'C:\Users\priva\Downloads\Bandwagon Effect Testing 20221128 2321'

zeros_ones_diff = 0      # required (zero_counts - one_counts) of an accepted initial grid
counter_1_lim = 100      # number of accepted initial grids to process

is_stochastic = True
seed_2 = None            # seed forwarded to main(); None leaves the RNG unseeded

test_reps = 100          # simulations per accepted initial grid
size = (48, 48)
maxiter = 1500

# -----------------------------------------------
# Scan seeds 0, 1, 2, ... and run the experiment on every initial grid whose
# zero/one balance matches zeros_ones_diff.
seed_1 = 0
counter_1 = 0

while counter_1 < counter_1_lim:
    np.random.seed(seed_1)
    env_arr = np.random.randint(0, 2, size=size)
    one_counts = np.count_nonzero(env_arr==1)
    zero_counts = np.count_nonzero(env_arr==0)

    print((seed_1, zero_counts, one_counts))
    if zero_counts-one_counts == zeros_ones_diff:
        start_time = dt.datetime.now()

        print(f'''XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{counter_1}: seed_1 = {seed_1}
        
        ''')

        init_env_arr_summary = {'seed_1': seed_1, 'zero_counts': zero_counts, 'one_counts': one_counts,
                                'size': str(size), 'maxiter': maxiter, 'is_stochastic': is_stochastic, 'seed_2': seed_2}

        # record the configuration of this run
        save_as_csv(input_df = pd.DataFrame(init_env_arr_summary, index=[0]),
                    output_folder = output_folder,
                    output_filename_prefix = f'init env arr summary_{seed_1}_{size}_{maxiter}_{is_stochastic}_{seed_2}')

        summary = {'iters':[], 'win_num':[]}
        for _rep in range(test_reps):
            # Use the configured maxiter so the simulation matches the value
            # written into the summary and the filenames (the call previously
            # hard-coded 1000 while 1500 was recorded).
            ret = main(env_arr = env_arr,
                       maxiter = maxiter,
                       trend_conv_pct_lim = 0.95,
                       is_stochastic = is_stochastic,
                       print_process = False,
                       seed = seed_2)
            summary['iters'].append(ret['iters'])
            summary['win_num'].append(ret['win_num'])
        # summary
        summary_df = pd.DataFrame(summary)

        # -----------------------------------------------
        # save as csv
        save_as_csv(input_df = summary_df,
                    output_folder = output_folder,
                    output_filename_prefix = f'summary_{seed_1}_{size}_{maxiter}_{is_stochastic}_{seed_2}')

        save_as_csv(input_df = summary_df.describe(),
                    output_folder = output_folder,
                    output_filename_prefix = f'summary describe_{seed_1}_{size}_{maxiter}_{is_stochastic}_{seed_2}')
        counter_1 += 1

        print(f'Execution time: {dt.datetime.now()-start_time}')

    seed_1 += 1

    # NOTE(review): this stops after the first accepted grid, so counter_1_lim
    # is never reached; remove the break to process counter_1_lim grids.
    if counter_1 == 1:
        break

(0, 1139, 1165)
(1, 1182, 1122)
(2, 1139, 1165)
(3, 1202, 1102)
(4, 1134, 1170)
(5, 1144, 1160)
(6, 1148, 1156)
(7, 1165, 1139)
(8, 1170, 1134)
(9, 1156, 1148)
(10, 1161, 1143)
(11, 1143, 1161)
(12, 1151, 1153)
(13, 1182, 1122)
(14, 1172, 1132)
(15, 1155, 1149)
(16, 1135, 1169)
(17, 1155, 1149)
(18, 1157, 1147)
(19, 1134, 1170)
(20, 1160, 1144)
(21, 1113, 1191)
(22, 1142, 1162)
(23, 1154, 1150)
(24, 1154, 1150)
(25, 1164, 1140)
(26, 1194, 1110)
(27, 1168, 1136)
(28, 1172, 1132)
(29, 1165, 1139)
(30, 1146, 1158)
(31, 1148, 1156)
(32, 1202, 1102)
(33, 1188, 1116)
(34, 1153, 1151)
(35, 1151, 1153)
(36, 1089, 1215)
(37, 1137, 1167)
(38, 1151, 1153)
(39, 1152, 1152)
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
0: seed_1 = 39
        
        
Output file is saved to 
C:\Users\priva\Downloads\Bandwagon Effect Testing 20221128 2321
Filename: init env arr summary_39_(48, 48)_1500_True_None_20221128-2322-38.csv


In [130]:
# # inputs
# save_as_csv(input_df = summary_df, 
#             output_folder = r'C:\Users\priva\Downloads\Bandwagon Effect Testing', 
#             output_filename_prefix = f'summary_{seed_1}_{size}_{maxiter}_{is_stochastic}_{seed_2}')

In [113]:
# # inputs
# save_as_csv(input_df = summary_df.describe(), 
#             output_folder = r'C:\Users\priva\Downloads\Bandwagon Effect Testing', 
#             output_filename_prefix = f'summary describe_{seed_1}_{size}_{maxiter}_{is_stochastic}_{seed_2}')