# Playground fast

> Some initial design ideas

In [None]:
#| default_exp playground

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import pandas as pd
import itertools
from joblib import Parallel, delayed
import random
import json

In [None]:
# First trial set: every colour paired with every column
Ss = ['red', 'green']
Rs = ['column_1', 'column_3']
possible_trials_1 = [(stimulus, response) for stimulus in Ss for response in Rs]

In [None]:
# Second trial set: every colour paired with every column
Ss = ['blue', 'yellow']
Rs = ['column_2', 'column_4']
possible_trials_2 = [(stimulus, response) for stimulus in Ss for response in Rs]

In [None]:
# Build 100 sequences of 10 trials each, alternating between the two trial sets
sequences = []
for _ in range(100):
    # Five draws (with replacement) from each set; set 1 is drawn first
    draws_1 = random.choices(possible_trials_1, k=5)
    draws_2 = random.choices(possible_trials_2, k=5)
    # Interleave: set-1 trial, set-2 trial, set-1 trial, ...
    interleaved = itertools.chain.from_iterable(zip(draws_1, draws_2))
    sequences.append(list(interleaved))

# Write all sequences to disk as indented JSON
with open("post_sequences.json", "w") as out_file:
    json.dump(sequences, out_file, indent=2)



In [None]:
# Stimuli and response locations to cross.
# Other stimulus sets that were tried:
#   ['L1','L2','L3','R1','R2','R3']
#   ['L1','R1']
Ss = ['L1', 'L2', 'R1', 'R2']
Rs = ['L', 'R']

# One row per (stimulus, location) combination, both stored as categoricals
possible_trials = pd.DataFrame(
    list(itertools.product(Ss, Rs)),
    columns=["stimulus", "location"],
)
possible_trials[["stimulus", "location"]] = (
    possible_trials[["stimulus", "location"]].astype('category')
)

# A trial is congruent when the stimulus name starts with its location letter
possible_trials['sr_congruency'] = (
    possible_trials['stimulus'].str[0] == possible_trials['location']
)
possible_trials

Unnamed: 0,stimulus,location,sr_congruency
0,L1,L,True
1,L1,R,False
2,L2,L,True
3,L2,R,False
4,R1,L,False
5,R1,R,True
6,R2,L,False
7,R2,R,True


In [None]:
def calculate_sequential_effects(sequence):
    """Add trial-to-trial repetition flags to a trial sequence.

    Every column of ``sequence`` is compared with its value on the previous
    row, and the results are stored, in column order, as ``s_repeat``,
    ``l_repeat`` and ``sr_repeat``. The frame is therefore expected to hold
    exactly three columns on entry (``stimulus``, ``location``,
    ``sr_congruency``, as produced by ``generate_sequence``).

    ``partial_mismatch`` is True where ``s_repeat`` equals ``sr_repeat``.

    Parameters
    ----------
    sequence : pandas.DataFrame
        Trial sequence, one row per trial. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``sequence`` object with the four new boolean columns.
    """
    previous_trial = sequence.shift(1)
    # The first row is compared against missing values, so all of its
    # repeat flags come out False.
    sequence[['s_repeat', 'l_repeat', 'sr_repeat']] = sequence == previous_trial
    sequence['partial_mismatch'] = sequence['s_repeat'] == sequence['sr_repeat']
    if len(sequence):
        # The first trial has no predecessor, so it cannot be a mismatch.
        # Address it by position: a label lookup (.loc[0]) would append a
        # new row whenever the index does not contain the label 0.
        flag_position = sequence.columns.get_loc('partial_mismatch')
        sequence.iloc[0, flag_position] = False
    return sequence

def generate_sequence(possible_trials, N, random_state):
    """Draw a random trial sequence of length ``N``.

    Rows are sampled from ``possible_trials`` with replacement, using
    ``random_state`` as the seed, and the result is re-indexed 0..N-1.
    """
    return possible_trials.sample(
        n=N,
        replace=True,
        random_state=random_state,
        ignore_index=True,
    )

def check_balance(sequence, random_state):
    """Summarise how balanced a trial sequence is.

    A ``contingency`` column (running count of each stimulus/location pair,
    starting at 1) is added to ``sequence`` in place. All summary measures
    are then computed with the first trial excluded.

    Returns a Series holding, in this order:

    * for ``sr_congruency``, ``s_repeat``, ``l_repeat``, ``partial_mismatch``
      and ``contingency``: the mean over trials where ``sr_repeat`` is True
      minus the mean over trials where it is False;
    * ``stim_balance``: smallest stimulus proportion minus
      1 / (number of distinct stimuli);
    * ``loc_balance``: smallest location proportion minus 0.5;
    * ``cC_iI``: proportion of trials with ``sr_repeat`` True;
    * ``random_state``: the seed passed in.
    """
    # How many times each stimulus/location pair has occurred so far
    pair_groups = sequence.groupby(['stimulus', 'location'])
    sequence['contingency'] = pair_groups.cumcount() + 1

    # Drop the first trial: it has no predecessor
    trials = sequence.iloc[1:]

    stimulus_shares = trials['stimulus'].value_counts(normalize=True)
    location_shares = trials['location'].value_counts(normalize=True)

    measures = ['sr_congruency', 's_repeat', 'l_repeat', 'partial_mismatch', 'contingency']
    group_means = trials.groupby('sr_repeat')[measures].mean()
    balances = group_means.loc[True] - group_means.loc[False]

    balances.loc['stim_balance'] = stimulus_shares.min() - (1 / trials['stimulus'].nunique())
    balances.loc['loc_balance'] = location_shares.min() - .5
    balances.loc['cC_iI'] = trials.sr_repeat.mean()
    balances.loc['random_state'] = random_state
    return balances

def probe_sequence(possible_trials, N, random_state):
    """Generate one random sequence and return its balance summary.

    The sequence is drawn with ``generate_sequence``, annotated by
    ``calculate_sequential_effects`` and summarised by ``check_balance``;
    ``random_state`` seeds the draw and is recorded in the result.
    """
    annotated = calculate_sequential_effects(
        generate_sequence(possible_trials, N, random_state)
    )
    return check_balance(annotated, random_state)

In [None]:
%%time
# Sequence length and number of random sequences to probe
N = 33
no_probes = 1_000_000
# One job per probe on all available cores; every probe gets its own
# seed drawn from [0, 2**32 - 1]
results = Parallel(n_jobs=-1)(
    delayed(probe_sequence)(possible_trials, N, random.randint(0, 2**32 - 1))
    for _ in range(no_probes)
)

CPU times: user 4.01 s, sys: 360 ms, total: 4.37 s
Wall time: 14.4 s


In [None]:
# One row per probe; random_state is cast to an integer column
df = pd.DataFrame(results).astype({'random_state': int})
# Example filter:
#df.query("(partial_mismatch>-.15) and (partial_mismatch<.15)").query("(cC_iI>.45) and (cC_iI<.55)")

In [None]:
# Save the probe results to df.csv in the current working directory
df.to_csv('df.csv')

In [None]:
# Run 1000 batches of 100000 probes each and save every batch to its own
# file, so that results survive an interrupted run.
N = 33
no_probes = 100000
for i in range(1000):
    # One job per probe on all available cores; every probe gets its own seed.
    # The inner loop variable is `_` so it cannot be confused with the batch
    # number `i` used in the file name below.
    results = Parallel(n_jobs=-1)(
        delayed(probe_sequence)(possible_trials, N, random.randint(0, 4294967295))
        for _ in range(no_probes)
    )
    df = pd.DataFrame(results)
    df['random_state'] = df.random_state.astype(int)
    df.to_csv(f'../data/processed/results_33_4s_{i}')

In [None]:
from tqdm import tqdm
# From every saved batch, keep the probes whose balance measures fall inside
# the acceptance windows below (all bounds are exclusive).
filtered_dfs = []
for i in tqdm(range(411)):
    # index_col=0: the first CSV column is the row index written by to_csv.
    # Without it the index comes back as a redundant "Unnamed: 0" column.
    df = pd.read_csv(f'../data/processed/results_33_4s_{i}', index_col=0)
    df = (
        df.query("(partial_mismatch>-.05) and (partial_mismatch<.05)")
        .query("(cC_iI>.45) and (cC_iI<.55)")
        .query("(s_repeat>-.15) and (s_repeat<.15)")
        .query("(l_repeat>-.05) and (l_repeat<.05)")
    )
    # Skip batches in which no probe passed the filters
    if len(df):
        filtered_dfs.append(df)

100%|█████████████████████████████████████████| 411/411 [01:01<00:00,  6.67it/s]


In [None]:
# Stack the filtered probes from all batches into a single table
pd.concat(filtered_dfs)

Unnamed: 0.1,Unnamed: 0,sr_congruency,s_repeat,l_repeat,partial_mismatch,contingency,stim_balance,loc_balance,cC_iI,random_state
36666,36666,0.1875,0.0,0.0,0.0,-0.1875,-0.125,-0.125,0.5,3817207821
42233,42233,-0.203922,0.062745,0.003922,0.003922,1.043137,-0.09375,-0.21875,0.46875,3990625401
79196,79196,-0.25,-0.125,0.0,0.0,1.375,-0.09375,-0.09375,0.5,42735201
71539,71539,0.125,-0.125,0.0,0.0,0.5,-0.15625,-0.03125,0.5,161287501
31061,31061,0.0,-0.125,0.0,0.0,-0.0625,-0.09375,-0.03125,0.5,3580945701
31709,31709,-0.125,0.0,0.0,0.0,0.125,-0.09375,-0.125,0.5,3666360157
13704,13704,0.0,-0.125,0.0,0.0,1.3125,-0.1875,-0.03125,0.5,2841646595
82291,82291,-0.196078,0.062745,0.003922,0.003922,0.054902,-0.1875,0.0,0.46875,973945768
97451,97451,-0.180392,0.062745,-0.003922,-0.003922,1.25098,-0.15625,-0.0625,0.53125,1559803718
81143,81143,-0.003922,-0.062745,-0.003922,-0.003922,-0.282353,-0.21875,-0.0625,0.46875,2779833467


In [None]:
# Apply the same acceptance windows to the current df (all bounds exclusive)
(
    df.query("(partial_mismatch>-.05) and (partial_mismatch<.05)")
    .query("(cC_iI>.45) and (cC_iI<.55)")
    .query("(s_repeat>-.15) and (s_repeat<.15)")
    .query("(l_repeat>-.05) and (l_repeat<.05)")
)

Unnamed: 0,sr_congruency,s_repeat,l_repeat,partial_mismatch,contingency,stim_balance,loc_balance,cC_iI,random_state


In [None]:
# Rebuild the sequence for seed 163833 and annotate it for inspection
df = generate_sequence(possible_trials, N, 163833)
df = calculate_sequential_effects(df)
# Running count of each stimulus/location pair, starting at 1
df['contingency'] = df.groupby(['stimulus', 'location']).cumcount() + 1
df

Unnamed: 0,stimulus,location,sr_congruency,s_repeat,l_repeat,sr_repeat,partial_mismatch,contingency
0,R1,R,True,False,False,False,False,1
1,R1,R,True,True,True,True,True,2
2,L2,R,False,False,True,False,True,1
3,L1,R,False,False,True,True,False,1
4,R1,L,False,False,False,True,False,1
5,R2,R,True,False,False,False,True,1
6,L1,L,True,False,False,True,False,1
7,R1,R,True,False,False,True,False,3
8,R2,L,False,False,False,False,True,1
9,L1,L,True,False,True,False,True,2


In [None]:
# Mean contingency over the trials where sr_repeat is False
df.query('sr_repeat==False').contingency.mean()

2.75

In [None]:
# Mean contingency over the trials where sr_repeat is True
df.query('sr_repeat==True').contingency.mean()

3.2941176470588234