# Training Set Trials (March 2, 2024)

All subgroups now use the same pairs of contrasts (Pairs 1 and 2) and only vary on dialect in Conditions 1 and 2. Condition 3 only has a single group and varies the speakers by Block.

Lists: 1a/b/c, 2a/b/c, 3b1/b2/b3

In [1]:
from itertools import permutations, product
import pandas as pd
import numpy as np

# Load the word pairs that were selected from the corpora (comma-separated text file)
pairs_path = "/00_NG_Pairs.txt"
df = pd.read_csv(pairs_path, sep=",")
df.head()

Unnamed: 0,Word1,Word2,Pair,Contrast
0,κάπου,κήπου,1,1
1,κήπο,κόπο,2,1
2,κύβος,κάβος,3,1
3,κάπας,τάπας,1,2
4,κάπες,τάπες,2,2


In [2]:
# Adds Word3 and Word4 columns, each a duplicate of Word1, so that every row
# holds three copies of one word plus the contrasting Word2.
# Work on a copy: a plain `dfDup = df` only aliases the frame, so the new
# columns would also be added to `df` behind the reader's back.
dfDup = df.copy()
dfDup['Word3'] = dfDup['Word1']
dfDup['Word4'] = dfDup['Word1']
dfDup

Unnamed: 0,Word1,Word2,Pair,Contrast,Word3,Word4
0,κάπου,κήπου,1,1,κάπου,κάπου
1,κήπο,κόπο,2,1,κήπο,κήπο
2,κύβος,κάβος,3,1,κύβος,κύβος
3,κάπας,τάπας,1,2,κάπας,κάπας
4,κάπες,τάπες,2,2,κάπες,κάπες
...,...,...,...,...,...,...
85,δώσει,ζώσει,4,0,δώσει,δώσει
86,θα,να,4,0,θα,θα
87,ζέψε,νέψε,4,0,ζέψε,ζέψε
88,νίκη,δίκη,4,0,νίκη,νίκη


## Pairs lists for the groups

In [3]:
# Training word list: every training list shares Pairs 1 and 2 of each contrast
is_training_pair = dfDup['Pair'].isin([1, 2])
pairsA = dfDup.loc[is_training_pair]

# Renumbered copy (rows 0..n-1, old row labels moved into an 'index' column).
# Its Contrast column is what gets shoved back onto the balanced sheet later on.
only_ContrastA = pairsA.reset_index()

pairsA.head()

Unnamed: 0,Word1,Word2,Pair,Contrast,Word3,Word4
0,κάπου,κήπου,1,1,κάπου,κάπου
1,κήπο,κόπο,2,1,κήπο,κήπο
3,κάπας,τάπας,1,2,κάπας,κάπας
4,κάπες,τάπες,2,2,κάπες,κάπες
6,κέντα,τέντα,1,3,κέντα,κέντα


In [22]:
# Export the rows belonging to Pairs 3 and 4 to their own file (row index not written)
is_pair_3_or_4 = dfDup['Pair'].isin([3, 4])
pairs_test = dfDup.loc[is_pair_3_or_4]
pairs_test.to_csv("pairs_34.csv", index=False)

### Randomize the position of the distinct word

Read in the randomized and manually balanced sheets. An earlier version of this script randomized the position of the unique word across the four columns using this code:
```python
pairsA_shuff = pd.DataFrame()
for a in range(0, len(pairsA)):
    temp = list(pairsA.iloc[a])
    np.random.shuffle(temp)
    pairsA_shuff = pd.concat([pairsA_shuff, pd.DataFrame([temp], columns=['Word1','Word2','Word3','Word4'])], ignore_index=True)
    if temp[0] == temp[1]:
        pairsA_shuff.loc[a, 'Expected'] = 'm'
    else:
        pairsA_shuff.loc[a, 'Expected'] = 'z'
```

In [4]:
# Load the sheet in which the position of the distinct word was randomized and then balanced by hand
balanced_sheet = "/Balanced expected/manual_samepairs.csv"
pairsA_shuff = pd.read_csv(balanced_sheet, sep=",")

pairsA_shuff.head()

Unnamed: 0,Word1,Word2,Word3,Word4,Expected
0,κάπου,κάπου,κήπου,κάπου,m
1,κήπο,κήπο,κήπο,κόπο,m
2,κάπας,κάπας,τάπας,κάπας,m
3,τάπες,κάπες,κάπες,κάπες,z
4,κέντα,κέντα,τέντα,κέντα,m


In [5]:
# Re-attach the Contrast label to every row of the balanced sheet.
# The labels are matched on the row index, and the left join keeps every sheet row.
contrast_labels = only_ContrastA['Contrast']
pairsA_shuff = pairsA_shuff.join(contrast_labels, how="left")

pairsA_shuff.head()

Unnamed: 0,Word1,Word2,Word3,Word4,Expected,Contrast
0,κάπου,κάπου,κήπου,κάπου,m,1
1,κήπο,κήπο,κήπο,κόπο,m,1
2,κάπας,κάπας,τάπας,κάπας,m,2
3,τάπες,κάπες,κάπες,κάπες,z,2
4,κέντα,κέντα,τέντα,κέντα,m,3


## Group 2 speaker lists

In [6]:
# Group 2 lists: the four speakers used by each subgroup
keys2a = ['AF1', 'AF2', 'AM1', 'AM2']
keys2b = ['TF1', 'TM1', 'TF2', 'TM2']
keys2c = ['CF1', 'CM6', 'CF2', 'CM3']

# Column labels for the four speaker slots of a trial
speaker_slots = ['speaker1', 'speaker2', 'speaker3', 'speaker4']

# One table per subgroup holding every possible ordering of its four speakers,
# each ordering exactly once (4! = 24 rows)
speakers2a, speakers2b, speakers2c = (
    pd.DataFrame(list(permutations(keys, 4)), columns=speaker_slots)
    for keys in (keys2a, keys2b, keys2c)
)

In [7]:
# Display the Group 2a speaker orderings as a visual check of the permutation table
speakers2a

Unnamed: 0,speaker1,speaker2,speaker3,speaker4
0,AF1,AF2,AM1,AM2
1,AF1,AF2,AM2,AM1
2,AF1,AM1,AF2,AM2
3,AF1,AM1,AM2,AF2
4,AF1,AM2,AF2,AM1
5,AF1,AM2,AM1,AF2
6,AF2,AF1,AM1,AM2
7,AF2,AF1,AM2,AM1
8,AF2,AM1,AF1,AM2
9,AF2,AM1,AM2,AF1


### Generate and randomize the full set of 36 trials

In [14]:
# Each speaker table holds the 24 orderings. Every ordering is repeated twice
# (48 rows), then 75% of the rows are drawn without replacement: sample() both
# shuffles the order and drops a quarter of the rows, leaving 36 trials.

# Seeded generator so that Restart & Run All regenerates the same trial lists.
# The one generator is shared by the three draws, so each subgroup still gets
# a different random selection.
group2_rng = np.random.RandomState(20240302)


def _sample_speaker_orders(orderings, rng):
    """Double every speaker ordering, then keep a shuffled 75% of the rows.

    Parameters
    ----------
    orderings : pd.DataFrame
        Table with four columns, one speaker ordering per row.
    rng : np.random.RandomState
        Random state used for the draw; it is advanced by the call.

    Returns
    -------
    pd.DataFrame
        The sampled rows in random order, re-indexed from 0, with columns
        speaker1..speaker4.
    """
    doubled = pd.DataFrame(
        np.repeat(orderings.values, 2, axis=0),
        columns=['speaker1', 'speaker2', 'speaker3', 'speaker4'],
    )
    return doubled.sample(frac=0.75, ignore_index=True, random_state=rng)


sp2a = _sample_speaker_orders(speakers2a, group2_rng)
sp2b = _sample_speaker_orders(speakers2b, group2_rng)
sp2c = _sample_speaker_orders(speakers2c, group2_rng)
sp2c

Unnamed: 0,speaker1,speaker2,speaker3,speaker4
0,CM6,CF1,CM3,CF2
1,CF1,CM3,CM6,CF2
2,CF2,CM6,CF1,CM3
3,CM3,CM6,CF2,CF1
4,CF1,CM6,CF2,CM3
5,CF2,CM3,CM6,CF1
6,CF1,CM3,CF2,CM6
7,CF2,CM3,CF1,CM6
8,CF2,CM6,CM3,CF1
9,CM6,CF2,CF1,CM3


## Group 3 speaker lists

In [15]:
# Group 3 lists
# All 12 speakers are used, so every block gets its own set of four speakers

# One inner list per block, in speaker-slot order
block_speakers = [
    ['CM6', 'AF1', 'TM1', 'TF2'],
    ['CM3', 'CF2', 'TF1', 'AM1'],
    ['CF1', 'TM2', 'AF2', 'AM2'],
]
slot_names = ['speaker1', 'speaker2', 'speaker3', 'speaker4']

# Transpose the block rows into one list per speaker slot
d = dict(zip(slot_names, (list(slot) for slot in zip(*block_speakers))))
multi_speakers = pd.DataFrame(d, dtype=str)
multi_speakers

Unnamed: 0,speaker1,speaker2,speaker3,speaker4
0,CM6,AF1,TM1,TF2
1,CM3,CF2,TF1,AM1
2,CF1,TM2,AF2,AM2


In [16]:
# Group 3a, all blocks
# b1, b2 and b3 each list every ordering of the four speakers found in
# rows 0, 1 and 2 of multi_speakers respectively

ordering_columns = ['speaker1', 'speaker2', 'speaker3', 'speaker4']
b1, b2, b3 = (
    pd.DataFrame(list(permutations(multi_speakers.iloc[block_row], 4)), columns=ordering_columns)
    for block_row in range(3)
)
b1

Unnamed: 0,speaker1,speaker2,speaker3,speaker4
0,CM6,AF1,TM1,TF2
1,CM6,AF1,TF2,TM1
2,CM6,TM1,AF1,TF2
3,CM6,TM1,TF2,AF1
4,CM6,TF2,AF1,TM1
5,CM6,TF2,TM1,AF1
6,AF1,CM6,TM1,TF2
7,AF1,CM6,TF2,TM1
8,AF1,TM1,CM6,TF2
9,AF1,TM1,TF2,CM6


### Generate full sets

In [17]:
### Group 3A
# For each block: repeat every speaker ordering twice, then draw 75% of the
# rows without replacement. sample() shuffles the order and drops a quarter of
# the rows in one step; ignore_index renumbers the kept rows from 0.

# Seeded generator so that Restart & Run All regenerates the same block lists.
# The one generator is shared by the three draws, so each block still gets a
# different random selection.
group3_rng = np.random.RandomState(20240303)


def _sample_block_orders(orderings, rng):
    """Double every speaker ordering, then keep a shuffled 75% of the rows.

    Parameters
    ----------
    orderings : pd.DataFrame
        Table with four columns, one speaker ordering per row.
    rng : np.random.RandomState
        Random state used for the draw; it is advanced by the call.

    Returns
    -------
    pd.DataFrame
        The sampled rows in random order, re-indexed from 0, with columns
        speaker1..speaker4.
    """
    doubled = pd.DataFrame(
        np.repeat(orderings.values, 2, axis=0),
        columns=['speaker1', 'speaker2', 'speaker3', 'speaker4'],
    )
    return doubled.sample(frac=0.75, ignore_index=True, random_state=rng)


full_b1 = _sample_block_orders(b1, group3_rng)
full_b2 = _sample_block_orders(b2, group3_rng)
full_b3 = _sample_block_orders(b3, group3_rng)

full_b1

Unnamed: 0,speaker1,speaker2,speaker3,speaker4
0,TM1,TF2,CM6,AF1
1,AF1,TF2,CM6,TM1
2,TM1,CM6,TF2,AF1
3,TM1,CM6,TF2,AF1
4,TF2,TM1,CM6,AF1
5,CM6,TM1,TF2,AF1
6,TF2,CM6,TM1,AF1
7,TM1,TF2,AF1,CM6
8,AF1,CM6,TF2,TM1
9,AF1,CM6,TM1,TF2


## Merging the final trials

Taking the generated speaker lists and merging with the word lists to create the final table in the format of Speaker_Word(.wav)

In [18]:
# Group 1
# Condition 1 always uses one and the same speaker within a list, so the
# speaker tag is simply prepended to every word; no randomization is needed.


def _prefix_words(word_rows, speaker_prefix):
    """Return a copy of word_rows with speaker_prefix prepended to Word1..Word4.

    All other columns are carried over unchanged.
    """
    tagged = word_rows.copy()
    for word_column in ['Word1', 'Word2', 'Word3', 'Word4']:
        tagged[word_column] = speaker_prefix + tagged[word_column].astype(str)
    return tagged


trials1a = _prefix_words(pairsA_shuff, 'AF1_')
trials1a.to_csv('1a_training_same.csv', index=False)

trials1b = _prefix_words(pairsA_shuff, 'TF1_')
trials1b.to_csv('1b_training_same.csv', index=False)

trials1c = _prefix_words(pairsA_shuff, 'CF1_')
trials1c.to_csv('1c_training_same.csv', index=False)

### Group 2 trials

In [19]:
### Compiling the trial lists for OS
## word list: pairsA_shuff (shared by 2a, 2b and 2c)
## group 2 speaker orders: sp2a, sp2b, sp2c


def _compile_group2_trials(speaker_orders, word_rows):
    """Pair speaker orders with word rows and build the Speaker_Word sound names.

    Row i of speaker_orders is combined with row i of word_rows: speakerN is
    joined to WordN with an underscore for N = 1..4.

    Parameters
    ----------
    speaker_orders : pd.DataFrame
        Columns speaker1..speaker4, one trial per row.
    word_rows : pd.DataFrame
        Columns Word1..Word4, Expected and Contrast, one trial per row.

    Returns
    -------
    (pd.DataFrame, pd.DataFrame)
        The side-by-side merge of both inputs, and the trial table with the
        columns first_sound, second_sound, third_sound, fourth_sound,
        Expected and Contrast.

    Raises
    ------
    ValueError
        If the two inputs do not have the same number of rows. Without this
        check the mismatch only shows up as a TypeError from the string join
        on the NaN-padded rows.
    """
    if len(speaker_orders) != len(word_rows):
        raise ValueError(
            "speaker orders ({}) and word rows ({}) must have the same length".format(
                len(speaker_orders), len(word_rows)
            )
        )

    merged = pd.concat([speaker_orders, word_rows], ignore_index=False, axis=1)

    sound_sources = [
        ('first_sound', 'speaker1', 'Word1'),
        ('second_sound', 'speaker2', 'Word2'),
        ('third_sound', 'speaker3', 'Word3'),
        ('fourth_sound', 'speaker4', 'Word4'),
    ]
    trials = pd.DataFrame()
    for sound_column, speaker_column, word_column in sound_sources:
        trials[sound_column] = merged[[speaker_column, word_column]].agg('_'.join, axis=1)

    # Carry the expected response and the contrast label over, matched on the row index
    trials = trials.join(word_rows['Expected'], how="left")
    trials = trials.join(word_rows['Contrast'], how="left")
    return merged, trials


## 2a
group2a, trials2a = _compile_group2_trials(sp2a, pairsA_shuff)

## 2b
group2b, trials2b = _compile_group2_trials(sp2b, pairsA_shuff)

## 2c
group2c, trials2c = _compile_group2_trials(sp2c, pairsA_shuff)

# Save to csv
trials2a.to_csv('2a_training_same.csv', index=False)
trials2b.to_csv('2b_training_same.csv', index=False)
trials2c.to_csv('2c_training_same.csv', index=False)

### Group 3 a trials

In [20]:
## group 3 speaker orders: full_b1, full_b2, full_b3 (one per block)
## 3a
print(len(pairsA_shuff))


def _compile_block_trials(block_orders, word_rows):
    """Pair one block's speaker orders with the word rows as Speaker_Word names.

    Row i of block_orders is combined with row i of word_rows: speakerN is
    joined to WordN with an underscore for N = 1..4.

    Parameters
    ----------
    block_orders : pd.DataFrame
        Columns speaker1..speaker4, one trial per row.
    word_rows : pd.DataFrame
        Columns Word1..Word4, Expected and Contrast, one trial per row.

    Returns
    -------
    (pd.DataFrame, pd.DataFrame)
        The side-by-side merge of both inputs, and the trial table with the
        columns first_sound, second_sound, third_sound, fourth_sound,
        Expected and Contrast.

    Raises
    ------
    ValueError
        If the two inputs do not have the same number of rows. Without this
        check the mismatch only shows up as a TypeError from the string join
        on the NaN-padded rows.
    """
    if len(block_orders) != len(word_rows):
        raise ValueError(
            "speaker orders ({}) and word rows ({}) must have the same length".format(
                len(block_orders), len(word_rows)
            )
        )

    merged = pd.concat([block_orders, word_rows], ignore_index=False, axis=1)

    sound_sources = [
        ('first_sound', 'speaker1', 'Word1'),
        ('second_sound', 'speaker2', 'Word2'),
        ('third_sound', 'speaker3', 'Word3'),
        ('fourth_sound', 'speaker4', 'Word4'),
    ]
    trials = pd.DataFrame()
    for sound_column, speaker_column, word_column in sound_sources:
        trials[sound_column] = merged[[speaker_column, word_column]].agg('_'.join, axis=1)

    # Carry the expected response and the contrast label over, matched on the row index
    trials = trials.join(word_rows['Expected'], how="left")
    trials = trials.join(word_rows['Contrast'], how="left")
    return merged, trials


### b1
group3aB1, trials3aB1 = _compile_block_trials(full_b1, pairsA_shuff)

### b2
group3aB2, trials3aB2 = _compile_block_trials(full_b2, pairsA_shuff)

### b3
group3aB3, trials3aB3 = _compile_block_trials(full_b3, pairsA_shuff)

36


In [21]:
# Write one csv per block, leaving the row index out of the files
block_outputs = {
    '3b1_training_same.csv': trials3aB1,
    '3b2_training_same.csv': trials3aB2,
    '3b3_training_same.csv': trials3aB3,
}
for csv_name, block_trials in block_outputs.items():
    block_trials.to_csv(csv_name, index=False)