# Trial Generation Worksheet

## Load SenteType List

In [12]:
import pandas as pd
from numpy.random import RandomState
from itertools import product

sentTypes=pd.read_csv('./mockTrialGen/mockList.csv')
sentTypes.head()

Unnamed: 0,sentType
0,1
1,2
2,3
3,4
4,5


## Steps
1. Assign Relatedness:
    - Shuffle, split list in 2
    - 1st Half: Rel, 2nd Half: Unrel
    - Store subsets *might wanna change this maybe to reduce storage steps*?
2. Assign Speaker:
    - For each subsets of Relatedness assigned lists:
        - shuffle, split in 2
        - First half: Native, 2nd Half: Non-Natuve
    - Store subsets
3. Join Lists:
    - Concatenate all sublists created from step 2
    - Re-shuffle for good measure.
4. From this global file, re-create the file name as a **UNIQUE ID**.
5. Merge this table with the file containing all other measures and variables?
    *This step is a maybe... After all if I have the info, I can always go back and find the variables like cloze ratings etc from the index files post-hoc, rather that making the programme always carry it.**
    -> Probably better to stick to only the experimentally needed info in that trial list.
    
    
## One thing I could do
- Use the assign block funtion, then replace the block numbers (Or make the assign block already assign the correct names.
- Make sure to turn off the suffling every time I run the add_block modification.

In [17]:
######## FUNCTIONS
#----------------------
def simple_shuffle(frame, block=None, times=10, seed=None, resetInd=False):
    """
    Shuffles trials a few times.
    
    :param pandas.DataFrame frame: Trials to be shuffled.
    :param block: Optional column to groupby before shuffling.
    :type block: str or None.
    :param int times: Number of times to shuffle. Defaults to 10.
    :param seed: Seed random number generator.
    :type seed: int or None
    :returns: Trial list with rows in random order.
    :rtype: pandas.DataFrame
    """
    prng = RandomState(seed)
    
    def _shuffle(chunk):
        for _ in range(times):
            chunk = chunk.reindex(prng.permutation(chunk.index))
        if resetInd is True:
            chunk=chunk.reset_index(drop=True)
        return chunk
    
    if block is None:
        return _shuffle(frame)
    else:
        return frame.groupby(block).apply(_shuffle)
    
def add_block(frame, size, name='block', start_at=0, id_col=None, seed=None):
    """
    Creates a new column for block.
    
    :param pandas.DataFrame frame: Trials to be assigned blocks.
    :param int size: Length of each block.
    :param id_col: Column to group by before blocking. Assures that blocks 
        consist of approximately the same number of trials for each unique
        value in id_col
    :type id_col: str or None
    :param seed: Seed random number generator.
    :type seed: int or None
    :returns: Trial list with new column for block.
    :rtype: pandas.DataFrame
    """
    def _assigner(blocks, prng):
        prng.shuffle(blocks)
        i = 0
        while True:
            yield blocks[i]
            if (i+1)%len(blocks):
                prng.shuffle(blocks)
            i = (i+1)%len(blocks)
            
    prng = RandomState(seed)
    blocks = range(len(frame)/size)
    assigner = _assigner(blocks, prng)
    
    def _add(chunk):
        chunk[name] = [assigner.next() for _ in xrange(len(chunk))]
        return chunk
    
    if id_col is None:
        new_frame = _add(frame).sort(name)
    else:
        new_frame = frame.groupby(id_col).apply(_add).sort(name)
    
    new_frame[name] = new_frame[name] + start_at
    return new_frame

    

######## IMPLEMENTATIONS
#-------------------------   

relAss=simple_shuffle(sentTypes,seed=1,resetInd=True)

test=add_block(sentTypes,25,start_at=1,seed=1)
test.head()



Unnamed: 0,sentType,block
99,100,1
57,58,1
70,71,1
25,26,1
33,34,1


In [29]:
gp=test.groupby('block')
gp.get_group(1)

Unnamed: 0,sentType,block
99,100,1
57,58,1
70,71,1
25,26,1
33,34,1
35,36,1
21,22,1
81,82,1
68,69,1
66,67,1
