The `verbose` flag is specified here. If it is enabled, more information is printed showing what is being changed, etc.

In [3]:
# Global switch read by later cells: when True they print extra diagnostics
# (label changes, missing-vstart warnings, the computed latencies, ...).
verbose = True

Import all required libraries here.

In [4]:
from collections import namedtuple
import re

First thing, import all the data from the `.boris` file, and print out how many observations are in the file, as a sanity check that all is as expected with the input data.

In [5]:
# Path of the BORIS project file that holds every observation, relative to the
# notebook's working directory.
all_occurrance = 'all occurrence observations.boris'

import json

# The project file is parsed as JSON.  The encoding is given explicitly so the
# result does not depend on the platform's default locale encoding.
with open(all_occurrance, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Mapping of raw observation label -> observation record; each record carries
# an 'events' list that the following cells consume.
original_observations = data['observations']

# Sanity check: total number of events over all observations.
num_events = sum(len(record['events']) for record in original_observations.values())
print(f'Have {len(original_observations)} observations with {num_events} events in total')

Have 79 observations with 11188 events in total


Create a class to represent the events.

In [6]:
class Event:
    """A single validated event taken from an observation's event list.

    Parameters
    ----------
    src : sequence
        Raw event record; index 0 is the timestamp, 1 the subject, 2 the event
        type and 3 the recipient.  Any further items are ignored.

    Raises
    ------
    AssertionError
        If the timestamp is negative, or the subject, type or recipient is not
        one of the accepted values listed below.
    """
    __slots__ = 'timestamp', 'subject', 'type', 'recipient'

    # TODO allow empty subject for the moment
    _SUBJECTS = ('male_A', 'female_A', 'male_B', 'female_B', '')
    _TYPES = ('app', 'cavity', 'fspr', 'vstart', 'bars', 'hd', 'sbend', 'freeze', 'ST', 'sb', 'pre-agg')
    # TODO allow empty recipient
    _RECIPIENTS = ('neighbour', 'partner', 'focal', '')

    def __init__(self, src):
        self.timestamp = src[0]
        self.subject = src[1]
        self.type = src[2]
        self.recipient = src[3]

        # mwr without a recipient is always with the partner
        # NOTE(review): 'mwr' is not in _TYPES, so an mwr event still fails the
        # type assertion below -- confirm whether 'mwr' should be accepted.
        if self.type == 'mwr' and not self.recipient:
            self.recipient = 'partner'

        assert self.timestamp >= 0, self
        # a vstart event may carry any subject
        assert self.subject in self._SUBJECTS or self.type == 'vstart', self
        assert self.type in self._TYPES, self
        assert self.recipient in self._RECIPIENTS, self

    def __str__(self):
        return f'"{self.timestamp}, {self.subject}, {self.type}, {self.recipient}"'

    def __repr__(self):
        # Same text as str(); the event is printed inside assertion messages.
        return str(self)

    def __format__(self, format_spec):
        # Honour width/alignment specs (e.g. f'{event:>40}') by applying them to
        # the string form; an empty spec yields exactly str(self).
        return format(str(self), format_spec)

The labels on the observations must be fixed up. "Neighbour" to "Experiment Control" and "Stranger" to "Experiment Treatment", and so on.  All labels are checked against the `pattern` regular expression that is specified here.

In [7]:
# Layout every corrected observation label must match: a tank id ("F" plus one
# digit), two two-digit numbers separated by a dot (with an optional trailing
# dot), then the phase and the condition.  A raw string is used so that "\."
# reaches the regex engine without relying on an invalid string escape.
pattern = re.compile(
    r'(F[0-9]) +([0-9]{2})\.([0-9]{2})\.? +(experiment|habituation) (treatment|control)')

def get_label(s):
    """Rewrite a label that matches `pattern` into its canonical form.

    The two numeric groups are emitted in swapped order after a fixed "2017."
    prefix, e.g. "F5 09.06. habituation control" becomes
    "F5 2017.06.09 habituation control".  `s` must fully match `pattern`;
    otherwise the attribute access on the failed match raises AttributeError.
    """
    tank, first_number, second_number, phase, condition = pattern.fullmatch(s).groups()
    return f'{tank} 2017.{second_number}.{first_number} {phase} {condition}'

def correct_label(s):
    """Normalise a raw observation label and return its canonical form.

    Known spelling variants and legacy names are rewritten first, the result is
    checked against `pattern`, and the canonical label is built by `get_label`.

    Parameters
    ----------
    s : str
        Raw observation label.

    Returns
    -------
    str
        The canonical label.

    Raises
    ------
    ValueError
        If the label still does not match `pattern` after the rewrites.
    """
    # Applied in this order; later rules rely on the output of earlier ones.
    replacements = (
        ('neighbour', 'experiment control'),
        ('stranger', 'experiment treatment'),
        ('habituation experiment', 'habituation treatment'),
        ('treatment control', 'experiment control'),
        ('habitiution control', 'habituation control'),
        ('habitiution experiment', 'habituation treatment'),
        ('habitation experiment', 'habituation treatment'),
    )

    new_k = str(s)
    for old, new in replacements:
        new_k = new_k.replace(old, new)

    # ensure that all keys now match the expected layout
    if not pattern.fullmatch(new_k):
        raise ValueError(f'label {new_k!r} does not match the expected layout')

    new_k = get_label(new_k)
    if new_k != s and verbose:
        # Report the label that was passed in, not whatever global happens to
        # be called `k` in the calling cell.
        print(f'Change {s:>40} -> {new_k}')

    return new_k

Start by removing some events that are wrong/invalid.  If the vstart is missing at the start of the list of events then add it.  The events themselves are also touched up, the 'mwr' events are generally missing the recipient, and the timestamps are also normalised here such that vstart always happens at $t = 0$.  This makes the analysis further down easier.

In [8]:
# Canonical label -> list of Event objects, with timestamps relative to vstart.
observations = dict()

for k, v in original_observations.items():
    try:
        label = correct_label(k)

        events = list()
        for raw_event in v['events']:
            # Work on a copy: the fix-ups below must not alter
            # original_observations, otherwise re-running this cell would see
            # already rewritten 'pair_X' subjects and lose the male copy.
            event = list(raw_event)

            # An empty recipient is treated as the neighbour.  Because this
            # happens before Event() is built, Event never sees an empty
            # recipient for events coming from this loop.
            if event[3] == '':
                event[3] = 'neighbour'
            event[1] = event[1].replace(' ', '_')

            if event[1].startswith('pair_'):
                # A pair event stands for both animals of the pair: add one
                # event for the male and one for the female.
                if verbose:
                    print(f'Found pair event: {event}')
                (_, _, pairid) = event[1].partition('_')
                for prefix in ('male_', 'female_'):
                    event[1] = prefix + pairid
                    if verbose:
                        print(f'Adding event: {event}')
                    events.append(Event(event))
            else:
                if verbose:
                    print(f'Adding event: {event}')
                events.append(Event(event))

        if not events:
            print(f'Observation {label} has no events!')
            continue

        if events[0].type != 'vstart':
            if verbose:
                print(f'Observation {label} is missing a vstart as the very first event!')
            # for the moment add a vstart with t = 0
            events.insert(0, Event([0.0, "", "vstart", "", ""]))

        # normalise timestamps so that the vstart timestamp is always 0
        vstart_timestamp = events[0].timestamp
        for event in events:
            event.timestamp = event.timestamp - vstart_timestamp

        observations[label] = events

    except ValueError as e:
        # Raised by correct_label() for labels that cannot be normalised.
        if k.startswith('x'):
            print(f'Observation {k!r} is marked as invalid (starts with "x")')
        else:
            print(f'Observation {k!r} is invalid: {e}')

Change            F5 09.06. habituation control -> F5 2017.06.09 habituation control
Adding event: [0.0, '', 'vstart', 'neighbour', '']
Adding event: [38.312, 'female_B', 'fspr', 'neighbour', '']
Adding event: [44.545, 'male_A', 'app', 'partner', '']
Adding event: [66.898, 'female_B', 'fspr', 'partner', '']
Adding event: [78.352, 'female_A', 'app', 'neighbour', '']
Adding event: [84.266, 'male_A', 'cavity', 'neighbour', '']
Adding event: [90.3, 'male_B', 'fspr', 'partner', '']
Adding event: [101.65, 'female_B', 'ST', 'neighbour', '']
Adding event: [111.17, 'female_A', 'cavity', 'neighbour', '']
Adding event: [117.405, 'female_A', 'cavity', 'neighbour', '']
Adding event: [122.214, 'female_B', 'ST', 'neighbour', '']
Adding event: [137.582, 'male_B', 'ST', 'neighbour', '']
Adding event: [144.153, 'female_B', 'ST', 'neighbour', '']
Adding event: [151.024, 'female_B', 'ST', 'neighbour', '']
Adding event: [154.606, 'female_B', 'ST', 'neighbour', '']
Adding event: [161.524, 'female_B', 'ST', 

Adding event: [150.599, 'male_A', 'app', 'neighbour', '']
Adding event: [151.174, 'male_B', 'app', 'neighbour', '']
Adding event: [153.029, 'male_A', 'app', 'neighbour', '']
Adding event: [153.884, 'female_A', 'app', 'neighbour', '']
Adding event: [153.884, 'male_B', 'fspr', 'neighbour', '']
Found pair event: [159.352, 'pair_B', 'app', 'neighbour', '']
Adding event: [159.352, 'male_B', 'app', 'neighbour', '']
Adding event: [159.352, 'female_B', 'app', 'neighbour', '']
Found pair event: [161.296, 'pair_A', 'app', 'neighbour', '']
Adding event: [161.296, 'male_A', 'app', 'neighbour', '']
Adding event: [161.296, 'female_A', 'app', 'neighbour', '']
Adding event: [164.119, 'female_A', 'app', 'neighbour', '']
Adding event: [164.119, 'female_B', 'app', 'neighbour', '']
Adding event: [165.147, 'female_A', 'app', 'neighbour', '']
Adding event: [165.147, 'female_B', 'app', 'neighbour', '']
Adding event: [166.067, 'female_B', 'app', 'neighbour', '']
Adding event: [166.764, 'male_B', 'app', 'neigh

Adding event: [493.997, 'female_B', 'app', 'neighbour', '']
Found pair event: [496.968, 'pair_B', 'sbend', 'neighbour', '']
Adding event: [496.968, 'male_B', 'sbend', 'neighbour', '']
Adding event: [496.968, 'female_B', 'sbend', 'neighbour', '']
Adding event: [497.835, 'female_A', 'sbend', 'neighbour', '']
Adding event: [506.787, 'female_B', 'app', 'neighbour', '']
Adding event: [516.897, 'male_A', 'app', 'neighbour', '']
Adding event: [517.933, 'male_A', 'app', 'neighbour', '']
Adding event: [525.629, 'female_A', 'app', 'neighbour', '']
Adding event: [527.915, 'female_B', 'sbend', 'neighbour', '']
Adding event: [549.114, 'female_A', 'app', 'neighbour', '']
Adding event: [550.194, 'female_B', 'sbend', 'neighbour', '']
Adding event: [559.305, 'male_A', 'app', 'neighbour', '']
Adding event: [563.908, 'female_A', 'app', 'neighbour', '']
Adding event: [564.797, 'female_B', 'sbend', 'neighbour', '']
Adding event: [569.13, 'male_A', 'app', 'neighbour', '']
Adding event: [578.594, 'female_A',

Adding event: [491.977, 'male_B', 'app', 'neighbour', '']
Adding event: [492.799, 'male_A', 'app', 'neighbour', '']
Adding event: [522.503, 'female_A', 'cavity', 'neighbour', '']
Adding event: [525.759, 'male_A', 'app', 'neighbour', '']
Adding event: [530.822, 'female_A', 'app', 'neighbour', '']
Adding event: [531.249, 'male_A', 'fspr', 'neighbour', '']
Adding event: [532.856, 'male_B', 'app', 'neighbour', '']
Adding event: [534.566, 'female_A', 'app', 'neighbour', '']
Adding event: [535.758, 'male_B', 'app', 'neighbour', '']
Found pair event: [536.429, 'pair_A', 'app', 'neighbour', '']
Adding event: [536.429, 'male_A', 'app', 'neighbour', '']
Adding event: [536.429, 'female_A', 'app', 'neighbour', '']
Adding event: [537.11, 'male_B', 'app', 'neighbour', '']
Found pair event: [537.868, 'pair_A', 'app', 'neighbour', '']
Adding event: [537.868, 'male_A', 'app', 'neighbour', '']
Adding event: [537.868, 'female_A', 'app', 'neighbour', '']
Adding event: [558.817, 'male_A', 'app', 'neighbour

Adding event: [534.734, 'female_B', 'app', 'neighbour', '']
Found pair event: [536.769, 'pair_B', 'app', 'neighbour', '']
Adding event: [536.769, 'male_B', 'app', 'neighbour', '']
Adding event: [536.769, 'female_B', 'app', 'neighbour', '']
Adding event: [538.554, 'female_B', 'app', 'neighbour', '']
Found pair event: [539.365, 'pair_B', 'app', 'neighbour', '']
Adding event: [539.365, 'male_B', 'app', 'neighbour', '']
Adding event: [539.365, 'female_B', 'app', 'neighbour', '']
Adding event: [540.951, 'female_B', 'app', 'neighbour', '']
Adding event: [541.69, 'female_B', 'app', 'neighbour', '']
Found pair event: [542.807, 'pair_B', 'app', 'neighbour', '']
Adding event: [542.807, 'male_B', 'app', 'neighbour', '']
Adding event: [542.807, 'female_B', 'app', 'neighbour', '']
Adding event: [543.9, 'female_B', 'app', 'neighbour', '']
Adding event: [545.236, 'female_B', 'app', 'neighbour', '']
Adding event: [551.254, 'male_A', 'ST', 'neighbour', '']
Adding event: [553.849, 'female_B', 'app', 'ne

Adding event: [298.397, 'female_A', 'app', 'neighbour', '']
Adding event: [298.992, 'male_B', 'app', 'neighbour', '']
Adding event: [301.154, 'female_B', 'fspr', 'neighbour', '']
Adding event: [302.388, 'female_A', 'app', 'partner', '']
Adding event: [304.655, 'female_A', 'app', 'neighbour', '']
Found pair event: [305.114, 'pair_B', 'app', 'neighbour', '']
Adding event: [305.114, 'male_B', 'app', 'neighbour', '']
Adding event: [305.114, 'female_B', 'app', 'neighbour', '']
Adding event: [306.421, 'male_B', 'app', 'neighbour', '']
Adding event: [307.567, 'female_A', 'app', 'neighbour', '']
Adding event: [308.298, 'female_B', 'fspr', 'partner', '']
Adding event: [310.836, 'female_B', 'app', 'neighbour', '']
Adding event: [312.666, 'female_A', 'fspr', 'neighbour', '']
Adding event: [312.666, 'male_B', 'app', 'neighbour', '']
Adding event: [316.116, 'male_B', 'app', 'neighbour', '']
Adding event: [317.829, 'female_A', 'app', 'neighbour', '']
Adding event: [319.983, 'male_B', 'fspr', 'neighb

In [9]:
# Report how many observations were kept after label correction and event parsing.
print(f'Have {len(observations)} valid observations')

Have 79 valid observations


This is where the fun stuff really starts, so initially we have some basic scaffolding code, along with the definition of two different kinds of latency, `max_latency` and `base_latency`.  We also define a `CombiningData` type; this is the combination of the observation id plus the subject.  The output data has one row for each unique `CombiningData`.  The `get_first_occurance_timestamp` function searches a list of events until both the subject and recipient are matched.

In [10]:
# Sentinel returned by get_first_occurance_timestamp when no event matches.
max_latency = 1000000
# Added to an observation's smallest latency to form the cap that GetLatency
# applies to every latency (same units as the event timestamps).
base_latency = 120

class CombiningData(namedtuple('CombiningData', ['obs_id', 'subject'])):
    """Key for one output row: an observation id combined with a subject."""

    def get_csv(self):
        """Return the space-separated parts of `obs_id`, then the subject, joined by ', '.

        The template has five slots, so `obs_id` is expected to consist of four
        space-separated parts.
        """
        columns = self.obs_id.split(' ') + [self.subject]
        return '{}, {}, {}, {}, {}'.format(*columns)

def get_first_occurance_timestamp(subject, recipient, events):
    """Return the timestamp of the first non-vstart event matching subject and recipient.

    `events` is scanned in order.  When no event matches, `max_latency` is
    returned instead.
    """
    matching_timestamps = (
        candidate.timestamp
        for candidate in events
        if candidate.type != 'vstart'
        and candidate.recipient == recipient
        and candidate.subject == subject
    )
    # The first value produced, if any, is the answer.
    for first_timestamp in matching_timestamps:
        return first_timestamp
    return max_latency

The `GetLatency` class takes a list of events and determines the timestamp of the first event for each combination of subject and recipient.  We are not interested in any events involving the partner, only intruder and neighbour.  If there is no event in the list for any subject and recipient pair, then insert a "maximum latency"; this is the lowest latency of all combinations for this list of events plus the "base latency", which was specified in the previous cell (`base_latency`).

In [11]:
class GetLatency:
    """First-event latencies for every subject/recipient pair of one observation.

    After construction the instance has one attribute per pair, named
    '<subject>_<recipient>' (for example `male_A_partner`), holding the capped
    latency for that pair.
    """

    def __init__(self, events):
        subjects = ('male_A', 'female_A', 'male_B', 'female_B')
        recipients = ('partner', 'neighbour')

        # Timestamp of the first matching event per slot; max_latency when the
        # observation has no such event.
        first_seen = {
            f'{subject}_{recipient}': get_first_occurance_timestamp(subject, recipient, events)
            for subject in subjects
            for recipient in recipients
        }

        # Every observation must contain at least one matching event.
        earliest = min(first_seen.values())
        assert earliest != max_latency, events

        # Cap each latency at base_latency above the earliest one.  Slots with
        # no event hold max_latency and therefore always end up at the cap.
        ceiling = base_latency + earliest
        for slot, timestamp in first_seen.items():
            setattr(self, slot, min(ceiling, timestamp))

Now that we have the scaffolding setup this is a straightforward traverse of the data generating a dictionary of `CombiningData` to latencies, called `latencies` (not very original).  For each `CombiningData` there are two latencies, the intruder latency and the neighbour latency.  So for each observation we should be entering four entries into `latencies`.

In [12]:
# Pair of capped latencies stored for one CombiningData key.
Latency = namedtuple('Latency', ['partner', 'neighbour'])

# CombiningData -> Latency, filled with four entries per observation.
latencies = dict()

for label, events in observations.items():
    latency = GetLatency(events)

    if verbose:
        # Short tag printed for each subject.
        for tag, subject in (('m', 'male_A'), ('f', 'female_A'), ('nm', 'male_B'), ('nf', 'female_B')):
            partner_latency = getattr(latency, f'{subject}_partner')
            neighbour_latency = getattr(latency, f'{subject}_neighbour')
            print(f'{label}, {tag}, {partner_latency}, {neighbour_latency}')

    # store latencies for next step
    latencies.update({
        CombiningData(label, subject): Latency(
            getattr(latency, f'{subject}_partner'),
            getattr(latency, f'{subject}_neighbour'),
        )
        for subject in ('male_A', 'female_A', 'male_B', 'female_B')
    })

F5 2017.06.09 habituation control, m, 44.545, 84.266
F5 2017.06.09 habituation control, f, 158.312, 78.352
F5 2017.06.09 habituation control, nm, 90.3, 137.582
F5 2017.06.09 habituation control, nf, 66.898, 38.312
F6 2017.07.15 habituation treatment, m, 124.414, 42.802
F6 2017.07.15 habituation treatment, f, 80.38, 22.11
F6 2017.07.15 habituation treatment, nm, 35.756, 124.414
F6 2017.07.15 habituation treatment, nf, 4.414, 38.464
F5 2017.04.14 habituation control, m, 121.933, 52.746
F5 2017.04.14 habituation control, f, 121.933, 52.746
F5 2017.04.14 habituation control, nm, 121.933, 51.655
F5 2017.04.14 habituation control, nf, 121.933, 1.9330000000000003
F5 2017.04.28 habituation control, m, 123.101, 123.101
F5 2017.04.28 habituation control, f, 123.101, 123.101
F5 2017.04.28 habituation control, nm, 123.101, 3.101
F5 2017.04.28 habituation control, nf, 123.101, 3.655
F6 2017.06.25 experiment control, m, 120.011, 0.01100000000000012
F6 2017.06.25 experiment control, f, 120.011, 0.011

This is the core of what we did in the first python script, so first up define a data structure to hold the data.  We already have the `CombiningData`, so that is the "key", and the output will have one row for each `CombiningData`.  The value type is the `BehaviourModifier`.

In [26]:
# This is a plain mutable class rather than a namedtuple because the fields are
# updated through getattr/setattr.  It deliberately keeps a normal instance
# __dict__: both CSV helpers enumerate the fields with vars().
class BehaviourModifier:
    """Per-subject totals for one observation, plus the two latencies.

    The '<type>_<recipient>' fields start at 0; `l_partner` and `l_neighbour`
    start as None.  The attribute creation order in __init__ defines the column
    order of the CSV helpers.
    """

    def __init__(self):
        counter_names = (
            'app_neighbour', 'app_partner',
            'fspr_neighbour', 'fspr_partner',
            'cavity_neighbour',
            'ST_neighbour',
            'hd_neighbour', 'hd_partner',
            'bars_neighbour', 'bars_partner',
            'freeze_neighbour',
            'sbend_neighbour', 'sbend_partner',
        )
        for counter_name in counter_names:
            setattr(self, counter_name, 0)
        self.l_partner = None
        self.l_neighbour = None

    def get_csv(self):
        """Return all field values, in definition order, joined by ', '."""
        return ', '.join(f'{value}' for value in vars(self).values())

    def get_csv_fields(self):
        """Return all field names, in definition order, joined by ', '."""
        return ', '.join(f'{name}' for name in vars(self).keys())

In [None]:
def create_pairid_status_lookup_table():
    """Return a new, empty lookup table (nothing is populated yet)."""
    return {}


Iterate over the observations, then over the events, and total up how many events of each type are present.

In [22]:
# key is a CombiningData, value is BehaviourModifier
combo_dict = dict()

# Create one row per (observation, subject) up front so that subjects without
# any event still appear in the output.
for label in observations.keys():
    for subject in ['male_A', 'female_A', 'male_B', 'female_B']:
        combo_dict[CombiningData(label, subject)] = BehaviourModifier()

# iterate over the observations
for label, events in observations.items():

    # subject -> timestamp at which its currently open freeze started.  It is
    # reset per observation; a freeze that is never closed adds nothing.
    freeze_state = dict()

    # iterate over the events in the observation
    for event in events:
        if event.type == 'vstart':
            continue
        if not event.subject:
            # Report the observation being processed right now (not the key
            # left over from a previous event).
            print(f'Missing subject: "{label}" {event}, skipping...')
            continue
        # pre-agg should just be ignored in the analysis, whatever its recipient
        if event.type == 'pre-agg':
            continue

        combo = CombiningData(label, event.subject)
        modifier = combo_dict[combo]
        field = f'{event.type}_{event.recipient}'

        # freeze events are NOT "point" events; they are "state" events, toggling on and off.
        if event.type == 'freeze':
            start_time = freeze_state.pop(event.subject, None)
            if start_time is None:
                # entering the freeze state: remember when it started
                freeze_state[event.subject] = event.timestamp
            else:
                # leaving the freeze state: accumulate its duration
                setattr(modifier, field, getattr(modifier, field) + (event.timestamp - start_time))
            # Skip normal increment stuff here
            continue

        # Any other event must map onto an existing counter; fail loudly otherwise.
        assert hasattr(modifier, field), field
        setattr(modifier, field, getattr(modifier, field) + 1)

Missing subject: "F5 2017.04.21 experiment control" "192.15099999999998, , app, neighbour", skipping...
Missing subject: "F5 2017.04.21 experiment control" "270.591, , app, neighbour", skipping...
Missing subject: "F5 2017.07.09 experiment control" "137.646, , fspr, neighbour", skipping...
Missing subject: "F5 2017.05.01 experiment control" "439.122, , ST, neighbour", skipping...
Missing subject: "F6 2017.05.01 experiment control" "294.902, , app, partner", skipping...
Missing subject: "F5 2017.05.18 experiment treatment" "564.285, , fspr, neighbour", skipping...
Missing subject: "F5 2017.05.18 experiment treatment" "589.7479999999999, , fspr, neighbour", skipping...
Missing subject: "F2 2017.06.09 habituation treatment" "495.343, , app, partner", skipping...
Missing subject: "F6 2017.04.28 habituation control" "106.33099999999999, , app, neighbour", skipping...


Inserting the latencies can be done in one place.  As both the `combo_dict` and `latencies` use the same set of keys, the lookup should never fail.

In [23]:
# Copy the latencies onto the matching rows; a Latency unpacks as (partner, neighbour).
for combo_label, item in combo_dict.items():
    item.l_partner, item.l_neighbour = latencies[combo_label]

Sanity check: there should be $4 \times len(observations)$ items in `combo_dict`, as each observation has four subjects.

In [24]:
# Four subjects per observation, so four rows per observation are expected.
expected_items = 4 * len(observations)
print(f'Combo dict has {len(combo_dict)} items, should have {expected_items} items')
assert len(combo_dict) == expected_items

Combo dict has 316 items, should have 316 items


Finally output the data to a csv file.

In [25]:
# Write one header line followed by one line per (observation, subject) row;
# columns are separated by ', '.
with open('output_data_long_videos.csv','w') as f:
    header_columns = f'tank_num, date, phase, cond, subject, {BehaviourModifier().get_csv_fields()}'
    f.write(header_columns + '\n')
    for k, v in combo_dict.items():
        f.write(', '.join((k.get_csv(), v.get_csv())) + '\n')

    # Position in the file after the last write, reported as the size written.
    print(f'Wrote {f.tell()} bytes')

Wrote 35135 bytes
