# Story data analyses
---

## import libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import numpy as np
import glob as gl
import plotly.plotly as pl
from datetime import datetime as dt
from datetime import timedelta as td
from scipy import stats as st

## import data
import csv files that were exported from 'dataframe_processing_and_export.ipynb' and then moved to EXPORT folder.

In [2]:
# Locate the exported story CSVs in the EXPORT folder. File names follow the
# pattern "initials_type.csv" (e.g. og_pm.csv for place mentions, og_su.csv
# for story units).
storydata1 = gl.glob('EXPORT/*_pm.csv')
storydata2 = gl.glob('EXPORT/*_su.csv')

# Each CSV becomes one dataframe. The dictionary key is characters 7-11 of the
# path, i.e. the five characters that follow the 7-character 'EXPORT/' prefix
# (for example 'og_pm').
d1 = {path[7:12]: pd.read_csv(path, sep=',', encoding='latin-1')
      for path in storydata1}  # pm DBs
d_raw = {path[7:12]: pd.read_csv(path, sep=',', encoding='latin-1')
         for path in storydata2}  # su DBs

## functions

### pre-processing

- **format_timeVals**: prepares all imported time data for any further processing.

- **distribute_ids_to_adjacent_places**: takes raw story units as inputs and allocates new IDs to these units based on whether they are adjacent to story units referring to the same place.

- **consolidate_adjacent_places**: takes the data processed by distribute_ids_to_adjacent_places and aggregates rows based on the newly-allocated IDs. The result is stored as a new dataframe in the dictionary *d_raw*, under a key made of the story name plus the suffix passed as *append*.

- **add_su_buffer**: adds temporal buffers of user's choice (in seconds) to the input dataframe.

### statistics

- **new_timeVals**: calculates two new fields: time_length, which holds each story unit's length as a list of [hours, minutes, seconds] strings, and num_minutes, which provides this length in minutes as a real number.

- **calcStats**: calculates some statistics based on story unit / place mention dynamics. That is, whether a place mention with a *time* val falls between *time_start* and *time_end* vals (>=,<). When a mention occurs at the temporal breakpoint between two story units, the story unit that follows the first is the one that the mention becomes associated with [...]

### aggregation

- **aggregatedByPlaces**: input data is aggregated by place name (and scale order), producing data whereby each row represents the totality of attributes pertaining to the story units that relate to that place. The result is stored as a new dataframe in the dictionary *d_raw*, under a key made of the name plus the suffix passed as *append*.

- **timeSum**: is called from within aggregatedByPlaces to enable the sum of the time_length field during aggregation.

---
the following cell needs to be run before proceeding

In [29]:
def format_timeVals(name, df):
    """Parse the time columns of *df* into pandas datetimes, in place.

    Args:
        name: dataframe label. When its last two characters are 'pm' the
            single column 'time' is parsed; otherwise 'time_start' and
            'time_end' are parsed.
        df: dataframe whose time columns hold text in the form
            'YYYY-MM-DD HH:MM:SS'.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    if name[-2:] == 'pm':
        time_columns = ['time']
    else:
        time_columns = ['time_start', 'time_end']
    for column in time_columns:
        df[column] = pd.to_datetime(df[column], format=time_format)

# Attribute new IDs to story units so that each unique ID represents a change
# in spatial discourse instead of a story unit change (i.e. each ID represents
# a spatially distinct chunk of discourse).
def distribute_ids_to_adjacent_places(name, df):
    """Add an 'agg_su_id' column that is shared by consecutive rows about the same place.

    Every row receives the 'id' of the first row of the run it belongs to. A
    new run starts on the first row, whenever 'place' differs from the
    previous row's 'place', and whenever 'id' equals 1.

    Runs are found by comparing each row with the previous one by position,
    so the function works for any index and for data whose first 'id' is not
    1 (label arithmetic such as ``df.loc[i - 1]`` would raise KeyError there).

    Args:
        name: dataframe label; not used.
        df: story-unit dataframe with 'id' and 'place' columns; modified in
            place.
    """
    # The first row has no predecessor, so shift() yields a missing value there
    # and the inequality is True: the first row always opens a run.
    starts_run = (df['place'] != df['place'].shift()) | (df['id'] == 1)
    # Keep the id only on run-opening rows, then carry it down the run. The
    # cast keeps the column float even when no row had to be filled.
    df['agg_su_id'] = df['id'].where(starts_run).ffill().astype('float64')

# Collapse the rows that share an aggregated story-unit ID into a single row.
def consolidate_adjacent_places(name, df, append):
    """Group *df* by 'agg_su_id', 'place' and 'scale_order' and store the result in d_raw.

    Within each group the 'id' and 'su_num' values are joined into one
    ';'-separated string, 'time_start' keeps the first value and 'time_end'
    the last one.

    Args:
        name: key of the source story; the result is stored under
            ``name + append``.
        df: story-unit dataframe that already carries an 'agg_su_id' column.
        append: suffix added to *name* to form the key of the new dataframe.
    """
    def join_as_text(values):
        return values.astype('str').str.cat(sep=';')

    grouped = df.groupby(['agg_su_id', 'place', 'scale_order'], as_index=False)
    merged = grouped.agg({
        'id': join_as_text,
        'su_num': join_as_text,
        'time_start': 'first',
        'time_end': 'last'})
    column_order = ['agg_su_id', 'id', 'su_num', 'place', 'scale_order',
                    'time_start', 'time_end']
    d_raw[name + append] = merged[column_order]
        
def add_su_buffer(name, df, buffer):
    """Widen every story unit by *buffer* seconds on both sides, in place.

    'time_start' is moved *buffer* seconds earlier and 'time_end' *buffer*
    seconds later. The first row keeps its original 'time_start' so that the
    story still begins at its original starting time.

    Args:
        name: dataframe label; not used.
        df: story-unit dataframe with datetime 'time_start' and 'time_end'
            columns.
        buffer: size of the buffer in seconds.
    """
    delta = td(seconds=buffer)
    starts = df['time_start'] - delta
    if len(starts) > 0:
        # Write the first start back on the new Series. Assigning through
        # df.iloc[0]['time_start'] would only modify a temporary copy of the
        # row and leave the dataframe unchanged.
        starts.iloc[0] = df['time_start'].iloc[0]
    df['time_start'] = starts
    df['time_end'] = df['time_end'] + delta

def new_timeVals(name, df):
    """Add the 'time_length' and 'num_minutes' columns to *df*, in place.

    'time_length' holds the duration of each row ('time_end' minus
    'time_start') as a list of three strings [hours, minutes, seconds].
    'num_minutes' holds the same duration in minutes as a real number.

    Args:
        name: dataframe label; not used.
        df: dataframe with datetime 'time_start' and 'time_end' columns.
    """
    def as_hms_parts(duration):
        # The text form of a duration looks like '0 days 00:00:15'. The third
        # space-separated token is the clock part, which is split on ':'.
        return str(duration).split(' ')[2].split(':')

    durations = df['time_end'] - df['time_start']
    df['time_length'] = durations.map(as_hms_parts)
    df['num_minutes'] = durations.map(lambda span: span.total_seconds() / 60)
    
def calcStats(name1, df1, name2, df2):
    """Annotate every story unit in *df2* with the place mentions of *df1* that fall inside it.

    A mention belongs to a unit when ``time_start <= time < time_end``, so a
    mention that falls exactly on the boundary between two units is counted
    for the later unit. Each mention inside a unit is classified by comparing
    place names as text:

    * match   - the mention's place equals the unit's place;
    * finer   - the unit's place is contained in the mention's place;
    * coarser - the mention's place is contained in the unit's place.

    The following columns are written to *df2* (index and place columns are
    ';'-prefixed concatenations such as ';4;6'):
    mention_freq / _index / _places, mention_match_freq / _index / _places,
    mention_match_time_index, mention_first_match_time (0 when no mention
    matches), mention_first_match_time_ratio, mention_coarser_match_freq /
    _index / _places and mention_finer_match_freq / _index / _places.

    The columns are built as complete lists and assigned once at the end.
    This avoids writing a Timedelta or a float into an integer column cell by
    cell, and keeps *df2* untouched when an error interrupts the scan.

    Args:
        name1: label of the place-mention dataframe; only printed on error.
        df1: place mentions; the columns 'id', 'time' and 'place' are read.
        name2: label of the story-unit dataframe; only printed on error.
        df2: story units; 'place', 'time_start' and 'time_end' are read and
            the new columns are added in place.

    Raises:
        Exception: any error raised during the scan is re-raised after the
            labels and the current loop state have been printed.
    """
    columns = (
        'mention_freq', 'mention_index', 'mention_places',
        'mention_match_freq', 'mention_match_index', 'mention_match_places',
        'mention_match_time_index', 'mention_first_match_time',
        'mention_first_match_time_ratio',
        'mention_coarser_match_freq', 'mention_coarser_match_index',
        'mention_coarser_match_places',
        'mention_finer_match_freq', 'mention_finer_match_index',
        'mention_finer_match_places',
    )
    results = {column: [] for column in columns}

    # Read every needed column once instead of doing one iloc lookup per cell.
    mention_ids = df1['id'].tolist()
    mention_times = df1['time'].tolist()
    mention_places = df1['place'].tolist()
    unit_places = df2['place'].tolist()
    unit_starts = df2['time_start'].tolist()
    unit_ends = df2['time_end'].tolist()

    # Bound up front so the diagnostic print below can never raise NameError
    # and hide the original exception.
    i = j = place1 = place2 = _id = None
    try:
        for i, (place1, start, end) in enumerate(
                zip(unit_places, unit_starts, unit_ends)):
            all_ids, all_places = [], []
            match_ids, match_places, match_offsets = [], [], []
            finer_ids, finer_places = [], []
            coarser_ids, coarser_places = [], []

            for j, (mention_id, moment, place2) in enumerate(
                    zip(mention_ids, mention_times, mention_places)):
                if not (start <= moment < end):
                    continue
                _id = str(mention_id)
                all_ids.append(';' + _id)
                all_places.append(';' + place2)
                if place2 == place1:
                    match_ids.append(';' + _id)
                    match_places.append(';' + place2)
                    # Time elapsed since the unit started, kept as a Timedelta.
                    match_offsets.append(moment - start)
                elif place1 in place2:
                    finer_ids.append(';' + _id)
                    finer_places.append(';' + place2)
                elif place2 in place1:
                    coarser_ids.append(';' + _id)
                    coarser_places.append(';' + place2)

            results['mention_freq'].append(len(all_ids))
            results['mention_index'].append(''.join(all_ids))
            results['mention_places'].append(''.join(all_places))

            results['mention_match_freq'].append(len(match_ids))
            results['mention_match_index'].append(''.join(match_ids))
            results['mention_match_places'].append(''.join(match_places))

            if match_offsets:
                first_offset = match_offsets[0]
                results['mention_match_time_index'].append(
                    ''.join(';' + str(offset) for offset in match_offsets))
                results['mention_first_match_time'].append(first_offset)
                # A match implies start <= moment < end, so the unit length
                # is strictly positive here.
                results['mention_first_match_time_ratio'].append(
                    first_offset.total_seconds() / (end - start).total_seconds())
            else:
                results['mention_match_time_index'].append('')
                results['mention_first_match_time'].append(0)
                results['mention_first_match_time_ratio'].append(0)

            results['mention_coarser_match_freq'].append(len(coarser_ids))
            results['mention_coarser_match_index'].append(''.join(coarser_ids))
            results['mention_coarser_match_places'].append(''.join(coarser_places))

            results['mention_finer_match_freq'].append(len(finer_ids))
            results['mention_finer_match_index'].append(''.join(finer_ids))
            results['mention_finer_match_places'].append(''.join(finer_places))
    except Exception:
        print(name1, name2, place1, place2, i, j, _id)
        raise

    for column in columns:
        values = results[column]
        if column == 'mention_first_match_time':
            # Mixes the integer 0 with Timedelta values, so keep it as objects.
            values = pd.Series(values, index=df2.index, dtype=object)
        df2[column] = values

# Look at unit-mention dynamics by place (total of all spatial units in a same
# place within a story), instead of by spatial unit.
def aggregatedByPlaces(name, df, append):
    """Aggregate *df* by 'place' and 'scale_order' and store the result in d_raw.

    Identifier columns are joined with ';', the ';'-prefixed mention lists are
    concatenated, frequencies and minutes are summed, 'time_start' keeps the
    first value and 'time_end' the last one, and 'time_length' is summed with
    timeSum.

    Args:
        name: key prefix; the result is stored under ``name + append``.
        df: story-unit dataframe that already carries the columns added by
            new_timeVals and calcStats.
        append: suffix added to *name* to form the key of the new dataframe.

    Returns:
        The aggregated dataframe (the same object that is stored in d_raw).
    """
    def join_ids(values):
        return values.astype('str').str.cat(sep=';')

    def join_agg_ids(values):
        # agg_su_id is stored as a float, so drop only a trailing '.0'. The
        # pattern is anchored and escaped: an unanchored '.0' treated as a
        # regular expression also swallows the '10' of '10.0'.
        # TODO: duplicates still need to be removed.
        return values.astype('str').str.replace(r'\.0$', '', regex=True).str.cat(sep=';')

    def merge_mention_lists(values):
        # Missing values become empty text before joining, so place names that
        # happen to contain the letters 'nan' are left intact.
        return values.fillna('').astype('str').str.cat(sep='').strip(';')

    by_place = df.groupby(['place', 'scale_order'], as_index=False).agg({
        'agg_su_id': join_agg_ids,
        'id': join_ids,
        'su_num': join_ids,
        'time_start': 'first',
        'time_end': 'last',
        'time_length': lambda x: timeSum(x),
        'num_minutes': 'sum',
        'mention_index': merge_mention_lists,
        'mention_match_index': merge_mention_lists,
        'mention_coarser_match_index': merge_mention_lists,
        'mention_finer_match_index': merge_mention_lists,
        'mention_places': merge_mention_lists,
        'mention_coarser_match_places': merge_mention_lists,
        'mention_match_places': merge_mention_lists,
        'mention_finer_match_places': merge_mention_lists,
        'mention_freq': 'sum',
        'mention_coarser_match_freq': 'sum',
        'mention_match_freq': 'sum',
        'mention_finer_match_freq': 'sum'})
    d_raw[name + append] = by_place
    return by_place
    
def timeSum(x):
    """Add up durations given as [hours, minutes, seconds] sequences.

    Args:
        x: iterable whose items hold the hours, minutes and seconds of one
            duration at positions 0, 1 and 2, each convertible with int().

    Returns:
        The total as a timedelta; a zero timedelta when *x* is empty.
    """
    total = td()
    for parts in x:
        total += td(hours=int(parts[0]),
                    minutes=int(parts[1]),
                    seconds=int(parts[2]))
    return total

the calcStats function will add the following to all databases:

table legend
----------------

- 'su_num': story unit number (links to original database)

- 'time_start': beginning time of unit
- 'time_end': end-time of unit
- 'time_length': timespan of unit
- 'num_minutes': number of minutes in unit represented as a real number

- 'mention_index': list of IDs of places mentioned during timespan of unit
- 'mention_match_index': list of IDs of places mentioned during timespan of unit that match the location of the unit
- 'mention_coarser_match_index': list of IDs of places mentioned during timespan of unit that match the location of the unit at a coarser scale
- 'mention_finer_match_index': list of IDs of places mentioned during timespan of unit that match the location of the unit at a finer scale

- 'mention_places': list of places mentioned during timespan of unit
- 'mention_match_places': list of places mentioned during timespan of unit that match the location of the unit
- 'mention_coarser_match_places': list of places mentioned during timespan of unit that match the location of the unit at a coarser scale
- 'mention_finer_match_places': list of places mentioned during timespan of unit that match the location of the unit at a finer scale 

- 'mention_freq': number of places mentioned during timespan of unit
- 'mention_coarser_match_freq': number of places mentioned during timespan of unit that match the location of the unit at a coarser scale
- 'mention_match_freq': number of places mentioned during timespan of unit that match the location of the unit
- 'mention_finer_match_freq': number of places mentioned during timespan of unit that match the location of the unit at a finer scale

- 'mention_match_time_index': list of times of places mentioned during timespan of unit that match the location of the unit
- 'mention_first_match_time': time elapsed between the beginning of the unit and the first mention, during the timespan of the unit, of a place that matches the location of the unit (0 when there is no such mention)
- 'mention_first_match_time_ratio': the same elapsed time expressed as a fraction of the total timespan of that unit (0 when there is no such mention)

## Executing the functions
---
running these functions uses loops to cycle through each story within a dictionary. Each dictionary contains a copy of all 10 stories to which these functions need to be applied in a different order depending on what we want to test.

For the present analysis, we will apply 3 database formats to all 10 stories, each will require its own pipeline of functions, the formats include:
1. a format with each aggregated story unit and attributes about that unit's relation with simultaneous mentions of the place.
2. a format with each aggregated story unit (with its temporal boundaries extended by 5 seconds) and attributes about that unit's relation with simultaneous mentions of the place.*
3. a format with each aggregated story unit (with its temporal boundaries extended by 10 seconds) and attributes about that unit's relation with simultaneous mentions of the place.*

*adding buffers at the beginning and end, but especially the beginning, of spatial discourse units will make up for a story unit that may have started after the mention which provoked it.

each of the previous databases can be aggregated by place so as to produce more holistic and summative views on the relation between each individual place in a story and simultaneous mentions of that place.

4. a format with each unique place in a story and attributes about that place's relation with simultaneous mentions of the place.

let's format the place mentions first, once and for all:

In [4]:
# Parse the 'time' column of every place-mention dataframe.
for story_key in d1:
    format_timeVals(story_key, d1[story_key])

create 3 new dictionaries from d_raw to contain these 3 formats:

In [5]:
# Iterate over a snapshot of d_raw: consolidate_adjacent_places adds new
# entries to d_raw while this loop is running.
for k, v in list(d_raw.items()):
    format_timeVals(k, v)
    distribute_ids_to_adjacent_places(k, v)
    # The same consolidation is stored three times under different suffixes so
    # that each copy can later be processed on its own. New dictionaries cannot
    # be created dynamically from an argument, but new dataframes inside an
    # existing dictionary can.
    for suffix in ('_2', '_2_b5', '_2_b10'):
        consolidate_adjacent_places(k, v, suffix)

# From this, create new dictionaries... for simplicity's sake.
d_su = {}  # raw story units
d_2 = {}  # consolidated su DBs
d_2_b5 = {}  # consolidated and expanded su DBs (5 seconds before beginning of each story unit and 5 seconds after)
d_2_b10 = {}  # consolidated and expanded su DBs (10 seconds before beginning of each story unit and 10 seconds after)

# Distribute the dataframes to the new dicts according to the last character
# of their key ('..._su', '..._2', '..._b5', '..._b10').
target_by_last_char = {'2': d_2, '5': d_2_b5, '0': d_2_b10, 'u': d_su}
for k, v in d_raw.items():
    target = target_by_last_char.get(k[-1:])
    if target is not None:
        target[k] = pd.DataFrame(v)

1*. a format with each story unit and attributes about that unit's relation with simultaneous place mentions.

In [30]:
# Pair the place mentions and the raw story units of each story. Both
# dictionaries are walked in sorted key order, so the pairing relies on the two
# holding the same stories.
for k1, k2 in zip(sorted(d1), sorted(d_su)):
    v1, v2 = d1[k1], d_su[k2]
    new_timeVals(k2, v2)
    calcStats(k1, v1, k2, v2)

1. a format with each aggregated story unit and attributes about that unit's relation with simultaneous place mentions.

In [None]:
# Pair the place mentions and the consolidated story units of each story. Both
# dictionaries are walked in sorted key order, so the pairing relies on the two
# holding the same stories.
for k1, k2 in zip(sorted(d1), sorted(d_2)):
    v1, v2 = d1[k1], d_2[k2]
    new_timeVals(k2, v2)
    calcStats(k1, v1, k2, v2)

2. a format with each aggregated story unit (with its temporal boundaries extended by 5 seconds) and attributes about that unit's relation with simultaneous mentions of the place.

In [None]:
# Same pairing as above, after widening every consolidated unit by 5 seconds
# on each side.
for k1, k2 in zip(sorted(d1), sorted(d_2_b5)):
    v1, v2 = d1[k1], d_2_b5[k2]
    add_su_buffer(k2, v2, 5)
    new_timeVals(k2, v2)
    calcStats(k1, v1, k2, v2)

3. a format with each aggregated story unit (with its temporal boundaries extended by 10 seconds) and attributes about that unit's relation with simultaneous mentions of the place.

In [None]:
# Same pairing as above, after widening every consolidated unit by 10 seconds
# on each side.
for k1, k2 in zip(sorted(d1), sorted(d_2_b10)):
    v1, v2 = d1[k1], d_2_b10[k2]
    add_su_buffer(k2, v2, 10)
    new_timeVals(k2, v2)
    calcStats(k1, v1, k2, v2)

Now, we want to also produce new dataframes that will contain

1. each story broken down by unique place, rather than by changes in spatial discourse. This lets us view a summary of each distinct place's relation to story units.
2. an aggregation of all these places across the 10 stories, which gives a more totalizing view of how scale plays into this relationship.

We want these datasets for all three dictionaries (i.e. (1) the one with 5-second extended spatial discourse units, (2) the one with 10-second extended spatial discourse units, (3) the one with unchanged spatial discourse units).

In [None]:
# Build the place-level aggregate of every story for each of the three
# story-unit formats. aggregatedByPlaces stores its results in d_raw.
for su_dict in (d_2, d_2_b5, d_2_b10):
    for k, v in su_dict.items():
        aggregatedByPlaces(k, v, '_p')

d_2_p = {}  # consolidated su -> place-aggregated DBs
d_2_b5_p = {}  # consolidated and expanded su DBs (5 seconds before beginning of each story unit and 5 seconds after) -> place-aggregated DBs
d_2_b10_p = {}  # consolidated and expanded su DBs (10 seconds before beginning of each story unit and 10 seconds after) -> place-aggregated DBs

# Copy the new dataframes out of d_raw into their own dictionaries, selected by
# the last three characters of their key.
target_by_suffix = {'2_p': d_2_p, '5_p': d_2_b5_p, '0_p': d_2_b10_p}
for k, v in d_raw.items():
    target = target_by_suffix.get(k[-3:])
    if target is not None:
        target[k] = pd.DataFrame(v)

Now concatenate the consolidated dataframes of all stories (whose story-unit IDs were generated by 'consolidate_adjacent_places') and aggregate the combined data by place name using 'aggregatedByPlaces'.

In [None]:
# Stack the consolidated story units of every story into one dataframe per
# format.
total_2 = pd.concat(d_2)
total_b5 = pd.concat(d_2_b5)
total_b10 = pd.concat(d_2_b10)

# aggregatedByPlaces stores its result in d_raw under name + append, so the
# aggregated dataframes are read back from there rather than taken from the
# call's return value.
aggregatedByPlaces('2', total_2, '_p_all')
p_all = d_raw['2_p_all']
aggregatedByPlaces('2_b5', total_b5, '_p_all')
b5_p_all = d_raw['2_b5_p_all']
aggregatedByPlaces('2_b10', total_b10, '_p_all')
b10_p_all = d_raw['2_b10_p_all']

## regression analyses

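generate tables showing regression analyses between the number of simultaneous mentions of a given place and the amount of minutes of discourse time regarding that place. In the code below the mention count is passed to the regression as x and the number of minutes as y.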

*the output here needs to be played with manually. Change the values inside the regression_table function to control which data are being tested.

In [None]:
def regression_table(name, df, scale_order=4, freq_col='mention_finer_match_freq'):
    """Regress 'num_minutes' on a mention-frequency column and store the result in newtable.

    Only the rows of *df* whose 'scale_order' equals *scale_order* are used.
    The frequency column is passed to the regression as x and 'num_minutes'
    as y.
    NOTE(review): the accompanying text describes minutes as x and mentions
    as y; confirm which direction is intended.

    Args:
        name: key under which the result is stored in newtable.
        df: place-aggregated dataframe with 'scale_order', 'num_minutes' and
            the *freq_col* column.
        scale_order: scale level to analyse (default 4).
        freq_col: name of the frequency column used as x
            (default 'mention_finer_match_freq').

    The stored dictionary holds slope, intercept, r_value, p_value, std_err
    and n, where n is the number of rows that entered the regression.
    """
    subset = df.loc[df['scale_order'] == scale_order]
    slope, intercept, r_value, p_value, std_err = st.linregress(
        subset[freq_col], subset['num_minutes'])
    newtable[name] = {
        'slope': slope,
        'intercept': intercept,
        'r_value': r_value,
        'p_value': p_value,
        'std_err': std_err,
        # Count the rows actually used above, not those of another scale.
        'n': subset.shape[0]}
    
def append_mean(df):
    """Return a copy of *df* with one extra row, labelled 'mean', holding each column's mean.

    Built with pd.concat because DataFrame.append is no longer available in
    recent pandas releases.

    Args:
        df: dataframe whose columns can be averaged with DataFrame.mean().

    Returns:
        A new dataframe; *df* itself is left unchanged.
    """
    mean_df = df.mean().to_frame('mean').transpose()
    return pd.concat([df, mean_df])

# Run the regression on every place-aggregated story of the 10-second-buffer
# format, then show one row per story followed by a row of column means.
newtable = {}
for story_key, by_place in d_2_b10_p.items():
    regression_table(story_key, by_place)
stats_df = append_mean(pd.DataFrame(newtable).transpose())
stats_df

reg analyses for dfs containing all places aggregated.

In [None]:
# Add the regression over all stories combined to newtable and rebuild the
# table from it.
# NOTE(review): newtable is not reset here, so if the per-story cell was run
# first its rows are still present and the first three rows selected below are
# whatever was inserted first - confirm that this is the intended selection.
regression_table('2_all_p', d_raw['2_p_all'])
stats_df = pd.DataFrame(newtable).transpose()
first_three = stats_df.iloc[0:3]
append_mean(first_three)


## EH case study

Using a transcript onto which each story unit number was added inline, insert the text associated with each story unit into the eh story unit database for qualitative assessment.

In [152]:
# Read the first .txt file found in EH_case_study/ and attach the text of each
# story unit to the matching row of the 'eh_su' story-unit dataframe.
file = gl.glob('EH_case_study/*.txt')
# NOTE(review): no encoding is passed to open(), so the platform default is
# used - confirm that it matches the transcript file.
with open(file[0]) as txt:
    txtdata = txt.read()
    # Turn the inline "{Unit " and "}" markers into "|" delimiters, drop
    # newlines, non-breaking spaces, backslashes and the characters U+0153,
    # U+2019 and U+201D, then split on "|". The order of the replacements
    # matters.
    txtdata_list = txtdata.replace("{Unit ","|").replace("}","|").replace("\n","").replace("\\xa0?","").replace("\xa0","").replace("\\","").replace("\u0153","").replace("\u2019","").replace("\u201d","").split("|")
    #txtdata_list = txtdata.split("{")
    # Discard whatever precedes the first delimiter.
    del txtdata_list[0]
# Even positions become the keys and odd positions the values; presumably the
# keys are unit numbers and the values the text that follows each marker.
d_txt = dict(zip(txtdata_list[::2], txtdata_list[1::2]))


eh_su = d_su['eh_su']
eh_su['transcript'] = ""


# Write each text into the row(s) whose 'su_num' equals the integer key.
for k, v in d_txt.items():
    k1 = int(k)
    eh_su.loc[eh_su['su_num'] == k1, 'transcript'] = v
    
#display(eh_su)
eh_su.to_csv('eh_su_case_study.csv', sep=',', index=False, encoding='latin-1')
# NOTE(review): the string literal below is never executed. It holds an
# earlier draft of the loop above, and `unit(5:-2)` in it is not valid Python.
"""

for unit in transcript:
    su_number = unit(5:-2)
    eh_su.loc['su_number' == su_number, 'transcript'] = unit

eh_su.to_csv('eh_su_case_study.csv', sep=',', index=False, encoding='latin-1')
"""

SyntaxError: invalid syntax (<ipython-input-152-ab7ff8e90513>, line 4)

In [148]:
# Display the dictionary built from the transcript in the case-study cell.
d_txt

{'10': " E.B.:: J'ai de très bons souvenirs de mes parents. D'abord ma mère c'était une femme qui était brillante, qui était intelligente, qui regrettait de ne pas avoir fait des études alors qu'elle aurait pu véritablement réussir. Je me souviens, alors que j'ai commencé à aller vivre chez mes parents vers l'âge de 14-15 ans, ma mère, tous les matins se levait, elle prenait sa bible et puis elle me lisait un passage de la bible. Souvent les mêmes passages. Plus tard d'ailleurs en vieillissant je me suis dit est ce que ma mère avait appris des passages par cœur (sourit). C'était avant qu'elle perde la vue; parce que plus tard elle a perdu la vue Elle était très contente évidemment de me montrer qu'elle était lettrée. Elle était très fière surtout qu'elle était dans une région où tout le monde était analphabète, mon père était analphabète, mon père n'a jamais été à l'école, il ne savait lire et quand ma mère lisait la bible, mon père était assis à côté tranquillement, il écoutait. C'éta

## scratch

In [None]:
# Let pandas print up to 500 rows when a dataframe is displayed.
pd.options.display.max_rows = 500
# The commented-out lines below are leftover scratch experiments.
#display(d22['ep_su'])
#d2['ap_su'].loc[d2['ap_su']['scale_order'] == 6].shape[0]
#print(d22['bn_su'].shape[0], total['bn']

#aggregatedByPlaces('total',total_agg)
def searchit(d_raw, search_id):
    """Print where a given id occurs in every dataframe of a dictionary.

    For each entry the key is printed, followed by the 'place' values of the
    rows whose 'id' equals *search_id*.

    Args:
        d_raw: dictionary of dataframes that have 'id' and 'place' columns.
        search_id: id value to look for.
    """
    for key in d_raw:
        frame = d_raw[key]
        matches = frame.loc[frame['id'] == search_id]
        print(key)
        print(matches.place)

# Print, for every dataframe in d_raw, the place of the rows whose id is 13.
searchit(d_raw, 13)


In [33]:
#d_2_p['eh_su_2_p'].to_csv('eh_su_2_p.csv', sep=',', index=False, encoding='latin-1')
#pd.options.display.max_rows = 500
#display(d_raw['2_p_all'])
#d_su['ap_su']
#d_su['ap_su'].loc[50]['time_end'] - d1['ap_pm'].loc[4]['time']
# Write every raw story-unit dataframe to '<key>.csv' in the working directory.
for story_key, units in d_su.items():
    units.to_csv(story_key + '.csv', sep=',', index=False, encoding='latin-1')

In [124]:
# Let pandas print up to 500 rows and 500 columns, then display the 'eh_su'
# story-unit dataframe in full.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
display(d_su['eh_su'])

Unnamed: 0,id,session_num,su_num,time_clip_start,time_clip_end,time_start,time_end,place,place_raw,scale,journey,scale_order,agg_su_id,time_length,num_minutes,mention_freq,mention_index,mention_places,mention_match_freq,mention_match_index,mention_match_places,mention_match_time_index,mention_first_match_time,mention_first_match_time_ratio,mention_coarser_match_freq,mention_coarser_match_index,mention_coarser_match_places,mention_finer_match_freq,mention_finer_match_index,mention_finer_match_places,transcript
0,1,1,1,0.0,0.0,1900-01-01 00:00:00,1900-01-01 00:00:15,_end,none,,,8,1.0,"[00, 00, 15]",0.25,0,,,0,,,,0,0.0,0,,,0,,,
1,2,1,2,,,1900-01-01 00:00:15,1900-01-01 00:00:40,"Trois-Rivières, Quebec",Trois-Rivières; Quebec,city / area,,4,2.0,"[00, 00, 25]",0.416667,3,;1;2;3,";Trois-Rivières, Canada;Trois-Rivières, Canada...",0,,,,0,0.0,0,,,0,,,
2,3,1,3,,,1900-01-01 00:00:40,1900-01-01 00:00:45,_end,none,,,8,3.0,"[00, 00, 05]",0.083333,0,,,0,,,,0,0.0,0,,,0,,,
3,4,1,4,,,1900-01-01 00:00:45,1900-01-01 00:01:20,Rwanda,Rwanda,country,,6,4.0,"[00, 00, 35]",0.583333,3,;4;5;6,;Rwanda;Belgium;Rwanda,2,;4;6,;Rwanda;Rwanda,;0 days 00:00:05;0 days 00:00:25,0 days 00:00:05,0.142857,0,,,0,,,
4,5,1,5,,,1900-01-01 00:01:20,1900-01-01 00:01:30,Rwanda,Rwanda,country,,6,4.0,"[00, 00, 10]",0.166667,0,,,0,,,,0,0.0,0,,,0,,,
5,6,1,6,,,1900-01-01 00:01:30,1900-01-01 00:02:10,Rwanda,Rwanda,country,,6,4.0,"[00, 00, 40]",0.666667,2,;7;8,;Rwanda;Rwanda,2,;7;8,;Rwanda;Rwanda,;0 days 00:00:00;0 days 00:00:10,0 days 00:00:00,0.0,0,,,0,,,
6,7,1,7,,,1900-01-01 00:02:10,1900-01-01 00:02:30,Rwanda,Rwanda,country,,6,4.0,"[00, 00, 20]",0.333333,1,;9,;Rwanda,1,;9,;Rwanda,;0 days 00:00:00,0 days 00:00:00,0.0,0,,,0,,,
7,8,1,8,,,1900-01-01 00:02:30,1900-01-01 00:06:00,"Karama, Rwanda",Karama; Rwanda,local,,3,8.0,"[00, 03, 30]",3.5,4,;10;11;12;13,";Rwanda;Gikongoro, Rwanda;Karama, Rwanda;Rukon...",1,;12,";Karama, Rwanda",;0 days 00:02:58,0 days 00:02:58,0.847619,1,;10,;Rwanda,0,,,
8,9,1,9,,,1900-01-01 00:06:00,1900-01-01 00:06:05,_end,none,,,8,9.0,"[00, 00, 05]",0.083333,0,,,0,,,,0,0.0,0,,,0,,,
9,10,1,10,,,1900-01-01 00:06:05,1900-01-01 00:07:45,"Rukondo, Rwanda",Rukondo; Rwanda,local,,3,10.0,"[00, 01, 40]",1.666667,0,,,0,,,,0,0.0,0,,,0,,,
