In [65]:
import pandas as pd
from os import path
import plotly.express as px

In [66]:
# Segment timing table: the CSV provides `start` and `end` columns, from which
# a per-row length column (`durations`) is derived.
durations = pd.read_csv('/galitylab/data/studyforrest-data-annotations/old/structure/segments.csv')
durations['durations'] = durations['end'] - durations['start']
durations['durations'][0]  # bare lookup; not the cell's last expression, so nothing is displayed

# Read the eight per-run event files and shift each run's onsets by the summed
# length of the runs that precede it, so all events share one timeline.
start_time = 0
all_segments = []
for i in range(8):
    segment = i + 1  # run files are numbered from 1, duration rows from 0
    run_events = pd.read_csv(f'/galitylab/data/studyforrest-data-annotations/segments/avmovie/emotions_av_1s_events_run-{segment}_events.tsv', sep='\t')
    run_events['onset'] = run_events['onset'] + start_time
    all_segments.append(run_events)
    start_time += durations['durations'][i]

# The per-run row indices are kept as-is, so index values repeat across runs.
df = pd.concat(all_segments)


In [67]:
# Show only the columns used by the rest of the analysis.
df.loc[:, ['character', 'onset', 'duration']]

Unnamed: 0,character,onset,duration
0,FORREST,192.00,12.0
1,FORREST,204.00,2.0
2,FORREST,206.00,3.0
3,FORREST,209.00,16.0
4,FORREST,225.00,15.0
...,...,...,...
108,FORREST,7017.68,4.0
109,FORREST,7021.68,10.0
110,FORREST,7031.68,1.0
111,FORREST,7032.68,3.0


In [68]:
def merge_consecutive_labels(labels: pd.DataFrame, labels_col: str, tol: float = 1e-6) -> pd.DataFrame:
    """Merge back-to-back rows that carry the same label into one exposure.

    Rows are visited in their existing order. A row is folded into the
    previously emitted exposure when it has the same label and starts where
    that exposure ends (previous onset + previous duration), within ``tol``.
    Otherwise it starts a new exposure.

    Args:
        labels: Frame with a ``labels_col`` column plus numeric ``onset`` and
            ``duration`` columns.
        labels_col: Name of the column holding the label to merge on.
        tol: Absolute tolerance, in the same units as ``onset``, used when
            deciding whether two events touch. Exact float equality is
            unreliable here because onsets and durations are sums of floats
            (e.g. ``0.1 + 0.2 != 0.3``).

    Returns:
        A new frame with columns ``labels_col``, ``onset`` and ``duration``,
        one row per merged exposure. The input frame is not modified.
    """
    merged = {labels_col: [], 'onset': [], 'duration': []}

    # Pull the three columns out once instead of building a row Series with
    # `.iloc[i]` several times per iteration.
    rows = zip(
        labels[labels_col].tolist(),
        labels['onset'].tolist(),
        labels['duration'].tolist(),
    )

    for label, onset, duration in rows:
        if merged[labels_col]:
            previous_end = merged['onset'][-1] + merged['duration'][-1]
            same_label = merged[labels_col][-1] == label
            # Same label as the previous exposure and they are consecutive
            # (previous end == this onset, up to `tol`): extend the previous
            # exposure instead of starting a new one.
            if same_label and abs(previous_end - onset) <= tol:
                merged['duration'][-1] += duration
                continue

        # Otherwise this row starts a new exposure.
        merged[labels_col].append(label)
        merged['onset'].append(onset)
        merged['duration'].append(duration)

    return pd.DataFrame(merged)

In [69]:
# Replace the per-event table with one row per uninterrupted character exposure.
# Note: this rebinds `df`, so re-running the cell merges the already-merged frame.
df = merge_consecutive_labels(labels=df, labels_col='character')

In [71]:
# px.timeline needs an explicit end column in addition to the start.
exposure_lengths = df['duration']
df['end'] = df['onset'] + exposure_lengths

fig = px.timeline(df, x_start="onset", x_end="end", y="character")
fig.update_yaxes(autorange="reversed")

# Force a plain numeric x axis and overwrite the first trace's x values with
# the raw durations from `df`.
fig.update_xaxes(type='linear')
fig.data[0].x = exposure_lengths.tolist()

fig.show()

In [74]:
def _load_runs(runs, durations):
    """Read the event files for `runs` and place them on one shared timeline.

    Each run's onsets are shifted by the summed length of the runs loaded
    before it in this call, so the first run in `runs` starts at offset 0.

    Args:
        runs: Iterable of 1-based run numbers, in playback order.
        durations: Frame whose `durations` column holds one length per run,
            with row `run - 1` belonging to run number `run`.

    Returns:
        The concatenated event frame for the requested runs.
    """
    offset = 0
    frames = []
    for run in runs:
        events = pd.read_csv(f'/galitylab/data/studyforrest-data-annotations/segments/avmovie/emotions_av_1s_events_run-{run}_events.tsv', sep='\t')
        events['onset'] = events['onset'] + offset
        frames.append(events)
        # Look the length up by run number, not by loop position: for runs 5-8
        # the loop position would point at the lengths of runs 1-4.
        offset += durations['durations'].iloc[run - 1]
    return pd.concat(frames)


durations = pd.read_csv('/galitylab/data/studyforrest-data-annotations/old/structure/segments.csv')
durations['durations'] = durations['end'] - durations['start']

# First half of the movie: runs 1-4. Second half: runs 5-8.
# Each half gets its own timeline starting at 0.
start_movie = merge_consecutive_labels(_load_runs(range(1, 5), durations), 'character')
end_movie = merge_consecutive_labels(_load_runs(range(5, 9), durations), 'character')

# Distribution of merged exposure durations per character, one figure per half.
px.histogram(start_movie, x='duration', color='character').show()
px.histogram(end_movie, x='duration', color='character').show()

In [100]:
import numpy as np
# Total exposure duration per character in each half of the movie.
# The column is selected explicitly: `.sum('duration')` would pass the string
# as the `numeric_only` argument rather than choosing a column, and would also
# sum the `onset` values.
start_movie_sum = start_movie.groupby('character')[['duration']].sum()
end_movie_sum = end_movie.groupby('character')[['duration']].sum()

# Outer join keeps characters that appear in only one half; their missing
# total becomes 0 after fillna.
joined = start_movie_sum.join(end_movie_sum, how='outer', lsuffix='_start', rsuffix='_end').reset_index()
joined = joined.fillna(0)

# Convert totals to each character's fraction of the half's total exposure.
joined['duration_start'] = joined['duration_start'] / joined['duration_start'].sum()
joined['duration_end'] = joined['duration_end'] / joined['duration_end'].sum()

In [103]:
# Side-by-side bars per character: share of exposure in the first half vs the
# second half, with characters ordered by their combined share.
fig = px.bar(
    joined,
    x="character",
    y=['duration_start', 'duration_end'],
    barmode='group',
    title='Percent of total exposure duration in half',
)
fig.update_layout(barmode='group')
fig.update_xaxes(categoryorder='total descending')
fig.show()


In [93]:
# Share of exposure per character in the first half of the movie.
px.pie(joined, values='duration_start', names='character', title='Beginning of movie character exposures').show()

# Share of exposure per character in the second half of the movie.
# The title previously repeated the "Beginning" label from the chart above.
px.pie(joined, values='duration_end', names='character', title='End of movie character exposures').show()