### Load module

In [2]:
import os
import sys
# sys.path.insert(0, os.path.abspath('../novelgraphs/'))

# import novelgraphs as ng

import pandas as pd
from collections import defaultdict, Counter
from itertools import combinations
import networkx as nx
from operator import itemgetter
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
# Load the `text` object from a local pickle file; it is later passed to
# pipeline.annotate() and its `.tags` table is fed to the interaction functions.
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
text = pd.read_pickle('fight_text.pickle')

In [3]:
import json
# `say` is read as a module-level global by the dinamic_interaction_* functions,
# which test `Lemma in say` to recognise a token as a verb of interest.
# Presumably a collection of speech-verb lemmas -- confirm against say.json.
with open('say.json') as file:
    say = json.load(file)

In [4]:
%%time
# Build the annotation pipeline (quote -> first person -> character -> dialog)
# and run it over `text`.
# NOTE(review): `ng` is not defined by any visible cell -- `import novelgraphs as ng`
# is commented out in the import cell, so this cell raises NameError on a fresh kernel.
# corenlp = ng.annotators.CoreNLP()
# nernpid = ng.annotators.NerNpID()
quote = ng.annotators.Quote()
first_person = ng.annotators.FirstPerson()
character = ng.annotators.Character()
dialog = ng.annotators.Dialog()
pipeline = ng.annotators.Pipeline([quote, first_person, character, dialog])
pipeline.annotate(text)

CPU times: user 2min 3s, sys: 536 ms, total: 2min 4s
Wall time: 2min 5s


# Interactions

### Dynamic

In [1]:
def simple_interactions(table):
    '''
    Map every character to the sentences in which it is mentioned (NOT A PAIR LIST!).

    One entry is appended per token row whose CharacterID is set, so a character
    mentioned twice in the same sentence produces two identical entries.

    Parameters
    ----------
    table : pandas.DataFrame
        Token table with ``CharacterID`` and ``SentenceID`` columns.

    Returns
    -------
    dict
        ``{CharacterID: [[first_row, last_row, SentenceID], ...]}`` where
        ``first_row`` / ``last_row`` are the first and last index labels of the
        rows that share that SentenceID.

    Example
    -------
    >>> simple_interactions(table)
    {0: [[17160, 17169, 1184], [17160, 17169, 1184]],
     1: [[1169, 1224, 71],
         [1169, 1224, 71],
         [1242, 1245, 75],
         [1246, 1249, 76],
         [1250, 1266, 77], ...}
    '''
    # Find each sentence's first/last row label in a single pass instead of
    # re-filtering the whole table twice for every character mention.
    sentence_bounds = dict()
    for row, sentence_id in zip(table.index, table.SentenceID):
        if sentence_id in sentence_bounds:
            sentence_bounds[sentence_id][1] = row
        else:
            sentence_bounds[sentence_id] = [row, row]

    character_list = dict()
    for character_id, sentence_id in zip(table.CharacterID, table.SentenceID):
        # pd.isnull treats both None and NaN as "no character"; an `is not None`
        # test would let NaN through and collect it as if it were a character.
        if pd.isnull(character_id):
            continue
        first_row, last_row = sentence_bounds[sentence_id]
        character_list.setdefault(character_id, []).append(
            [first_row, last_row, sentence_id])
    return character_list

In [None]:
def simple_interactions_sentence(table):
    '''
    Group character mentions by sentence.

    Parameters
    ----------
    table : pandas.DataFrame
        Token table with ``CharacterID`` and ``SentenceID`` columns.

    Returns
    -------
    dict
        ``{SentenceID: [CharacterID, ...]}`` listing, in row order, every
        CharacterID that occurs in the sentence (repeats are kept). Sentences
        without any character are absent from the result.

    Example
    -------
    >>> simple_interactions_sentence(text.tags)
    {0: [13, 13],
     1: [13, 68],
     2: [68, 13, 13],
     3: [13],
     4: [68], ...}
    '''
    character_list = dict()
    for sentence_id, character_id in zip(table.SentenceID, table.CharacterID):
        # pd.isnull covers both None and NaN; `is not None` alone would let
        # NaN placeholders through as if they were characters.
        if pd.isnull(character_id):
            continue
        character_list.setdefault(sentence_id, []).append(character_id)
    return character_list

In [44]:
def dinamic_interaction_in_sequence(table):
    '''
    Collect ordered character pairs of the form "character, verb, character",
    i.e. three consecutive rows i, i+1, i+2 where rows i and i+2 carry a
    CharacterID and row i+1 is either tagged VBD/VBN/VBP or has a lemma found
    in the module-level ``say`` collection.

    Parameters
    ----------
    table : pandas.DataFrame
        Token table with ``CharacterID``, ``Pos``, ``Lemma`` and ``SentenceID``
        columns. Neighbours are looked up by label arithmetic (``i + 1``,
        ``i + 2``), so integer index labels are required.

    Returns
    -------
    dict
        ``{(left CharacterID, right CharacterID): [[first_row, last_row], ...]}``
        where the row labels delimit the sentence of the left character.

    NOTE(review): rows i+1 and i+2 are not checked to belong to the same
    sentence as row i -- confirm whether cross-sentence triples are intended.

    Example
    -------
    >>> dinamic_interaction_in_sequence(text.tags)
    {(1, 13): [[15560, 15594]],
     (13, 1): [[15002, 15030], [15526, 15559], [23530, 23545], [56694, 56697]],
     (13, 17): [[37049, 37066]],
     (13, 68): [[6647, 6658]],
     (68, 1): [[1733, 1740], ...}
    '''
    verb_tags = ('VBD', 'VBN', 'VBP')
    characters = table.CharacterID
    labels = table.index
    list_of_pairs = dict()
    for i in labels:
        # pd.isnull treats both None and NaN as "no character".
        if pd.isnull(characters.loc[i]):
            continue
        # Rows near the end of the table have no i+1 / i+2 neighbour; looking
        # them up unguarded would raise KeyError.
        if (i + 1) not in labels or (i + 2) not in labels:
            continue
        if pd.isnull(characters.loc[i + 2]):
            continue
        # The POS tag is tested first so the lemma lookup is only done when needed.
        if not (table.Pos.loc[i + 1] in verb_tags or table.Lemma.loc[i + 1] in say):
            continue
        # Filter the sentence once and reuse it for both boundaries.
        sentence_rows = table[table.SentenceID == table.loc[i, 'SentenceID']].index
        list_of_pairs.setdefault((characters.loc[i], characters.loc[i + 2]), []).append(
            [sentence_rows[0], sentence_rows[-1]])
    return list_of_pairs

In [73]:
def dinamic_interaction_dependences(table):
    '''
    Collect ordered character pairs attached to the same verb: one character
    before the verb and one at or after it, both with ``DepParse`` equal to the
    verb's ``TokenID``.

    A row counts as a verb when its ``Pos`` is VBD/VBN/VBP or its ``Lemma`` is
    found in the module-level ``say`` collection.

    Parameters
    ----------
    table : pandas.DataFrame
        Token table with ``SentenceID``, ``Pos``, ``Lemma``, ``CharacterID``,
        ``DepParse`` and ``TokenID`` columns.
        Presumably ``DepParse`` holds the TokenID of the token's syntactic
        head -- confirm against the annotator that fills it.

    Returns
    -------
    dict
        ``{(left CharacterID, right CharacterID): [[first_row, last_row], ...]}``
        where the row labels delimit the sentence containing the verb.

    Example
    -------
    >>> dinamic_interaction_dependences(text.tags)
    {(1, 13): [[23655, 23660], [46428, 46434], [49695, 49716]],
     (13, 1): [[56694, 56697]],
     (55, 3): [[6367, 6398]],
     (58, 60): [[21642, 21653]],
     (68, 1): [[1169, 1224], ...}
    '''
    verb_tags = ('VBD', 'VBN', 'VBP')
    list_of_pairs_dependences = dict()
    # groupby splits the table once; sort=False keeps sentences in order of
    # first appearance, like iterating over SentenceID.unique().
    for _, sentence in table.groupby('SentenceID', sort=False):
        for position, i in enumerate(sentence.index):
            if not (sentence.Pos.loc[i] in verb_tags or sentence.Lemma.loc[i] in say):
                continue
            verb_token = sentence.TokenID.loc[i]
            # Split positionally: rows before the verb / the verb and everything after.
            # This avoids `i - 1` label arithmetic on the index.
            left = sentence.iloc[:position]
            right = sentence.iloc[position:]
            # Each mask is built from the frame it filters, so pandas does not
            # have to re-align a sentence-wide mask onto a shorter slice.
            left_el = left[left.CharacterID.notnull()].index
            right_el = right[right.CharacterID.notnull()].index
            left_dependents = [e for e in left_el
                               if sentence.DepParse.loc[e] == verb_token]
            right_dependents = [r for r in right_el
                                if sentence.DepParse.loc[r] == verb_token]
            for e in left_dependents:
                for r in right_dependents:
                    list_of_pairs_dependences.setdefault(
                        (sentence.CharacterID.loc[e], sentence.CharacterID.loc[r]), []
                    ).append([sentence.index[0], sentence.index[-1]])
    return list_of_pairs_dependences

### Dialog / context

In [60]:
def get_interactions_from_conversation2(table):
    '''
    Cut the table into spans delimited by dialog boundaries and record which
    characters occur in each span. Spans with fewer than two character
    mentions are ignored.

    The spans are built from the first/last row label of every DialogID in
    ``range(table.DialogID.max())``: from label 0 to the start of the first
    dialog, from the end of each dialog to the start of the next one, and from
    the end of the last collected dialog to the last row of the table.

    NOTE(review): ``range(max)`` leaves out the largest DialogID itself, so the
    final span runs from the end of dialog ``max - 1`` to the end of the table.
    Confirm this is intended.

    Returns three dicts whose values are lists of ``[start_row, end_row]``:

    >>> dl, ds, dc = get_interactions_from_conversation2(text.tags)
    >>> dl
        keyed by the full sequence of CharacterIDs met in the span
    >>> {(13, 13, 13, 68, 68, 13, 13): [[0, 62]],
         (13, 13, 68, 68): [[40372, 40415]],
         (13, 13, 68, 68, 13, 68, 68, 17, 68, 13, 13): [[36781, 36978]], ...}

    >>> ds
        keyed by the unique CharacterIDs met in the span
    >>> {(1,): [[8511, 8626], [29732, 29778], [29882, 29995]],
         (1, 10, 68, 13): [[4165, 5004]],
         (1, 13): [[15262, 15409], [16568, 16605], [25029, 25052]],
         (1, 17, 68): [[1645, 1983], [3891, 4113]], ...}

    >>> dc
        keyed by every 2-combination of the unique CharacterIDs met in the span
    >>> {(1, 4): [[52620, 59233]],
         (1, 5): [[16237, 16382]],
         (1, 8): [[52620, 59233]],
         (1, 10): [[4165, 5004]], ...}

    '''
    by_sequence = {}
    by_members = {}
    by_pair = {}

    # Flat list: start_0, end_0, start_1, end_1, ...
    edges = []
    for dialog_id in range(table.DialogID.max()):
        dialog_rows = table[table.DialogID == dialog_id].index
        edges.append(dialog_rows[0])
        edges.append(dialog_rows[-1])

    # Re-pair the edges as (end of one dialog, start of the next), with an
    # opening span from label 0 and a closing span up to the last row.
    spans = [[0, edges[0]]]
    inner_edges = edges[1:-1]
    for offset in range(0, len(inner_edges), 2):
        spans.append(inner_edges[offset:offset + 2])
    spans.append([edges[-1], table.index.max()])

    for start, stop in spans:
        mentioned = table.CharacterID.loc[start:stop]
        mentioned = mentioned[mentioned.notnull()]
        if len(mentioned) <= 1:
            continue
        members = set(mentioned)
        # A fresh [start, stop] list per dict, so the results never share list objects.
        by_sequence.setdefault(tuple(mentioned), []).append([start, stop])
        by_members.setdefault(tuple(members), []).append([start, stop])
        for pair in combinations(members, 2):
            by_pair.setdefault(tuple(pair), []).append([start, stop])
    return by_sequence, by_members, by_pair

In [61]:
# NOTE(review): no visible cell defines `get_interactions_from_conversation`; the
# function defined in this notebook is `get_interactions_from_conversation2` --
# confirm which one was meant before re-running on a fresh kernel.
dl, ds, dc = get_interactions_from_conversation(text.tags)

In [66]:
# NOTE(review): slicing needs `dl` to be a sequence. `get_interactions_from_conversation2`
# returns dicts, which do not support `[:5]`, so the saved output of this cell
# comes from a list-returning variant.
dl[:5]

[[(10, 10, 10, 68, 68, 10, 10), (0, 62)],
 [(68, 10, 68, 10, 10, 68, 10, 68, 10, 10), (204, 1102)],
 [(68,
   68,
   10,
   68,
   15,
   15,
   68,
   10,
   10,
   15,
   15,
   68,
   15,
   10,
   15,
   10,
   68,
   39,
   68,
   10,
   68,
   68,
   68,
   68,
   14,
   43,
   14,
   14,
   14,
   14),
  (1125, 1499)],
 [(14, 68, 14, 68), (1565, 1622)],
 [(68, 68, 68, 68, 68, 15, 15, 14, 14, 68, 14, 68, 14), (1645, 1983)]]

# Extractor

- Static

from Dynamic to Static by ***_flatten()***

- n sentences (any window length)

# Aggregator