## Load Libraries & Data

In [16]:
import textacy

In [3]:
# IMPORTS
import re, spacy, textacy
import numpy as np, pandas as pd

# Regex patterns for the bracketed, non-speech annotations found in the
# transcripts. Raw strings keep the backslash-escaped parentheses literal
# without relying on Python passing unknown escape sequences through
# (which triggers a SyntaxWarning on recent Python versions).
parentheticals = [
    r"\(laughter\)", r"\(applause\)", r"\(music\)",
    r"\(video\)", r"\(laughs\)", r"\(applause ends\)",
    r"\(audio\)", r"\(singing\)", r"\(music ends\)",
    r"\(cheers\)", r"\(cheering\)", r"\(recording\)",
    r"\(beatboxing\)", r"\(audience\)", r"\(guitar strum\)",
    r"\(clicks metronome\)", r"\(sighs\)", r"\(guitar\)",
    r"\(marimba sounds\)", r"\(drum sounds\)",
]

def remove_parentheticals(text, patterns=None, lowercase=True):
    """Replace transcript annotations such as "(laughter)" with a space.

    Args:
        text: The string to clean.
        patterns: Iterable of regular-expression strings to strip. When
            omitted, the module-level ``parentheticals`` list is used.
        lowercase: When True (the default, which matches this function's
            previous output) the text is lowercased before substitution.
            Pass False to keep the original casing; matching is
            case-insensitive either way.

    Returns:
        The cleaned string, with every match replaced by a single space.
    """
    if patterns is None:
        patterns = parentheticals
    # Lowercase once up front instead of on every pass through the loop.
    new_text = text.lower() if lowercase else text
    for rgx_match in patterns:
        new_text = re.sub(rgx_match, ' ', new_text, flags=re.IGNORECASE)
    return new_text

# Read the three gender-partitioned CSVs (indexed by talk ID), in the order
# male, female, no-gender, then stack them into one frame of all talks.
talks_m, talks_f, talks_nog = (
    pd.read_csv(csv_name, index_col='Talk_ID')
    for csv_name in ('talks_male.csv', 'talks_female.csv', 'talks_nog.csv')
)
talks_all = pd.concat([talks_m, talks_f, talks_nog])

# Keep only the transcript text of each talk, as plain Python lists.
texts = talks_all['text'].tolist()
texts_f = talks_f['text'].tolist()
texts_m = talks_m['text'].tolist()

# Sanity check: frame dimensions and how many talks fall in each list.
n_rows, n_cols = talks_all.shape
print(
    f"From our {n_rows}x{n_cols} CSV, "
    f"we have a list of {len(texts)} talks: {len(texts_f)} by women and "
    f"{len(texts_m)} by men."
)

From our 992x14 CSV, we have a list of 992 talks: 260 by women and 720 by men.


## Textacy

Textacy is fussy about the size of the texts fed to it, raising a `ValueError` when a text exceeds `nlp.max_length`. The workaround here is to create `docs`, a list of spaCy `Doc` objects. The preview below demonstrates that each item in the list has the characteristics of a spaCy doc.

Textacy does have a `corpus` object, but it is not straightforward to implement.

```python
corpus = textacy.Corpus("en_core_web_sm", data=docs)
```

In [36]:
# Load the spaCy pipeline used to parse the talks
nlp = spacy.load('en_core_web_lg')

# Run the women's talks through the pipeline and keep the parsed docs
docs = [parsed for parsed in nlp.pipe(texts_f)]

# Cell output: the `preview` custom attribute of the first doc
# (presumably registered by textacy -- confirm for the installed version)
docs[0]._.preview

### Working through a Document

In [75]:
# Print the first five tokens of the first talk with their tag and
# part-of-speech labels.
first_tokens = docs[0][:5]
for tok in first_tokens:
    print(tok, tok.tag_, tok.pos_)  # spacy.explain(tok.tag_) spells a tag out

   _SP SPACE
If IN SCONJ
you PRP PRON
're VBP AUX
here RB ADV


In [40]:
# Extract every subject-verb-object triple from the first talk and keep them
# in a list so they can be sliced and re-scanned in the cells below.
first_doc_triples = textacy.extract.triples.subject_verb_object_triples(docs[0])
SVOs = list(first_doc_triples)
len(SVOs)

146

In [76]:
# Eyeball the first ten triples in full.
for triple in SVOs[:10]:
    print(triple)

SVOTriple(subject=[development], verb=[will, save], object=[us])
SVOTriple(subject=[She], verb=[turned], object=[to, be, a, much, bigger, dog, than, I, 'd, anticipated])
SVOTriple(subject=[part], verb=[handled], object=[percent])
SVOTriple(subject=[that], verb=[bring], object=[truck, trips])
SVOTriple(subject=[area], verb=[has], object=[one])
SVOTriple(subject=[I], verb=[was, contacted], object=[Parks, Department])
SVOTriple(subject=[I], verb=[mentioned], object=[that])
SVOTriple(subject=[she], verb=[pulled], object=[me])
SVOTriple(subject=[she], verb=[were], object=[dragging, me])
SVOTriple(subject=[I], verb=[wo, n't, mention], object=[that])


In [44]:
# Show only the subject (first element) of each of the first ten triples.
for subject, verb, obj in SVOs[:10]:
    print(subject)

[development]
[She]
[part]
[that]
[area]
[I]
[I]
[she]
[she]
[I]


In [60]:
# Among the first ten triples, keep those whose subject prints as "[I]".
for triple in SVOs[:10]:
    subject = triple[0]
    if str(subject) != '[I]':
        continue
    print(triple)

SVOTriple(subject=[I], verb=[was, contacted], object=[Parks, Department])
SVOTriple(subject=[I], verb=[mentioned], object=[that])
SVOTriple(subject=[I], verb=[wo, n't, mention], object=[that])


In [61]:
# Same filter as above, applied to every triple in the talk.
i_triples = [triple for triple in SVOs if str(triple[0]) == '[I]']
for triple in i_triples:
    print(triple)

SVOTriple(subject=[I], verb=[was, contacted], object=[Parks, Department])
SVOTriple(subject=[I], verb=[mentioned], object=[that])
SVOTriple(subject=[I], verb=[wo, n't, mention], object=[that])
SVOTriple(subject=[I], verb=['m, going], object=[to, exchange, marriage, vows, with, my, beloved])
SVOTriple(subject=[I], verb=[do], object=[which])
SVOTriple(subject=[I], verb=[watched], object=[half])
SVOTriple(subject=[I], verb=[told], object=[you])
SVOTriple(subject=[I], verb=[wrote], object=[dollar, transportation, grant])
SVOTriple(subject=[I], verb=[like], object=[that])
SVOTriple(subject=[I], verb=[have], object=[all])
SVOTriple(subject=[I], verb=[do, not, expect], object=[individuals, corporations, government])
SVOTriple(subject=[I], verb=['ll, tell], object=[you])
SVOTriple(subject=[I], verb=[like], object=[what])
SVOTriple(subject=[I], verb=[told], object=[you])
SVOTriple(subject=[I], verb=['ve, embraced], object=[capitalist])
SVOTriple(subject=[I], verb=[do, n't, have], object=[proble

The next step will be to locate the last item in the verb list.

In [65]:
# For first-person subjects, print only the last token of the verb list
# (sliced, so it is displayed as a one-element list).
for subject, verb, obj in SVOs:
    if str(subject) != '[I]':
        continue
    print(verb[-1:])

[contacted]
[mentioned]
[mention]
[going]
[do]
[watched]
[told]
[wrote]
[like]
[have]
[expect]
[tell]
[like]
[told]
[embraced]
[have]
[have]
[trying]
[have]
[asked]


In [66]:
# First-person subjects again: last verb token alongside the object.
for subject, verb, obj in SVOs:
    is_first_person = str(subject) == '[I]'
    if is_first_person:
        print(verb[-1:], obj)

[contacted] [Parks, Department]
[mentioned] [that]
[mention] [that]
[going] [to, exchange, marriage, vows, with, my, beloved]
[do] [which]
[watched] [half]
[told] [you]
[wrote] [dollar, transportation, grant]
[like] [that]
[have] [all]
[expect] [individuals, corporations, government]
[tell] [you]
[like] [what]
[told] [you]
[embraced] [capitalist]
[have] [problem]
[have] [problem]
[trying] [to, build]
[have] [time]
[asked] [him]


In [74]:
# Same view for third-person feminine subjects; the comparison is
# case-sensitive, so both capitalizations are listed.
she_forms = ('[She]', '[she]')
for subject, verb, obj in SVOs:
    if str(subject) in she_forms:
        print(verb[-1:], obj)

[turned] [to, be, a, much, bigger, dog, than, I, 'd, anticipated]
[pulled] [me]
[were] [dragging, me]
[kept] [dragging, me]


### Useful Code

In [85]:
def actions (terms, doc):
    """Print the last verb token and the object for each matching SVO triple.

    Args:
        terms: Subject strings to look for, written the way a triple's
            subject list prints, e.g. ``'[I]'`` or ``'[She]'``. Matching is
            exact and case-sensitive. A single string is accepted and treated
            as a one-item list.
        doc: The parsed document passed to textacy's
            ``subject_verb_object_triples`` extractor.

    Returns:
        None. Matches are printed grouped by term, in the order the terms
        are given, as ``[last_verb_token] [object tokens]``.
    """
    # A bare string would otherwise be iterated character by character and
    # silently match nothing.
    if isinstance(terms, str):
        terms = [terms]
    # Materialized as a list so the triples can be re-scanned for every term.
    svotriples = list(textacy.extract.triples.subject_verb_object_triples(doc))
    for term in terms:
        for subject, verb, obj in svotriples:
            if str(subject) == term:
                print(verb[-1:], obj)

**Next steps:**

- Rewrite code to return appended lists for I, He, She. 
- Work on adaptation for objective cases. 
- Work on code to compile / visualize this as a network graph (?). So count up repeated verbs, etc.

- *Do we need NLTK code to compare results?*

In [87]:
# Subject to search for, written the way a triple's subject list prints.
terms = ['[I]']

# Run the helper on the first talk only.
first_doc = docs[0]
actions(terms, first_doc)

[contacted] [Parks, Department]
[mentioned] [that]
[mention] [that]
[going] [to, exchange, marriage, vows, with, my, beloved]
[do] [which]
[watched] [half]
[told] [you]
[wrote] [dollar, transportation, grant]
[like] [that]
[have] [all]
[expect] [individuals, corporations, government]
[tell] [you]
[like] [what]
[told] [you]
[embraced] [capitalist]
[have] [problem]
[have] [problem]
[trying] [to, build]
[have] [time]
[asked] [him]


In [89]:
# Repeat the same search over the first two talks, one after the other.
for talk_doc in docs[:2]:
    actions(terms, talk_doc)

[contacted] [Parks, Department]
[mentioned] [that]
[mention] [that]
[going] [to, exchange, marriage, vows, with, my, beloved]
[do] [which]
[watched] [half]
[told] [you]
[wrote] [dollar, transportation, grant]
[like] [that]
[have] [all]
[expect] [individuals, corporations, government]
[tell] [you]
[like] [what]
[told] [you]
[embraced] [capitalist]
[have] [problem]
[have] [problem]
[trying] [to, build]
[have] [time]
[asked] [him]
[came] [myself]
[presented] [myself]
[going] [to, have, a, party, and, a, cake, and, get, a, lot, of, presents]
[heard] [phrase]
[paid] [attention]
[noticed] [it]
[known] [this]
[realized] [information]
[left] [kitchen]
[missed] [age]
[help] [someone]
[found] [Bill]
[turned] [to, run]
[turned] [it, seven]
[said] [it, seven]
[know] [it]
[turning] [seven]
[planned] [party]
[need] [to, speak, to, you, privately]
[told] [them]
[started] [to, tell, you, your, birthday, was, September, 10th]
[had] [to, change, the, date, of, my, slumber, party, with, all, of, my, girl