# An introduction to `relatio` 
**Runtime $\sim$ 20min**

Original paper: ["Text Semantics Capture Political and Economic Narratives"](https://arxiv.org/abs/2108.01720)

----------------------------

This is a short demo of the package `relatio`. It takes a text corpus as input and outputs a list of narrative statements. The pipeline is unsupervised: the user does not need to specify narratives beforehand. Narrative statements are defined as tuples of semantic roles with an (agent, verb, patient, attribute) structure.

Here, we present the main wrapper functions to quickly obtain narrative statements from a corpus.

----------------------------

In this tutorial, we work with tweets from candidates in the 2022 French presidential election.

----------------------------

In [None]:
# Set up relatio's FileLogger at WARNING level so that warnings are caught
# by the logger instead of cluttering the notebook output.
from relatio import FileLogger

logger = FileLogger(level="WARNING")

In [None]:
from relatio import list_data
# Show the datasets bundled with relatio (presumably the names accepted by
# load_data's `dataset` argument -- confirm against the relatio docs).
list_data()

In [None]:
from relatio import load_data

# Fetch the raw tweets of the French election candidates.
df = load_data(content="raw", dataset="tweets_candidates_french_elections")

# Keep only the rows whose `candidate` column equals "yjadot".
is_selected_candidate = df["candidate"] == "yjadot"
df = df[is_selected_candidate]

# Preview the first rows of the filtered data.
df.head()

In [None]:
import string

from relatio import Preprocessor

# Stop words: every single lowercase ASCII letter plus the token 'rt'
# (presumably the retweet marker found in tweets).
alphabet_string = string.ascii_lowercase
alphabet_list = [*alphabet_string, 'rt']

# Text-cleaning configuration based on the small French spaCy model.
# NOTE(review): n_process=-1 presumably means "use all available cores",
# as in spaCy -- confirm against the relatio docs.
p = Preprocessor(
    spacy_model="fr_core_news_sm",
    lowercase=True,
    lemmatize=True,
    remove_punctuation=True,
    remove_digits=True,
    stop_words=alphabet_list,
    remove_chars = ["\"",'-',"^",".","?","!",";","(",")",",",":","\'","+","&","|","/","{","}",
                    "~","_","`","[","]",">","<","=","*","%","$","@","#","’"],
    batch_size=100,
    n_process=-1,
)

# Split the documents into sentences; nothing is written to disk
# because output_path is None.
df = p.split_into_sentences(df, progress_bar=True, output_path=None)

In [None]:
# Extract subject-verb-object roles from every sentence. The call returns
# two values, unpacked here as `sentence_index` and `roles`.
sentence_index, roles = p.extract_svos(df["sentence"], progress_bar=True)

# Inspect the first 20 extracted role sets.
for svo in roles[:20]:
    print(svo)

In [None]:
from relatio.utils import load_roles

# Clean the extracted roles with the preprocessor settings and save the
# result as JSON.
postproc_roles = p.process_roles(
    roles,
    output_path="./output/postproc_roles.json",
    max_length=50,
    progress_bar=True,
)

# Reload the saved roles from disk; this shows how a later session can
# resume from the file without redoing the processing.
postproc_roles = load_roles("./output/postproc_roles.json")

# Inspect the first 5 processed role sets.
for d in postproc_roles[:5]:
    print(d)

In [None]:
from relatio.utils import load_entities

# Mine entities from the sentences and save the result to a pickle file.
known_entities = p.mine_entities(
    df["sentence"],
    output_path="./output/entities.pkl",
    clean_entities=True,
    progress_bar=True,
)

# Reload the saved entities from disk, mirroring how a later session
# would pick them up.
known_entities = load_entities("./output/entities.pkl")

# Show the 10 most frequent entities.
for n in known_entities.most_common(10):
    print(n)

In [None]:
# Names of the 100 most frequent entities, skipping the empty string.
top_known_entities = [
    entity
    for entity, _count in known_entities.most_common(100)
    if entity != ''
]

In [None]:
from relatio import Embeddings

# Embedding model built on the small French spaCy model; the corpus
# sentences are passed in through `sentences`.
nlp_model = Embeddings(
    "spaCy",
    "fr_core_news_sm",
    sentences=df["sentence"],
)

In [None]:
from relatio import NarrativeModel
from relatio.utils import prettify
from collections import Counter

In [None]:
# First narrative model: HDBSCAN clustering with PCA and UMAP disabled.
# ARG0 and ARG1 are matched against the known entities by character
# matching and are also listed as roles with unknown entities.
m1 = NarrativeModel(
    clustering="hdbscan",
    PCA=False,
    UMAP=False,
    embeddings_model=nlp_model,
    roles_considered=["ARG0", "B-V", "B-ARGM-NEG", "ARG1"],
    roles_with_known_entities=["ARG0", "ARG1"],
    known_entities=top_known_entities,
    assignment_to_known_entities="character_matching",
    roles_with_unknown_entities=["ARG0", "ARG1"],
    threshold=0.3,
)

# Fit the model on the processed roles, weighting by frequency.
m1.fit(postproc_roles, weight_by_frequency=True)

In [None]:
# Run model m1 on the processed roles to obtain the narratives.
narratives = m1.predict(postproc_roles, progress_bar = True)

In [None]:
from relatio.utils import prettify

# Pass every predicted narrative through prettify (presumably producing a
# human-readable rendering -- see the relatio docs).
pretty_narratives = [prettify(n) for n in narratives]

# Print the first statements at each stage: raw roles, processed roles and
# the prettified narrative. Slicing plus zip, instead of indexing with
# range(10), stops cleanly when fewer than 10 statements are available
# rather than raising an IndexError.
for raw_role, processed_role, pretty in zip(
    roles[:10], postproc_roles[:10], pretty_narratives[:10]
):
    print(raw_role)
    print(processed_role)
    print(pretty)

In [None]:
# Second narrative model: k-means clustering with both PCA and UMAP
# enabled. The remaining settings are the same as in the HDBSCAN model.
m2 = NarrativeModel(
    clustering="kmeans",
    PCA=True,
    UMAP=True,
    embeddings_model=nlp_model,
    roles_considered=["ARG0", "B-V", "B-ARGM-NEG", "ARG1"],
    roles_with_known_entities=["ARG0", "ARG1"],
    known_entities=top_known_entities,
    assignment_to_known_entities="character_matching",
    roles_with_unknown_entities=["ARG0", "ARG1"],
    threshold=0.3,
)

# Fit the model on the processed roles, weighting by frequency.
m2.fit(postproc_roles, progress_bar=True, weight_by_frequency=True)

In [None]:
# Run model m2 on the processed roles; this overwrites `narratives`.
narratives = m2.predict(postproc_roles, progress_bar = True)

In [None]:
from relatio.utils import prettify

# Pass every predicted narrative through prettify (presumably producing a
# human-readable rendering -- see the relatio docs).
pretty_narratives = [prettify(n) for n in narratives]

# Print the first statements at each stage: raw roles, processed roles and
# the prettified narrative. Slicing plus zip, instead of indexing with
# range(10), stops cleanly when fewer than 10 statements are available
# rather than raising an IndexError.
for raw_role, processed_role, pretty in zip(
    roles[:10], postproc_roles[:10], pretty_narratives[:10]
):
    print(raw_role)
    print(processed_role)
    print(pretty)

In [None]:
from relatio import build_graph, draw_graph

# Build a graph from the narratives with top_n=100 and pruning enabled.
G = build_graph(narratives, top_n=100, prune_network=True)

# Draw the graph in notebook mode and write it to an HTML file.
draw_graph(
    G,
    notebook=True,
    show_buttons=False,
    width="1600px",
    height="1000px",
    output_filename="./output/network_of_narratives.html",
)