# Import libraries

In [1]:
import nltk
import stanza
import ast
from afinn import Afinn
afinn = Afinn()
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import wordnet as wn
from nltk.corpus import verbnet as vn
from nltk.corpus import opinion_lexicon
from nltk.stem import WordNetLemmatizer
from nltk.wsd import lesk
from nltk.corpus import wordnet
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import openpyxl

# Preprocessed Data Loading

In [2]:
# Column headers applied to the "Clean" sheet, in sheet order.
column_names = ["Sentence", "Label", "Their_Label", "tokens_pos", "entities", "dependencies"]

# Read the preprocessed sample from the Excel workbook and trim surrounding
# whitespace from every sentence.
# NOTE(review): the path is absolute and machine-specific — consider a configurable data directory.
shuffled_df = pd.read_excel(
    'C:/Users/Anastasiia Belkina/MANNHEIM/MASTER_THESIS_CODE/Rule-Based Classifier/shuffled_1_df_4th_model_100_rows.xlsx',
    sheet_name="Clean",
    names=column_names,
).assign(Sentence=lambda frame: frame['Sentence'].str.strip())

# Alternative source kept for reference (tab-separated text without "Their_Label"):
#column_names = ["Sentence", "Label", "tokens_pos", "entities", "dependencies"]
#shuffled_df = pd.read_csv('C:/Users/Anastasiia Belkina/MANNHEIM/MASTER_THESIS_CODE/Rule-Based Classifier/datasets_preprocessed/shuffled_df.txt', sep='\t', names=column_names)

# Uncomment to keep only the first 100 rows as examples:
#shuffled_df = shuffled_df.head(100)

In [3]:
shuffled_df.head()

Unnamed: 0,Sentence,Label,Their_Label,tokens_pos,entities,dependencies
0,"In 2011, of State Hillary Clinton promised the...",0,0,"[('In', 'ADP'), ('2011', 'NUM'), (',', 'PUNCT'...","[('2011', 'DATE'), ('State', 'ORG'), ('Hillary...","[('In', 2, 'case'), ('2011', 8, 'obl'), (',', ..."
1,"’ Today saw the debut of ”,” a new social expe...",0,0,"[('’', 'PUNCT'), ('Today', 'NOUN'), ('saw', 'V...","[('Twitch', 'PRODUCT')]","[('’', 3, 'punct'), ('Today', 3, 'nsubj'), ('s..."
2,"Singer, actor and activist Harry Belafonte, 88...",1,1,"[('Singer', 'NOUN'), (',', 'PUNCT'), ('actor',...","[('Harry Belafonte', 'PERSON'), ('88', 'DATE')...","[('Singer', 12, 'nsubj'), (',', 3, 'punct'), (..."
3,"Last week, Roberts said the teams medical staf...",0,1,"[('Last', 'ADJ'), ('week', 'NOUN'), (',', 'PUN...","[('Last week', 'DATE'), ('Roberts', 'PERSON'),...","[('Last', 2, 'amod'), ('week', 5, 'obl:tmod'),..."
4,"Indeed, I believe that Waitrose does more than...",0,1,"[('Indeed', 'ADV'), (',', 'PUNCT'), ('I', 'PRO...","[('Waitrose', 'ORG'), ('UK', 'GPE')]","[('Indeed', 4, 'advmod'), (',', 4, 'punct'), (..."


In [4]:
shuffled_df.isnull().values.any()

False

In [5]:
shuffled_df['Label'].value_counts()

Label
0    71
2    21
1     8
Name: count, dtype: int64

# Mapping Labels

In [6]:
# Recode labels in one replace() call: 2 -> 1, 3 -> 2, 4 -> 2 (labels 0 and 1 are not keys, so they stay as they are).
# NOTE(review): the value counts printed before this cell show only labels 0, 1 and 2, and the
# counts printed after it show only 0 and 1, so on this sample the step merges label 2 into label 1.
# The earlier note "0 - neutral, 1 - positive, 2 - negative" presumably describes the scheme
# before recoding — TODO confirm which scheme the downstream evaluation expects.
# Re-running this cell applies the mapping again (labels produced from 3/4 would then become 1),
# so it should be run exactly once per data load.
label_mapping = {2: 1, 3: 2, 4: 2}
shuffled_df['Label'] = shuffled_df['Label'].replace(label_mapping)

In [7]:
shuffled_df.head()

Unnamed: 0,Sentence,Label,Their_Label,tokens_pos,entities,dependencies
0,"In 2011, of State Hillary Clinton promised the...",0,0,"[('In', 'ADP'), ('2011', 'NUM'), (',', 'PUNCT'...","[('2011', 'DATE'), ('State', 'ORG'), ('Hillary...","[('In', 2, 'case'), ('2011', 8, 'obl'), (',', ..."
1,"’ Today saw the debut of ”,” a new social expe...",0,0,"[('’', 'PUNCT'), ('Today', 'NOUN'), ('saw', 'V...","[('Twitch', 'PRODUCT')]","[('’', 3, 'punct'), ('Today', 3, 'nsubj'), ('s..."
2,"Singer, actor and activist Harry Belafonte, 88...",1,1,"[('Singer', 'NOUN'), (',', 'PUNCT'), ('actor',...","[('Harry Belafonte', 'PERSON'), ('88', 'DATE')...","[('Singer', 12, 'nsubj'), (',', 3, 'punct'), (..."
3,"Last week, Roberts said the teams medical staf...",0,1,"[('Last', 'ADJ'), ('week', 'NOUN'), (',', 'PUN...","[('Last week', 'DATE'), ('Roberts', 'PERSON'),...","[('Last', 2, 'amod'), ('week', 5, 'obl:tmod'),..."
4,"Indeed, I believe that Waitrose does more than...",0,1,"[('Indeed', 'ADV'), (',', 'PUNCT'), ('I', 'PRO...","[('Waitrose', 'ORG'), ('UK', 'GPE')]","[('Indeed', 4, 'advmod'), (',', 4, 'punct'), (..."


In [8]:
shuffled_df.isnull().values.any()

False

In [9]:
shuffled_df['Label'].value_counts()

Label
0    71
1    29
Name: count, dtype: int64

# Turning strings back to lists and tuples

In [10]:
def convert_to_list(dependencies_str):
    """Turn the string form of a Python list back into a real list.

    Only strings that start with "[" and end with "]" are parsed (with
    ``ast.literal_eval``). Anything else — an existing list, a string in
    another shape, or a value of any other type — is handed back unchanged.

    Raises:
        ValueError, SyntaxError: when a bracketed string cannot be parsed;
            the offending text is printed before the error propagates.
    """
    looks_like_list = (
        isinstance(dependencies_str, str)
        and dependencies_str.startswith("[")
        and dependencies_str.endswith("]")
    )
    if not looks_like_list:
        # Already-parsed lists and unexpected types pass straight through.
        return dependencies_str

    try:
        return ast.literal_eval(dependencies_str)
    except (ValueError, SyntaxError):
        print(f"Error parsing: {dependencies_str}")
        raise

In [11]:
# Parse the three stringified annotation columns back into Python lists of tuples.
for column in ('dependencies', 'tokens_pos', 'entities'):
    shuffled_df[column] = shuffled_df[column].apply(convert_to_list)

# Following the Modified Algorithm of Blame/Praise Identification

In [12]:
# Functions for Step 1: Find all the valid verbs in the sentence and save them in the format: word, own_index, head_index, tag

def is_foreseeability_verb(verb):
    """Return True if any WordNet verb sense of `verb` is in a foreseeability class.

    Each verb synset's lexname is split on "." and the second part is
    compared with the fixed set of class names below. A word without verb
    synsets yields False.
    """
    foreseeability_classes = {'communication', 'creation', 'consumption', 'competition', 'possession', 'motion'}
    return any(
        synset.lexname().split('.')[1] in foreseeability_classes
        for synset in wn.synsets(verb, pos=wn.VERB)
    )


def is_coercion_verb(verb):
    """Return True if `verb` maps to one of the coercion VerbNet classes.

    For every WordNet verb synset of `verb`, the first lemma's key is cut at
    "%" and the part before it is looked up with ``vn.classids``; the verb is
    a coercion verb as soon as one returned class id is in the set below.

    NOTE(review): class ids are compared by exact string, so ids with a
    different suffix would not match — confirm that these three ids are
    spelled as in the installed VerbNet corpus.
    """
    coercion_classes = {'urge-58.1', 'force-59', 'forbid-67'}
    for synset in wn.synsets(verb, pos=wn.VERB):
        # Only the first lemma of each synset is consulted.
        first_lemma_name = synset.lemmas()[0].key().split('%')[0]
        if not coercion_classes.isdisjoint(vn.classids(first_lemma_name)):
            return True
    return False


def is_valid_verb(verb):
    """
    A verb is valid when it is a foreseeability verb and not a coercion verb.
    """
    return is_foreseeability_verb(verb) and not is_coercion_verb(verb)
    

def find_all_valid_verbs_in_row(row):
    """
    Step 1: collect every valid verb of one sentence.

    Reads `row['tokens_pos']` ((token, pos) pairs) and `row['dependencies']`,
    which is indexed in parallel with the tokens. Returns a list of
    (word, own_index, head_index, tag) tuples, where own_index is the
    1-based token position and word/head_index/tag come from the dependency
    entry at that position. Dependency entries that do not have exactly
    three fields are skipped.
    """
    dependencies = row['dependencies']
    tokens_pos = row['tokens_pos']

    all_verbs_list = []
    for position, (token, pos) in enumerate(tokens_pos, start=1):
        # Keep only tokens tagged as verbs that pass the validity checks.
        if 'VERB' not in pos or not is_valid_verb(token):
            continue
        dependency = dependencies[position - 1]
        if len(dependency) != 3:
            continue
        word, head_index, tag = dependency
        all_verbs_list.append((word, position, head_index, tag))
    return all_verbs_list

In [13]:
# Run Step 1 on every sentence, then move the result next to the sentence and its label.
shuffled_df['All_Valid_Verbs'] = shuffled_df.apply(find_all_valid_verbs_in_row, axis=1)
leading_columns = ['Sentence', 'Label', 'All_Valid_Verbs']
remaining_columns = [col for col in shuffled_df.columns if col not in leading_columns]
shuffled_df = shuffled_df[leading_columns + remaining_columns]

In [14]:
def check_agent_validity(agent_combo, position, row):
    """Decide whether a dependency entry denotes a valid agent.

    Args:
        agent_combo: (word, head_index, tag) dependency entry of the candidate.
        position: 1-based position of the candidate in `row['dependencies']`.
        row: mapping with 'entities' ((text, label) pairs) and 'dependencies'.

    Returns:
        (agent_is_valid, name_addition). `name_addition` is the last word
        attached to the candidate with a "flat" or "appos" relation that also
        occurs in the entity listing, or None when there is none.

    The candidate is valid when any of these holds:
      1. its text occurs inside an entity labelled PERSON, NORP, ORG or GPE;
      2. its lower-cased form, or the WordNet lemma of that form, is in the
         list of agent-like words (roles, pronouns, quantifiers);
      3. a "flat"/"appos" dependent of it occurs in the entity listing
         (e.g. "Singer ... Harry Belafonte").
    """
    agent, agent_head, agent_tag = agent_combo
    entities = row['entities']
    dependencies = row['dependencies']
    valid_ent_labels = {"PERSON", "NORP", "ORG", "GPE"}
    valid_additional_words = {
        "accountant", "administration", "advisor", "agent", "ambassador", "attorney", "bureaucrat",
        "candidate", "chancellor", "chief", "commissioner", "congress",
        "congressman", "congresswoman", "council", "councillor", "court", "critic", "deputy", "diplomat",
        "executive", "gentleman", "governor", "head", "house", "immigrants", "inspector",
        "journalist", "judge", "lady", "legislator", "manager", "man", "mayor", "member", "minister", "ministry",
        "monarchy", "mr.", "mrs.", "ms.", "officer", "office", "ombudsman",
        "parliament", "parliamentary", "person", "police", "president", "prosecutor", "representative",
        "secretary", "senate", "senator", "speaker", "whip", "woman",
        "he", "she", "they", "i", "we", "you",
        "anyone", "anybody", "everyone", "everybody", "someone", "somebody", "no one", "nobody", "each", "both", "few", "many", "several", "some",
        "this", "that", "these", "those",
    }

    # 1) Named-entity check. This is a substring test, as in the original rule.
    agent_is_valid = any(
        agent in entity and label in valid_ent_labels
        for entity, label in entities
    )

    # 2) Lexicon check. The surface form is tried first so that entries stored
    #    in inflected form (e.g. "immigrants") can match; the lemmatizer is
    #    only created when that lookup fails.
    if not agent_is_valid:
        lowered = agent.lower()
        agent_is_valid = (
            lowered in valid_additional_words
            or WordNetLemmatizer().lemmatize(lowered) in valid_additional_words
        )

    # 3) Name parts attached to the candidate through "flat"/"appos".
    name_addition = None
    entities_str = ' '.join([f"({tup[0]}, {tup[1]})" for tup in entities])
    for name_combo in dependencies:
        # Validate the entry being unpacked (the old code tested agent_combo here).
        if len(name_combo) != 3:
            continue
        name, name_head, name_tag = name_combo
        if name_head == position and name_tag in ("flat", "appos") and name in entities_str:
            agent_is_valid = True
            name_addition = name

    return agent_is_valid, name_addition

def find_valid_agent(verb_combo, row):
    """Find the agent of a verb and report whether the clause is passive.

    Search order (the first hit wins):
      1. an "nsubj" of the verb itself that passes check_agent_validity;
      2. an "nsubj" of the verb's head that passes check_agent_validity;
      3. an "nsubj:pass" of the verb itself;
      4. an "nsubj:pass" of the verb's head.
    Passive subjects are accepted without a validity check.

    Args:
        verb_combo: (word, own_index, head_index, tag) of the verb.
        row: mapping with the sentence's 'dependencies'.

    Returns:
        (valid_agent, passive). `valid_agent` is None when nothing was found;
        for active subjects any "flat"/"appos" name part reported by
        check_agent_validity is appended after a space.
    """
    _, own_index, head_index, _ = verb_combo
    dependencies = row['dependencies']
    governors = (own_index, head_index)

    # Active voice: subject of the verb first, then subject of its head.
    for governor in governors:
        for position, candidate in enumerate(dependencies, start=1):
            if len(candidate) != 3:
                continue
            agent, agent_head, agent_tag = candidate
            if agent_head != governor or agent_tag != 'nsubj':
                continue
            agent_is_valid, name_addition = check_agent_validity(candidate, position, row)
            if not agent_is_valid:
                continue
            if name_addition is not None:
                return agent + " " + name_addition, False
            return agent, False

    # Passive voice: same order of governors.
    for governor in governors:
        for candidate in dependencies:
            if len(candidate) != 3:
                continue
            agent, agent_head, agent_tag = candidate
            if agent_head == governor and agent_tag == 'nsubj:pass':
                return agent, True

    return None, False

#############################################################################

def find_valid_object(verb_combo, row):
    """Find the object of a verb, directly or through an xcomp of the verb.

    Priority of relations: obj, xcomp->obj, iobj, xcomp->iobj, obl,
    xcomp->obl. Within one priority level the first match in sentence order
    is returned.

    Args:
        verb_combo: (word, own_index, head_index, tag) of the verb.
        row: mapping with the sentence's 'dependencies', a list of
            (word, head_index, tag) entries; entries of any other length
            are ignored.

    Returns:
        (valid_object, xcomp_word). `xcomp_word` is the xcomp verb through
        which the object was reached, or None for a direct object. Both are
        None when no object exists.
    """
    _, own_index, _, _ = verb_combo

    # Well-formed entries paired with their 1-based sentence position.
    entries = [
        (position, dependency)
        for position, dependency in enumerate(row['dependencies'], start=1)
        if len(dependency) == 3
    ]
    # Open clausal complements governed by the verb.
    xcomps = [
        (position, word)
        for position, (word, head, tag) in entries
        if head == own_index and tag == 'xcomp'
    ]

    for wanted_tag in ('obj', 'iobj', 'obl'):
        # Dependents of the verb itself take precedence ...
        for _, (word, head, tag) in entries:
            if head == own_index and tag == wanted_tag:
                return word, None
        # ... over dependents of one of its xcomp verbs.
        for xcomp_position, xcomp_word in xcomps:
            for _, (word, head, tag) in entries:
                if head == xcomp_position and tag == wanted_tag:
                    return word, xcomp_word

    return None, None

#############################################################################

def define_polarity(verb, obj):
    """
    Score the polarity of a verb + object pair.

    Positive and negative evidence is accumulated from three sources over
    the two-word context "<verb> <obj>":
      * SentiWordNet scores of the Lesk-disambiguated verb and noun senses,
      * the AFINN score of the context (its absolute value counts as
        negative evidence when the score is not above zero),
      * the number of context tokens found in the opinion lexicon's positive
        and negative word lists.

    Returns 1 when positive evidence is larger, 2 when negative evidence is
    larger, and 0 on a tie.
    """
    context = f"{verb} {obj}"
    tokens = context.split()

    pos_score = 0
    neg_score = 0

    # SentiWordNet contribution: verb sense first, then the object's noun sense.
    for word, wordnet_pos in ((verb, 'v'), (obj, 'n')):
        sense = lesk(tokens, word, wordnet_pos)
        if sense:
            senti = swn.senti_synset(sense.name())
            pos_score += senti.pos_score()
            neg_score += senti.neg_score()

    # AFINN contribution.
    afinn_score = afinn.score(context)
    if afinn_score > 0:
        pos_score += afinn_score
    else:
        neg_score += abs(afinn_score)

    # Opinion-lexicon contribution; each word list is fetched once.
    positive_words = opinion_lexicon.positive()
    negative_words = opinion_lexicon.negative()
    pos_score += sum(1 for token in tokens if token in positive_words)
    neg_score += sum(1 for token in tokens if token in negative_words)

    if pos_score > neg_score:
        return 1
    if neg_score > pos_score:
        return 2
    return 0

def adjust_sentiment_for_negation(row, polarity, verb_combo):
    """
    Flip the polarity of a verb when a negation word modifies it.

    A negation is an "advmod" dependent of the verb whose lower-cased text is
    in the negation list. Both apostrophe variants of the clitic ("n't" and
    "n’t") are recognised, and matching ignores case so that a
    sentence-initial "Never" counts as well.

    Args:
        row: mapping with the sentence's 'dependencies'.
        polarity: 1 (positive), 2 (negative) or 0 (neutral).
        verb_combo: (word, own_index, head_index, tag) of the verb.

    Returns:
        (polarity, neg_word). Polarity 1 becomes 2 and 2 becomes 1 at the
        first negation found; `neg_word` is that word as written in the
        sentence. A neutral polarity, or a verb without negation, is
        returned unchanged with `neg_word` set to None.
    """
    negation_words = {
        'not', "n't", 'n’t', 'no', 'never', 'barely', 'hardly',
        'scarcely', 'rarely', 'seldom', 'neither', 'nor',
    }
    flipped = {1: 2, 2: 1}
    _, index, _, _ = verb_combo

    # Nothing to flip for a neutral (or unknown) polarity.
    if polarity not in flipped:
        return polarity, None

    for related in row['dependencies']:
        if len(related) != 3:
            continue
        related_word, related_head, related_rel = related
        if related_head != index or related_rel != 'advmod':
            continue
        if isinstance(related_word, str) and related_word.lower() in negation_words:
            return flipped[polarity], related_word

    return polarity, None

#############################################################################

def find_all_valid_events_and_polarities(row, all_verbs_list):
    """Step 2: build (agent, verb, object, polarity) events for the valid verbs.

    For each verb in `all_verbs_list`:
      * Active clause: the agent comes from find_valid_agent and the object
        from find_valid_object.
      * Passive clause: the passive subject returned by find_valid_agent is
        the object, and the agent is the first "obl:agent" dependent of the
        verb that passes check_agent_validity.
      * Verbs lacking either an agent or an object are skipped.
      * Polarity comes from define_polarity(verb, object) and, when it is
        not neutral, is passed through adjust_sentiment_for_negation.
      * In the reported event a negation word is prefixed to the verb and an
        xcomp verb is prefixed to the object.

    All intermediate values are reset for every verb, so nothing found for
    one verb can leak into the event of the next one.

    Args:
        row: mapping with the sentence's 'dependencies' (and whatever the
            helper functions read from it).
        all_verbs_list: (word, own_index, head_index, tag) tuples.

    Returns:
        List of (valid_agent, verb, valid_object, polarity) tuples; empty
        when no verb yields a complete event.
    """
    dependencies = row['dependencies']
    all_valid_events_and_polarities = []

    for verb_combo in all_verbs_list:
        verb, own_index, head_index, tag = verb_combo
        valid_xcomp = None
        neg_word = None

        # Subject of the verb (or of its head) and the voice of the clause.
        subject, passive = find_valid_agent(verb_combo, row)
        if subject is None:
            continue

        if not passive:
            valid_agent = subject
            # Own object, or the object reached through an xcomp.
            valid_object, valid_xcomp = find_valid_object(verb_combo, row)
        else:
            # In a passive clause nsubj:pass is the object; the agent is obl:agent.
            valid_object = subject
            valid_agent = None
            for position, agent_combo in enumerate(dependencies, start=1):
                if len(agent_combo) != 3:
                    continue
                agent_word, agent_head, agent_tag = agent_combo
                if agent_head != own_index or agent_tag != 'obl:agent':
                    continue
                # Pass the full entry and its 1-based position, as the checker expects.
                if check_agent_validity(agent_combo, position, row)[0]:
                    valid_agent = agent_word
                    break

        if valid_agent is None or valid_object is None:
            continue

        # Polarity of the verb + object combination.
        polarity = define_polarity(verb, valid_object)

        # Adjust the polarity for negation.
        if polarity != 0:
            polarity, neg_word = adjust_sentiment_for_negation(row, polarity, verb_combo)

        # If the polarity was flipped, show the negation word before the verb.
        if neg_word is not None:
            verb = neg_word + " " + verb

        # If the object was reached through an xcomp, show it before the object.
        if valid_xcomp is not None:
            valid_object = valid_xcomp + " " + valid_object

        all_valid_events_and_polarities.append((valid_agent, verb, valid_object, polarity))

    return all_valid_events_and_polarities

In [15]:
def new_algorithm(row):
    """Run the two-step extraction on one sentence row.

    Returns the list of (agent, verb, object, polarity) events, or the
    integer 0 when the sentence has no valid verb or no complete event.
    """
    # Step 1 - find all the valid verbs in the sentence and save them in the
    # format: word, own_index, head_index, tag
    valid_verbs = find_all_valid_verbs_in_row(row)
    if not valid_verbs:
        return 0

    # Step 2 - for each valid verb find its valid agent and object, define the
    # polarity and save them in the format: agent, verb, object, polarity
    events = find_all_valid_events_and_polarities(row, valid_verbs)
    return events if events else 0

In [16]:
# Run the full algorithm on every sentence, then move the result next to the sentence and its label.
shuffled_df['All_Triplets_and_Polarities'] = shuffled_df.apply(new_algorithm, axis=1)
leading_columns = ['Sentence', 'Label', 'All_Triplets_and_Polarities']
remaining_columns = [col for col in shuffled_df.columns if col not in leading_columns]
shuffled_df = shuffled_df[leading_columns + remaining_columns]

In [17]:
shuffled_df

Unnamed: 0,Sentence,Label,All_Triplets_and_Polarities,All_Valid_Verbs,Their_Label,tokens_pos,entities,dependencies
0,"In 2011, of State Hillary Clinton promised the...",0,0,"[(promised, 8, 0, root), (use, 18, 8, xcomp)]",0,"[(In, ADP), (2011, NUM), (,, PUNCT), (of, ADP)...","[(2011, DATE), (State, ORG), (Hillary Clinton,...","[(In, 2, case), (2011, 8, obl), (,, 2, punct),..."
1,"’ Today saw the debut of ”,” a new social expe...",0,0,"[(saw, 3, 0, root), (playing, 15, 13, acl)]",0,"[(’, PUNCT), (Today, NOUN), (saw, VERB), (the,...","[(Twitch, PRODUCT)]","[(’, 3, punct), (Today, 3, nsubj), (saw, 0, ro..."
2,"Singer, actor and activist Harry Belafonte, 88...",1,"[(Singer Harry, announce, endorsement, 1)]","[(announce, 12, 0, root), (avowed, 20, 21, amod)]",1,"[(Singer, NOUN), (,, PUNCT), (actor, NOUN), (a...","[(Harry Belafonte, PERSON), (88, DATE), (Democ...","[(Singer, 12, nsubj), (,, 3, punct), (actor, 1..."
3,"Last week, Roberts said the teams medical staf...",0,0,"[(said, 5, 0, root), (surprised, 12, 5, ccomp)...",1,"[(Last, ADJ), (week, NOUN), (,, PUNCT), (Rober...","[(Last week, DATE), (Roberts, PERSON), (Kersha...","[(Last, 2, amod), (week, 5, obl:tmod), (,, 5, ..."
4,"Indeed, I believe that Waitrose does more than...",0,"[(Waitrose, does, more, 2), (Waitrose, support...","[(does, 7, 4, ccomp), (support, 14, 7, advcl)]",1,"[(Indeed, ADV), (,, PUNCT), (I, PRON), (believ...","[(Waitrose, ORG), (UK, GPE)]","[(Indeed, 4, advmod), (,, 4, punct), (I, 4, ns..."
...,...,...,...,...,...,...,...,...
95,"Bombelyn, who had worked with a grassroots gro...",1,"[(Bombelyn, attacked, Cahill, 2), (Bombelyn, c...","[(worked, 5, 1, acl:relcl), (attacked, 23, 0, ...",2,"[(Bombelyn, PROPN), (,, PUNCT), (who, PRON), (...","[(Bombelyn, PERSON), (Rutgers University, ORG)...","[(Bombelyn, 23, nsubj), (,, 1, punct), (who, 5..."
96,After the first known use of a military-grade ...,1,"[(Britain, blamed, President, 2), (Britain, ex...","[(blamed, 22, 0, root), (attempted, 29, 30, am...",0,"[(After, ADP), (the, DET), (first, ADJ), (know...","[(first, ORDINAL), (European, NORP), (World Wa...","[(After, 5, case), (the, 5, det), (first, 5, a..."
97,"Of the 712 Democratic superdelegates, 449 (or ...",1,"[(449 percent, support, Clinton, 1)]","[(support, 15, 0, root), (according, 18, 24, c...",0,"[(Of, ADP), (the, DET), (712, NUM), (Democrati...","[(712, CARDINAL), (Democratic, NORP), (449, CA...","[(Of, 5, case), (the, 5, det), (712, 5, nummod..."
98,"From the start of the war, the Pentagon has ho...",0,"[(Pentagon, hoped, start, 0), (Pentagon, surre...","[(hoped, 11, 0, root), (invading, 17, 18, amod...",0,"[(From, ADP), (the, DET), (start, NOUN), (of, ...","[(Pentagon, ORG), (Iraqi, NORP)]","[(From, 3, case), (the, 3, det), (start, 11, o..."


In [18]:
shuffled_df['All_Triplets_and_Polarities'].isnull().values.any()

False

In [19]:
shuffled_df['All_Valid_Verbs'].isnull().values.any()

False

In [20]:
shuffled_df['All_Triplets_and_Polarities'].value_counts()

All_Triplets_and_Polarities
0                                                                       38
[(Lebanon, accuses, Israel, 2)]                                          1
[(Sen. Booker, testified, move, 0), (we, count, him, 0)]                 1
[(Tyler Florence, tackles, challenge, 2)]                                1
[(De Blasio, castigated, Trump, 2), (De Blasio, sharing, values, 0)]     1
                                                                        ..
[(Snowden, performed, service, 0)]                                       1
[(Obama, failed, LGBTs, 2)]                                              1
[(Jim Harbaugh, asked, autograph it, 0)]                                 1
[(Trump, criticized, time, 2)]                                           1
[(Pentagon, hoped, start, 0), (Pentagon, surrender, help, 1)]            1
Name: count, Length: 63, dtype: int64

# Export in Excel

In [21]:
# Write the whole processed frame (every row and column currently in shuffled_df) to an Excel
# workbook; the relative path resolves against the current working directory, and
# index=False leaves the row index out of the sheet.
shuffled_df.to_excel('shuffled_1_df_4th_model_more_agents_100_rows.xlsx', index=False)

# Verbs can be checked too

In [22]:
# Count and print the extracted events whose verb is a form of "do".
do_forms = ['do', 'does', 'did', 'done', 'doing']
counter = 0
for events_list in shuffled_df['All_Triplets_and_Polarities']:
    if events_list == 0:
        # 0 marks a sentence without any extracted event.
        continue
    for event in events_list:
        if event[1] in do_forms:
            counter += 1
            print(event)

('Waitrose', 'does', 'more', 2)
('Trump', 'doing', 'job', 2)


In [23]:
counter

2

In [24]:
counter / shuffled_df.shape[0]

0.02

In [25]:
# Count and print the extracted events whose verb is a form of "be",
# together with the full event list of the sentence they come from.
be_forms = ["be", "am", "is", "are", "was", "were", "being", "been"]
counter = 0
for events_list in shuffled_df['All_Triplets_and_Polarities']:
    if events_list == 0:
        # 0 marks a sentence without any extracted event.
        continue
    for event in events_list:
        if event[1] in be_forms:
            counter += 1
            print(event)
            print(events_list)

In [26]:
counter

0

In [27]:
counter / shuffled_df.shape[0]

0.0