# Import libraries

In [1]:
import nltk
import stanza
import ast
from afinn import Afinn
afinn = Afinn()
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import wordnet as wn
from nltk.corpus import verbnet as vn
from nltk.corpus import opinion_lexicon
from nltk.wsd import lesk
from nltk.corpus import wordnet
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import openpyxl

# Preprocessed Data Loading

In [2]:
# Load the data
column_names = ["Sentence", "Label", "tokens_pos", "entities", "dependencies"]
shuffled_df = pd.read_csv('C:/Users/Anastasiia Belkina/MANNHEIM/MASTER_THESIS_CODE/Rule-Based Classifier/datasets_preprocessed/shuffled_df.txt', sep='\t', names=column_names)

# Remove leading and trailing spaces in the "Sentence" column
shuffled_df['Sentence'] = shuffled_df['Sentence'].str.strip()

# First 100 rows for examples
#shuffled_df = shuffled_df.head(100)

In [3]:
shuffled_df.head()

Unnamed: 0,Sentence,Label,tokens_pos,entities,dependencies
0,The faculty's Academic Senate threw its suppor...,0,"[('The', 'DET'), ('faculty', 'NOUN'), (""'s"", '...","[('Academic Senate', 'ORG'), ('Monday', 'DATE'...","[('The', 2, 'det'), ('faculty', 5, 'nmod:poss'..."
1,But the true scale of the movement remains unc...,3,"[('But', 'CCONJ'), ('the', 'DET'), ('true', 'A...","[('Miskito', 'NORP'), ('Nicaraguan', 'NORP')]","[('But', 8, 'cc'), ('the', 4, 'det'), ('true',..."
2,(Branstad is ripping Cruz for taking the bold ...,0,"[('(', 'PUNCT'), ('Branstad', 'PROPN'), ('is',...","[('Branstad', 'PERSON'), ('Cruz', 'PERSON'), (...","[('(', 4, 'punct'), ('Branstad', 4, 'nsubj'), ..."
3,But the Marlins have failed to make the postse...,0,"[('But', 'CCONJ'), ('the', 'DET'), ('Marlins',...","[('Marlins', 'ORG'), ('Loria', 'PERSON')]","[('But', 5, 'cc'), ('the', 3, 'det'), ('Marlin..."
4,"There is nothing new yet on the table, but one...",1,"[('There', 'PRON'), ('is', 'VERB'), ('nothing'...","[('one', 'CARDINAL'), ('Gannett', 'ORG'), ('Tr...","[('There', 2, 'expl'), ('is', 0, 'root'), ('no..."


In [4]:
shuffled_df.isnull().values.any()

False

In [5]:
shuffled_df['Label'].value_counts()

Label
0    3390
3    1631
1     814
4     237
2     155
Name: count, dtype: int64

# Mapping Labels

In [6]:
# Mapping dictionary: 0 - neutral, 1 - positive, 2 - negative
label_mapping = {2: 1, 3: 2, 4: 2}
shuffled_df['Label'] = shuffled_df['Label'].replace(label_mapping)

In [7]:
shuffled_df.head()

Unnamed: 0,Sentence,Label,tokens_pos,entities,dependencies
0,The faculty's Academic Senate threw its suppor...,0,"[('The', 'DET'), ('faculty', 'NOUN'), (""'s"", '...","[('Academic Senate', 'ORG'), ('Monday', 'DATE'...","[('The', 2, 'det'), ('faculty', 5, 'nmod:poss'..."
1,But the true scale of the movement remains unc...,2,"[('But', 'CCONJ'), ('the', 'DET'), ('true', 'A...","[('Miskito', 'NORP'), ('Nicaraguan', 'NORP')]","[('But', 8, 'cc'), ('the', 4, 'det'), ('true',..."
2,(Branstad is ripping Cruz for taking the bold ...,0,"[('(', 'PUNCT'), ('Branstad', 'PROPN'), ('is',...","[('Branstad', 'PERSON'), ('Cruz', 'PERSON'), (...","[('(', 4, 'punct'), ('Branstad', 4, 'nsubj'), ..."
3,But the Marlins have failed to make the postse...,0,"[('But', 'CCONJ'), ('the', 'DET'), ('Marlins',...","[('Marlins', 'ORG'), ('Loria', 'PERSON')]","[('But', 5, 'cc'), ('the', 3, 'det'), ('Marlin..."
4,"There is nothing new yet on the table, but one...",1,"[('There', 'PRON'), ('is', 'VERB'), ('nothing'...","[('one', 'CARDINAL'), ('Gannett', 'ORG'), ('Tr...","[('There', 2, 'expl'), ('is', 0, 'root'), ('no..."


In [8]:
shuffled_df.isnull().values.any()

False

In [9]:
shuffled_df['Label'].value_counts()

Label
0    3390
2    1868
1     969
Name: count, dtype: int64

In [10]:
shuffled_df

Unnamed: 0,Sentence,Label,tokens_pos,entities,dependencies
0,The faculty's Academic Senate threw its suppor...,0,"[('The', 'DET'), ('faculty', 'NOUN'), (""'s"", '...","[('Academic Senate', 'ORG'), ('Monday', 'DATE'...","[('The', 2, 'det'), ('faculty', 5, 'nmod:poss'..."
1,But the true scale of the movement remains unc...,2,"[('But', 'CCONJ'), ('the', 'DET'), ('true', 'A...","[('Miskito', 'NORP'), ('Nicaraguan', 'NORP')]","[('But', 8, 'cc'), ('the', 4, 'det'), ('true',..."
2,(Branstad is ripping Cruz for taking the bold ...,0,"[('(', 'PUNCT'), ('Branstad', 'PROPN'), ('is',...","[('Branstad', 'PERSON'), ('Cruz', 'PERSON'), (...","[('(', 4, 'punct'), ('Branstad', 4, 'nsubj'), ..."
3,But the Marlins have failed to make the postse...,0,"[('But', 'CCONJ'), ('the', 'DET'), ('Marlins',...","[('Marlins', 'ORG'), ('Loria', 'PERSON')]","[('But', 5, 'cc'), ('the', 3, 'det'), ('Marlin..."
4,"There is nothing new yet on the table, but one...",1,"[('There', 'PRON'), ('is', 'VERB'), ('nothing'...","[('one', 'CARDINAL'), ('Gannett', 'ORG'), ('Tr...","[('There', 2, 'expl'), ('is', 0, 'root'), ('no..."
...,...,...,...,...,...
6222,Conservatives have sided with government moves...,2,"[('Conservatives', 'PROPN'), ('have', 'AUX'), ...","[('Conservatives', 'NORP'), ('Shin', 'PERSON')...","[('Conservatives', 3, 'nsubj'), ('have', 3, 'a..."
6223,"However, former prosecutor Andrew McCarthy, no...",0,"[('However', 'ADV'), (',', 'PUNCT'), ('former'...","[('Andrew McCarthy', 'PERSON'), ('National Rev...","[('However', 15, 'advmod'), (',', 1, 'punct'),..."
6224,Dunham’s accusing the left of “hostility” and ...,0,"[('Dunham', 'PROPN'), ('’s', 'AUX'), ('accusin...","[('Dunham’s', 'PERSON'), ('thousands', 'CARDIN...","[('Dunham', 3, 'nsubj'), ('’s', 3, 'aux'), ('a..."
6225,"In the aftermath of the attack, Thoiry is warn...",0,"[('In', 'ADP'), ('the', 'DET'), ('aftermath', ...","[('Thoiry', 'PERSON'), ('European', 'NORP')]","[('In', 3, 'case'), ('the', 3, 'det'), ('after..."


# Turning strings back to lists and tuples

In [11]:
def convert_to_list(dependencies_str):
    # Check if it's a string and if it appears to be in the list of tuples format
    if isinstance(dependencies_str, str) and dependencies_str.startswith("[") and dependencies_str.endswith("]"):
        try:
            # Convert string representation of list back to actual list of tuples
            return ast.literal_eval(dependencies_str)
        except (ValueError, SyntaxError) as e:
            print(f"Error parsing: {dependencies_str}")
            raise e
    elif isinstance(dependencies_str, list):
        # If it's already a list, return as is
        return dependencies_str
    else:
        # If it's another unexpected type, return as is or handle appropriately
        return dependencies_str

In [12]:
# Apply the function to your datasets
shuffled_df['dependencies'] = shuffled_df['dependencies'].apply(convert_to_list)
shuffled_df['tokens_pos'] = shuffled_df['tokens_pos'].apply(convert_to_list)
shuffled_df['entities'] = shuffled_df['entities'].apply(convert_to_list)

# Following the Modified Algorithm of Blame/Praise Identification

In [13]:
# Functions for Step 1: Find all the valid verbs in the sentence and safe them in the format: word, own_index, head_index, tag

def is_foreseeability_verb(verb):
    # This function checks whether a verb belongs to a predefined set of foreseeability-related verb classes.
    foreseeability_classes = {'communication', 'creation', 'consumption', 'competition', 'possession', 'motion'}
    synsets = wn.synsets(verb, pos=wn.VERB)  # Fetches all verb synsets for the word
    for synset in synsets:
        lexname = synset.lexname().split('.')[1]  # Extracts the lexical category (i.e., type of action)
        if lexname in foreseeability_classes:  # Checks if the lexical category is in the foreseeability class
            return True  # Returns True if the verb matches any foreseeability category
    return False  # If no match is found, returns False


def is_coercion_verb(verb):
    # This function checks whether a verb belongs to a predefined set of coercion-related VerbNet classes.
    coercion_classes = {'urge-58.1', 'force-59', 'forbid-67'}
    synsets = wn.synsets(verb, pos=wn.VERB)  # Fetches all verb synsets for the word
    for synset in synsets:
        lemma = synset.lemmas()[0]  # Gets the first lemma for each synset
        vn_classes = lemma.key().split('%')[0]  # Extracts the lemma key
        vn_class_ids = vn.classids(vn_classes)  # Fetches the VerbNet classes for the lemma
        if any(vn_class in coercion_classes for vn_class in vn_class_ids):  # Checks for a match in coercion classes
            return True  # If a match is found in coercion classes, return True
    return False  # If no match is found, return False


def is_valid_verb(verb):
    """
    Check if the given verb passes the foreseeability and coercion checks.
    """
    if is_foreseeability_verb(verb) and not is_coercion_verb(verb):
        return True
    return False
    

def find_all_valid_verbs_in_row(row):
    """
    Main function to find valid verbs
    """
    dependencies = row['dependencies']  # Dependency relations for the sentence
    tokens_pos = row['tokens_pos']  # POS-tagged tokens for the sentence

    all_verbs_list = []

    for own_index, (token, pos) in enumerate(tokens_pos):
        if 'VERB' in pos:  # Ensure the word is tagged as a verb
            if is_valid_verb(token):  # Check if it is valid
                # Ensure the dependency has exactly 3 values to unpack
                if len(dependencies[own_index]) == 3:
                    word, head_index, tag = dependencies[own_index]
                    #print(word, own_index + 1, head_index, tag)
                    all_verbs_list.append((word, own_index + 1, head_index, tag))
    return all_verbs_list

In [14]:
shuffled_df['All_Valid_Verbs'] = shuffled_df.apply(find_all_valid_verbs_in_row, axis=1)
shuffled_df = shuffled_df[['Sentence', 'Label', 'All_Valid_Verbs'] + [col for col in shuffled_df.columns if col not in ['Sentence', 'Label', 'All_Valid_Verbs']]]

In [15]:
shuffled_df

Unnamed: 0,Sentence,Label,All_Valid_Verbs,tokens_pos,entities,dependencies
0,The faculty's Academic Senate threw its suppor...,0,"[(threw, 6, 0, root)]","[(The, DET), (faculty, NOUN), ('s, PART), (Aca...","[(Academic Senate, ORG), (Monday, DATE), (the ...","[(The, 2, det), (faculty, 5, nmod:poss), ('s, ..."
1,But the true scale of the movement remains unc...,2,"[(warning, 14, 8, conj)]","[(But, CCONJ), (the, DET), (true, ADJ), (scale...","[(Miskito, NORP), (Nicaraguan, NORP)]","[(But, 8, cc), (the, 4, det), (true, 4, amod),..."
2,(Branstad is ripping Cruz for taking the bold ...,0,"[(ripping, 4, 0, root), (criticizing, 12, 10, ...","[((, PUNCT), (Branstad, PROPN), (is, AUX), (ri...","[(Branstad, PERSON), (Cruz, PERSON), (Iowa, GPE)]","[((, 4, punct), (Branstad, 4, nsubj), (is, 4, ..."
3,But the Marlins have failed to make the postse...,0,"[(failed, 5, 0, root), (marked, 18, 5, conj), ...","[(But, CCONJ), (the, DET), (Marlins, PROPN), (...","[(Marlins, ORG), (Loria, PERSON)]","[(But, 5, cc), (the, 3, det), (Marlins, 5, nsu..."
4,"There is nothing new yet on the table, but one...",1,"[(is, 2, 0, root), (said, 16, 2, conj)]","[(There, PRON), (is, VERB), (nothing, PRON), (...","[(one, CARDINAL), (Gannett, ORG), (Tribune Pub...","[(There, 2, expl), (is, 0, root), (nothing, 2,..."
...,...,...,...,...,...,...
6222,Conservatives have sided with government moves...,2,"[(sided, 3, 0, root), (expel, 8, 3, advcl), (a...","[(Conservatives, PROPN), (have, AUX), (sided, ...","[(Conservatives, NORP), (Shin, PERSON), (North...","[(Conservatives, 3, nsubj), (have, 3, aux), (s..."
6223,"However, former prosecutor Andrew McCarthy, no...",0,"[(said, 15, 0, root)]","[(However, ADV), (,, PUNCT), (former, ADJ), (p...","[(Andrew McCarthy, PERSON), (National Review, ...","[(However, 15, advmod), (,, 1, punct), (former..."
6224,Dunham’s accusing the left of “hostility” and ...,0,"[(accusing, 3, 0, root), (comes, 14, 3, conj)]","[(Dunham, PROPN), (’s, AUX), (accusing, VERB),...","[(Dunham’s, PERSON), (thousands, CARDINAL), (T...","[(Dunham, 3, nsubj), (’s, 3, aux), (accusing, ..."
6225,"In the aftermath of the attack, Thoiry is warn...",0,"[(warning, 10, 0, root)]","[(In, ADP), (the, DET), (aftermath, NOUN), (of...","[(Thoiry, PERSON), (European, NORP)]","[(In, 3, case), (the, 3, det), (aftermath, 10,..."


In [16]:
def check_agent_validity(agent, row):

    agent_is_valid = False
    entities = row['entities']
    valid_ent_labels = ["PERSON", "NORP", "ORG", "GPE"]
    valid_additional_words = [
        "accountant", "administration", "advisor", "agent", "ambassador", "attorney", "bureaucrat", 
        "candidate", "chancellor", "chief", "commissioner", "commissioner", "congress", 
        "congressman", "congresswoman", "council", "councillor", "court", "deputy", "diplomat", 
        "executive", "gentleman", "governor", "head", "house", "immigrants", "inspector", 
        "judge", "lady", "legislator", "manager", "man", "mayor", "member", "minister", "ministry", 
        "monarchy", "mr.", "mrs.", "ms.", "officer", "office", "ombudsman", 
        "parliament", "parliamentary", "person", "police", "president", "prosecutor", "representative", 
        "secretary", "secretary", "senate", "senator", "speaker", "whip", "woman", 
        "he", "she", "they", "i", "we", "you",
        "anyone", "anybody", "everyone", "everybody", "someone", "somebody", "no one", "nobody", "each", "both", "few", "many", "several", "some", 
        "this", "that", "these", "those"
    ]
    
    # Check if the related_word is a valid agent based on NER and additional terms
    for entity, label in entities: 
        if agent in entity and label in valid_ent_labels:  
            agent_is_valid = True  # Valid agent based on NER

    # Check if the word is in additional valid agent words
    if not agent_is_valid and agent.lower() in valid_additional_words:
        agent_is_valid = True  
    
    return agent_is_valid

def find_valid_agent(verb_combo, row):
    
    passive = False
    agent_is_valid = False
    valid_agent = None
    verb, own_index, head_index, tag = verb_combo
    dependencies = row['dependencies']

    for agent_combo in dependencies:
        
        if len(agent_combo) == 3:
            agent, agent_head, agent_tag = agent_combo

            # Not Passive
            if agent_head == own_index and agent_tag in ['nsubj']:
                agent_is_valid = check_agent_validity(agent, row)
            else:
                # Checking if the agent of the root of that verb is valid
                if agent_head == head_index and agent_tag in ['nsubj']: 
                    agent_is_valid = check_agent_validity(agent, row)
            
            # Passive
            if agent_is_valid == False:
                if agent_head == own_index and agent_tag in ['nsubj:pass']:
                    passive = True
                    agent_is_valid = True
                else:
                    # Checking if the agent of the root of that verb is valid
                    if agent_head == head_index and agent_tag in ['nsubj:pass']: 
                        passive = True
                        agent_is_valid = True

            if agent_is_valid == True:
                valid_agent = agent
                return valid_agent, passive

    return valid_agent, passive

#############################################################################

def find_valid_object(verb_combo, row):
    
    valid_object = None
    dependencies = row['dependencies']
    verb, own_index, head_index, tag = verb_combo

    # Priority of object tags: obj - xcomp_obj - iobj - xcomp_iobj - obl - xcomp_obl
    
    for object_combo in dependencies:
        if len(object_combo) == 3:
            object, object_head, object_tag = object_combo
            if object_head == own_index and object_tag == 'obj':
                valid_object = object
                return valid_object

    for i, xcomp_combo in enumerate(dependencies):
        if len(xcomp_combo) == 3:
            xcomp, xcomp_head, xcomp_tag = xcomp_combo
            if xcomp_head == own_index and xcomp_tag == 'xcomp':
                for related_to_xcomp in dependencies:
                    if len(related_to_xcomp) == 3:
                        related_to_xcomp_object, related_to_xcomp_object_head, related_to_xcomp_object_tag = related_to_xcomp
                        if related_to_xcomp_object_head == i+1 and related_to_xcomp_object_tag == 'obj':
                            valid_object = related_to_xcomp_object
                            return valid_object
    
    for object_combo in dependencies:
        if len(object_combo) == 3:
            object, object_head, object_tag = object_combo
            if object_head == own_index and object_tag == 'iobj':
                valid_object = object
                return valid_object

    for i, xcomp_combo in enumerate(dependencies):
        if len(xcomp_combo) == 3:
            xcomp, xcomp_head, xcomp_tag = xcomp_combo
            if xcomp_head == own_index and xcomp_tag == 'xcomp':
                for related_to_xcomp in dependencies:
                    if len(related_to_xcomp) == 3:
                        related_to_xcomp_object, related_to_xcomp_object_head, related_to_xcomp_object_tag = related_to_xcomp
                        if related_to_xcomp_object_head == i+1 and related_to_xcomp_object_tag == 'iobj':
                            valid_object = related_to_xcomp_object
                            return valid_object
    
    for object_combo in dependencies:
        if len(object_combo) == 3:
            object, object_head, object_tag = object_combo
            if object_head == own_index and object_tag == 'obl':
                valid_object = object
                return valid_object
    
    for i, xcomp_combo in enumerate(dependencies):
        if len(xcomp_combo) == 3:
            xcomp, xcomp_head, xcomp_tag = xcomp_combo
            if xcomp_head == own_index and xcomp_tag == 'xcomp':
                for related_to_xcomp in dependencies:
                    if len(related_to_xcomp) == 3:
                        related_to_xcomp_object, related_to_xcomp_object_head, related_to_xcomp_object_tag = related_to_xcomp
                        if related_to_xcomp_object_head == i+1 and related_to_xcomp_object_tag == 'obl':
                            valid_object = related_to_xcomp_object
                            return valid_object
    
    return valid_object

#############################################################################

def define_polarity(verb, obj):
    """
    Function to define the polarity of the verb + object combination.
    """
    context = f"{verb} {obj}"
    verb_sense = lesk(context.split(), verb, 'v')
    obj_sense = lesk(context.split(), obj, 'n')
    
    pos_score = neg_score = 0
    
    if verb_sense:
        swn_verb = swn.senti_synset(verb_sense.name())
        pos_score += swn_verb.pos_score()
        neg_score += swn_verb.neg_score()
    
    if obj_sense:
        swn_obj = swn.senti_synset(obj_sense.name())
        pos_score += swn_obj.pos_score()
        neg_score += swn_obj.neg_score()

    afinn_score = afinn.score(context)
    if afinn_score > 0:
        pos_score += afinn_score
    else:
        neg_score += abs(afinn_score)

    subj_pos = sum([1 for token in context.split() if token in opinion_lexicon.positive()])
    subj_neg = sum([1 for token in context.split() if token in opinion_lexicon.negative()])
    
    pos_score += subj_pos
    neg_score += subj_neg

    return 1 if pos_score > neg_score else 2 if neg_score > pos_score else 0

def adjust_sentiment_for_negation(row, polarity, verb_combo):
    """
    Function to adjust the sentiment polarity for negation.
    """
    word, index, head_index, tag = verb_combo
    dependencies = row['dependencies']

    for related in dependencies:
        if len(related) == 3:
            related_word, related_head, related_rel = related
            if related_head == index and related_rel in ['advmod'] and related_word in ['not', 'n’t', 'no', 'never', 'barely', 'hardly', 'scarcely', 'rarely', 'seldom', 'neither', 'nor']:
                if polarity == 1:
                    polarity = 2
                    return polarity
                if polarity == 2:
                    polarity = 1
                    return polarity
    return polarity

#############################################################################

def find_all_valid_events_and_polarities(row, all_verbs_list):

    all_valid_events_and_polarities = []
    valid_agent = None
    valid_object = None
    polarity = 0

    for verb_combo in all_verbs_list:
        verb, own_index, head_index, tag = verb_combo

        # Find Valid Agent for that verb
        valid_agent, passive = find_valid_agent(verb_combo, row)
        if valid_agent == None:
            return all_valid_events_and_polarities

        
        # Find Object for that verb
        # 2 different algorithms for passive and not passive
        if passive == False:
            # Looking for own object or over xcomp
            valid_object = find_valid_object(verb_combo, row)
            if valid_object == None:
                return all_valid_events_and_polarities

        else:
            # Passive means that nsubj:pass is an object and agent is obl:agent (obl?)
            for object_combo in row['dependencies']:
                if len(object_combo) == 3:
                    object, object_head, object_tag = object_combo
                    if object_head == own_index and object_tag == 'obl:agent':
                        valid_object = valid_agent
                        valid_agent = None
                        # Check if that obl is a valid agent
                        if check_agent_validity (object, row) == True:
                            valid_agent = object
                        if valid_agent == None:
                            return all_valid_events_and_polarities
            if valid_object == None:
                return all_valid_events_and_polarities

        
        # Define polarity of the combination verb + object
        if valid_object != None:
            polarity = define_polarity(verb, valid_object)

        
        # Adjust polarity according to the negations
        if polarity != 0:
            polarity = adjust_sentiment_for_negation(row, polarity, verb_combo)

    
        # Append triple (valid_agent, verb, valid_object) and polarity to the list
        if valid_agent != None and valid_object != None:
            all_valid_events_and_polarities.append((valid_agent, verb, valid_object, polarity))
    
    return all_valid_events_and_polarities

In [17]:
def new_algorithm(row):
    
    all_verbs_list = find_all_valid_verbs_in_row(row) # Step 1 - Find all the valid verbs in the sentence and safe them in the format: word, own_index, head_index, tag
    if len(all_verbs_list) == 0:
        return 0

    triplets_and_polarities = find_all_valid_events_and_polarities(row, all_verbs_list) # Step 2 - For each valid verb find its valid agent, object, define polarity and safe them in the format: agent, verb, object, polarity
    if len(triplets_and_polarities) == 0:
        return 0
        
    return triplets_and_polarities

In [18]:
shuffled_df['All_Triplets_and_Polarities'] = shuffled_df.apply(new_algorithm, axis=1)
shuffled_df = shuffled_df[['Sentence', 'Label', 'All_Triplets_and_Polarities'] + [col for col in shuffled_df.columns if col not in ['Sentence', 'Label', 'All_Triplets_and_Polarities']]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shuffled_df['All_Triplets_and_Polarities'] = shuffled_df.apply(new_algorithm, axis=1)


In [19]:
shuffled_df

Unnamed: 0,Sentence,Label,All_Triplets_and_Polarities,All_Valid_Verbs,tokens_pos,entities,dependencies
0,The faculty's Academic Senate threw its suppor...,0,"[(Senate, threw, support, 1)]","[(threw, 6, 0, root)]","[(The, DET), (faculty, NOUN), ('s, PART), (Aca...","[(Academic Senate, ORG), (Monday, DATE), (the ...","[(The, 2, det), (faculty, 5, nmod:poss), ('s, ..."
1,But the true scale of the movement remains unc...,2,0,"[(warning, 14, 8, conj)]","[(But, CCONJ), (the, DET), (true, ADJ), (scale...","[(Miskito, NORP), (Nicaraguan, NORP)]","[(But, 8, cc), (the, 4, det), (true, 4, amod),..."
2,(Branstad is ripping Cruz for taking the bold ...,0,"[(Branstad, ripping, Cruz, 2)]","[(ripping, 4, 0, root), (criticizing, 12, 10, ...","[((, PUNCT), (Branstad, PROPN), (is, AUX), (ri...","[(Branstad, PERSON), (Cruz, PERSON), (Iowa, GPE)]","[((, 4, punct), (Branstad, 4, nsubj), (is, 4, ..."
3,But the Marlins have failed to make the postse...,0,"[(Marlins, failed, postseason, 2), (Marlins, m...","[(failed, 5, 0, root), (marked, 18, 5, conj), ...","[(But, CCONJ), (the, DET), (Marlins, PROPN), (...","[(Marlins, ORG), (Loria, PERSON)]","[(But, 5, cc), (the, 3, det), (Marlins, 5, nsu..."
4,"There is nothing new yet on the table, but one...",1,0,"[(is, 2, 0, root), (said, 16, 2, conj)]","[(There, PRON), (is, VERB), (nothing, PRON), (...","[(one, CARDINAL), (Gannett, ORG), (Tribune Pub...","[(There, 2, expl), (is, 0, root), (nothing, 2,..."
...,...,...,...,...,...,...,...
6222,Conservatives have sided with government moves...,2,"[(Conservatives, sided, moves, 0), (Conservati...","[(sided, 3, 0, root), (expel, 8, 3, advcl), (a...","[(Conservatives, PROPN), (have, AUX), (sided, ...","[(Conservatives, NORP), (Shin, PERSON), (North...","[(Conservatives, 3, nsubj), (have, 3, aux), (s..."
6223,"However, former prosecutor Andrew McCarthy, no...",0,"[(prosecutor, said, Thursday, 0)]","[(said, 15, 0, root)]","[(However, ADV), (,, PUNCT), (former, ADJ), (p...","[(Andrew McCarthy, PERSON), (National Review, ...","[(However, 15, advmod), (,, 1, punct), (former..."
6224,Dunham’s accusing the left of “hostility” and ...,0,"[(Dunham, accusing, left, 2)]","[(accusing, 3, 0, root), (comes, 14, 3, conj)]","[(Dunham, PROPN), (’s, AUX), (accusing, VERB),...","[(Dunham’s, PERSON), (thousands, CARDINAL), (T...","[(Dunham, 3, nsubj), (’s, 3, aux), (accusing, ..."
6225,"In the aftermath of the attack, Thoiry is warn...",0,"[(Thoiry, warning, zoos, 2)]","[(warning, 10, 0, root)]","[(In, ADP), (the, DET), (aftermath, NOUN), (of...","[(Thoiry, PERSON), (European, NORP)]","[(In, 3, case), (the, 3, det), (aftermath, 10,..."


In [20]:
shuffled_df['All_Triplets_and_Polarities'].isnull().values.any()

False

In [21]:
shuffled_df['All_Valid_Verbs'].isnull().values.any()

False

# Export in Excel

In [22]:
# Export the first x rows to an Excel file
#shuffled_df.to_excel('shuffled_1_df_4th_model_100_rows.xlsx', index=False)

# ГЛАГОЛЫ ТОЖЕ МОЖНО ПРОВЕРИТЬ

In [36]:
counter = 0
for events_list in shuffled_df['All_Triplets_and_Polarities']:
    if events_list != 0:
        for event in events_list:
            if event[1] in ['do', 'does', 'did', 'done', 'doing']:
                counter = counter + 1
                print(event)

('Raif', 'did', 'something', 2)
('Russia', 'doing', 'what', 2)
('Rahm', 'done', 'it', 0)
('we', 'doing', 'things', 2)
('he', 'does', 'it', 2)
('Muslim', 'do', 'that', 2)
('Trump', 'doing', 'state', 2)
('President', 'do', 'something', 2)
('Russia', 'doing', 'what', 2)
('he', 'does', 'it', 2)
('You', 'done', 'much', 0)
('Democrats', 'did', 'party', 2)
('Arabia', 'done', 'same', 0)
('he', 'done', 'company', 0)
('he', 'doing', 'withdrawal', 2)
('Party', 'does', 'dance', 2)
('Hutchisson', 'done', 'impossible', 2)
('Sanders', 'does', 'Whatever', 2)
('Redstone', 'doing', 'what', 2)
('I', 'does', 'more', 2)
('he', 'done', 'company', 0)
('Mr.', 'did', 'homecoming', 2)
('Raif', 'did', 'something', 2)
('Ferguson', 'do', 'same', 2)


In [37]:
counter

24

In [38]:
counter / shuffled_df.shape[0]

0.003854183394893207

In [42]:
counter = 0
for events_list in shuffled_df['All_Triplets_and_Polarities']:
    if events_list != 0:
        for event in events_list:
            if event[1] in ["be", "am", "is", "are", "was", "were", "being", "been"]:
                counter = counter + 1
                print(event)
                print(events_list)

('Donald', 'been', 'convention', 1)
[('Donald', 'been', 'convention', 1), ('he', 'threatened', 'accusers', 2)]
('Donald', 'been', 'convention', 1)
[('Donald', 'been', 'convention', 1), ('he', 'threatened', 'accusers', 2)]


In [40]:
counter

2

In [41]:
counter / shuffled_df.shape[0]

0.0003211819495744339