# Import libraries

In [1]:
import nltk
import stanza
import ast
from afinn import Afinn
afinn = Afinn()
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import wordnet as wn
from nltk.corpus import verbnet as vn
from nltk.corpus import opinion_lexicon
from nltk.wsd import lesk
from nltk.corpus import wordnet
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import openpyxl

# Preprocessed Data Loading

In [2]:
# Load the data
column_names = ["Sentence", "Label", "tokens_pos", "entities", "dependencies"]
df_train_ready = pd.read_csv('C:/Users/Anastasiia Belkina/MANNHEIM/MASTER_THESIS_CODE/Rule-Based Classifier/datasets_preprocessed/df_train_shuffled.txt', sep='\t', names=column_names)
df_valid_ready = pd.read_csv('C:/Users/Anastasiia Belkina/MANNHEIM/MASTER_THESIS_CODE/Rule-Based Classifier/datasets_preprocessed/df_valid_shuffled.txt', sep='\t', names=column_names)
df_test_ready = pd.read_csv('C:/Users/Anastasiia Belkina/MANNHEIM/MASTER_THESIS_CODE/Rule-Based Classifier/datasets_preprocessed/df_test_shuffled.txt', sep='\t', names=column_names)

# Unite whole data in one dataframe
merged_df = pd.concat([df_train_ready, df_valid_ready, df_test_ready], ignore_index=True)

# Shuffle the merged dataframe
shuffled_df = merged_df.sample(frac=1, random_state=42).reset_index(drop=True)

# Remove leading and trailing spaces in the "Sentence" column
shuffled_df['Sentence'] = shuffled_df['Sentence'].str.strip()

# First 100 rows for examples
#shuffled_df = shuffled_df.head(100)

In [3]:
shuffled_df.head()

Unnamed: 0,Sentence,Label,tokens_pos,entities,dependencies
0,"In 2011, of State Hillary Clinton promised the...",0,"[('In', 'ADP'), ('2011', 'NUM'), (',', 'PUNCT'...","[('2011', 'DATE'), ('State', 'ORG'), ('Hillary...","[('In', 2, 'case'), ('2011', 8, 'obl'), (',', ..."
1,"’ Today saw the debut of ”,” a new social expe...",0,"[('’', 'PUNCT'), ('Today', 'NOUN'), ('saw', 'V...","[('Twitch', 'PRODUCT')]","[('’', 3, 'punct'), ('Today', 3, 'nsubj'), ('s..."
2,"Singer, actor and activist Harry Belafonte, 88...",1,"[('Singer', 'NOUN'), (',', 'PUNCT'), ('actor',...","[('Harry Belafonte', 'PERSON'), ('88', 'DATE')...","[('Singer', 12, 'nsubj'), (',', 3, 'punct'), (..."
3,"Last week, Roberts said the teams medical staf...",1,"[('Last', 'ADJ'), ('week', 'NOUN'), (',', 'PUN...","[('Last week', 'DATE'), ('Roberts', 'PERSON'),...","[('Last', 2, 'amod'), ('week', 5, 'obl:tmod'),..."
4,"Indeed, I believe that Waitrose does more than...",1,"[('Indeed', 'ADV'), (',', 'PUNCT'), ('I', 'PRO...","[('Waitrose', 'ORG'), ('UK', 'GPE')]","[('Indeed', 4, 'advmod'), (',', 4, 'punct'), (..."


In [4]:
shuffled_df.isnull().values.any()

False

In [5]:
shuffled_df['Label'].value_counts()

Label
0    3390
3    1631
1     814
4     237
2     155
Name: count, dtype: int64

# Mapping Labels

In [6]:
# Mapping dictionary: 0 - neutral, 1 - positive, 2 - negative
label_mapping = {2: 1, 3: 2, 4: 2}
shuffled_df['Label'] = shuffled_df['Label'].replace(label_mapping)

In [7]:
shuffled_df.head()

Unnamed: 0,Sentence,Label,tokens_pos,entities,dependencies
0,"In 2011, of State Hillary Clinton promised the...",0,"[('In', 'ADP'), ('2011', 'NUM'), (',', 'PUNCT'...","[('2011', 'DATE'), ('State', 'ORG'), ('Hillary...","[('In', 2, 'case'), ('2011', 8, 'obl'), (',', ..."
1,"’ Today saw the debut of ”,” a new social expe...",0,"[('’', 'PUNCT'), ('Today', 'NOUN'), ('saw', 'V...","[('Twitch', 'PRODUCT')]","[('’', 3, 'punct'), ('Today', 3, 'nsubj'), ('s..."
2,"Singer, actor and activist Harry Belafonte, 88...",1,"[('Singer', 'NOUN'), (',', 'PUNCT'), ('actor',...","[('Harry Belafonte', 'PERSON'), ('88', 'DATE')...","[('Singer', 12, 'nsubj'), (',', 3, 'punct'), (..."
3,"Last week, Roberts said the teams medical staf...",1,"[('Last', 'ADJ'), ('week', 'NOUN'), (',', 'PUN...","[('Last week', 'DATE'), ('Roberts', 'PERSON'),...","[('Last', 2, 'amod'), ('week', 5, 'obl:tmod'),..."
4,"Indeed, I believe that Waitrose does more than...",1,"[('Indeed', 'ADV'), (',', 'PUNCT'), ('I', 'PRO...","[('Waitrose', 'ORG'), ('UK', 'GPE')]","[('Indeed', 4, 'advmod'), (',', 4, 'punct'), (..."


In [8]:
shuffled_df.isnull().values.any()

False

In [9]:
shuffled_df['Label'].value_counts()

Label
0    3390
2    1868
1     969
Name: count, dtype: int64

# Turning strings back to lists and tuples

In [10]:
def convert_to_list(dependencies_str):
    # Check if it's a string and if it appears to be in the list of tuples format
    if isinstance(dependencies_str, str) and dependencies_str.startswith("[") and dependencies_str.endswith("]"):
        try:
            # Convert string representation of list back to actual list of tuples
            return ast.literal_eval(dependencies_str)
        except (ValueError, SyntaxError) as e:
            print(f"Error parsing: {dependencies_str}")
            raise e
    elif isinstance(dependencies_str, list):
        # If it's already a list, return as is
        return dependencies_str
    else:
        # If it's another unexpected type, return as is or handle appropriately
        return dependencies_str

In [11]:
# Apply the function to your datasets
shuffled_df['dependencies'] = shuffled_df['dependencies'].apply(convert_to_list)
shuffled_df['tokens_pos'] = shuffled_df['tokens_pos'].apply(convert_to_list)
shuffled_df['entities'] = shuffled_df['entities'].apply(convert_to_list)

# Following the Modified Algorithm of Blame/Praise Identification

In [12]:
# Define functions to check if a verb belongs to Foreseeability or Coercion groups

def is_foreseeability_verb(verb):
    # This function checks whether a verb belongs to a predefined set of foreseeability-related verb classes.
    foreseeability_classes = {'communication', 'creation', 'consumption', 'competition', 'possession', 'motion'}
    synsets = wn.synsets(verb, pos=wn.VERB)  # Fetches all verb synsets for the word
    for synset in synsets:
        lexname = synset.lexname().split('.')[1]  # Extracts the lexical category (i.e., type of action)
        if lexname in foreseeability_classes:  # Checks if the lexical category is in the foreseeability class
            return True  # Returns True if the verb matches any foreseeability category
    return False  # If no match is found, returns False


def is_coercion_verb(verb):
    # This function checks whether a verb belongs to a predefined set of coercion-related VerbNet classes.
    coercion_classes = {'urge-58.1', 'force-59', 'forbid-67'}
    synsets = wn.synsets(verb, pos=wn.VERB)  # Fetches all verb synsets for the word
    for synset in synsets:
        lemma = synset.lemmas()[0]  # Gets the first lemma for each synset
        vn_classes = lemma.key().split('%')[0]  # Extracts the lemma key
        vn_class_ids = vn.classids(vn_classes)  # Fetches the VerbNet classes for the lemma
        if any(vn_class in coercion_classes for vn_class in vn_class_ids):  # Checks for a match in coercion classes
            return True  # If a match is found in coercion classes, return True
    return False  # If no match is found, return False

In [13]:
def is_valid_verb(word, tokens_pos):
    """
    Check if the given word is a verb and passes the foreseeability and coercion checks.
    """
    # Check if the word is a verb using tokens_pos
    for token, pos in tokens_pos:
        if token == word and 'VERB' in pos:  # Ensure the word is tagged as a verb
            # Now check if it passes foreseeability and coercion checks
            if is_foreseeability_verb(word) and not is_coercion_verb(word):
                return True
    return False


def check_conjunctions(verb_type_verbs, related_word, counter_j, index, dependencies, tokens_pos):
    """
    Helper function to check and append conjunctions for specific verb types (xcomp, ccomp, parataxis, advcl).
    It modifies the original verb list (e.g., xcomp_verbs, ccomp_verbs) by adding conjunctions directly.
    """
    counter_x = 0
    for conj in dependencies:
        if len(conj) == 3:
            conj_word, conj_head, conj_rel = conj
            counter_x += 1
            # Reset counter for punctuation after root - end of the sentence
            if conj_rel == 'punct' and (conj_word == "." or conj_word == ":"):
                counter_x = 0
            # Check if the conj word is a valid verb related to the current relation
            if conj_head == counter_j and conj_rel == 'conj' and is_valid_verb(related_word, tokens_pos):
                verb_type_verbs.append((conj_word, counter_x, index))  # Append conjunction to the respective verb list


def handle_related_verbs(related_rel, related_word, counter_j, index, dependencies, tokens_pos, xcomp_verbs, ccomp_verbs, parataxis_verbs, advcl_verbs):
    """
    Helper function to handle related verbs (xcomp, ccomp, parataxis, advcl).
    Depending on the relation type, it adds the verb to the appropriate list and handles its conjunctions.
    """
    if related_rel == 'xcomp':
        xcomp_verbs.append((related_word, counter_j, index))  # xcomp relation to root
        check_conjunctions(xcomp_verbs, related_word, counter_j, index, dependencies, tokens_pos)

    elif related_rel == 'ccomp':
        ccomp_verbs.append((related_word, counter_j, index))  # ccomp relation
        check_conjunctions(ccomp_verbs, related_word, counter_j, index, dependencies, tokens_pos)

    elif related_rel == 'parataxis':
        parataxis_verbs.append((related_word, counter_j, index))  # parataxis relation
        check_conjunctions(parataxis_verbs, related_word, counter_j, index, dependencies, tokens_pos)

    elif related_rel == 'advcl':
        advcl_verbs.append((related_word, counter_j, index))  # advcl relation
        check_conjunctions(advcl_verbs, related_word, counter_j, index, dependencies, tokens_pos)

In [14]:
def step_one_function(row):
    """
    Main function to find valid verbs (root, xcomp, ccomp, parataxis, advcl, and their conjunctions).
    """
    dependencies = row['dependencies']  # Dependency relations for the sentence
    tokens_pos = row['tokens_pos']  # POS-tagged tokens for the sentence

    counter_i = 0  # Counter for tracking the index of words in the dependency structure
    
    # Lists to store categorized verbs
    roots = []  # (word, own index, main root), if root is root (not conj) - write its own index
    root_verbs = []  # For valid root verbs (that pass foreseeability and coercion checks)
    xcomp_verbs = []  # For valid xcomp verbs
    ccomp_verbs = []  # For valid ccomp verbs
    parataxis_verbs = []  # For valid parataxis verbs
    advcl_verbs = []  # For valid advcl verbs

    # Iterate through dependencies to identify roots and their related verbs
    for dep in dependencies:
        if len(dep) == 3:
            word, head, deprel = dep  # Unpacking the dependency tuple (word, head, relation)
            counter_i += 1  # Increment the index counter for this word

            # Reset counter when punctuation is found after root
            if roots:
                if deprel == 'punct' and (word == "." or word == ":") and head == roots[0][1]:
                    counter_i = 0  

            # Check if the current word is the root of the sentence
            if deprel == 'root':
                roots.append((word, counter_i, counter_i))  # Add the root verb and its index
                if is_valid_verb(word, tokens_pos):  # Check if the root is a valid verb
                    root_verbs.append((word, counter_i, counter_i))  # Append valid root verb

                # Looking for related conjunctions
                counter_j = 0
                for related in dependencies:
                    if len(related) == 3:
                        related_word, related_head, related_rel = related
                        counter_j += 1  # Increment index for related word
                        # Reset the counter for punctuation after root
                        if related_rel == 'punct' and (related_word == "." or related_word == ":") and related_head == roots[0][1]:
                            counter_j = 0
                        # Look for conjunctions attached to the root verb
                        if related_head == counter_i and related_rel == 'conj' and is_valid_verb(related_word, tokens_pos):
                            roots.append((related_word, counter_j, counter_i))  # Add root conj
                            root_verbs.append((related_word, counter_j, counter_i))  # Append valid conj relation

    # Find related verbs (xcomp, ccomp, etc.) for root verbs and their conjunctions
    for verb in roots:
        word, index, head_index = verb
        counter_j = 0
        for related in dependencies:
            if len(related) == 3:
                related_word, related_head, related_rel = related
                counter_j += 1
                # Reset the counter for punctuation after root
                if related_rel == 'punct' and (related_word == "." or related_word == ":") and related_head == roots[0][1]:
                    counter_j = 0
                # Handle xcomp, ccomp, parataxis, and advcl relations
                if related_head == index and related_rel in ['xcomp', 'ccomp', 'parataxis', 'advcl'] and is_valid_verb(related_word, tokens_pos):
                    handle_related_verbs(related_rel, related_word, counter_j, index, dependencies, tokens_pos, xcomp_verbs, ccomp_verbs, parataxis_verbs, advcl_verbs)

    #print()
    #print('NEW ROW')
    #print('Valid root verbs: ', root_verbs)
    #print('Valid xcomp verbs: ', xcomp_verbs)
    #print('Valid ccomp verbs: ', ccomp_verbs)
    #print('Valid parataxis verbs: ', parataxis_verbs)
    #print('Valid advcl verbs: ', advcl_verbs)
    
    return roots, root_verbs, xcomp_verbs, ccomp_verbs, parataxis_verbs, advcl_verbs

In [15]:
def check_agent_validity(related_word, row, tokens_pos):
    """
    Function to check the validity of an agent based on NER tags and additional rules.
    """
    entities = row['entities']
    valid_ent_labels = ["PERSON", "NORP", "ORG", "GPE"]
    valid_additional_words = [
        "person", "man", "woman", "police", "administration", "immigrants", "president", "minister", "senator", 
        "representative", "governor", "mayor", "council", "secretary", "ambassador", "chancellor", "ministry", "monarchy",
        "parliamentary", "mr.", "ms.", "mrs."
    ]

    self = False
    agent_is_valid = False
    
    # Original logic: Check if the related_word is a valid agent based on NER and additional terms
    for entity, label in entities: 
        if related_word in entity and label in valid_ent_labels:  
            agent_is_valid = True  # Valid agent based on NER
    
    # Check if it's a pronoun
    if not agent_is_valid and 'PRON' in [pos for token, pos in tokens_pos if token == related_word]: 
        agent_is_valid = True  
        # Logic for handling "self" reference (i.e., "I" or "we") - to be changed
        #if related_word.lower() == "i" or related_word.lower() == "we":
            #self = True

    # Check if the word is in additional valid agent words
    if not agent_is_valid and related_word.lower() in valid_additional_words:
        agent_is_valid = True  
    #if agent_is_valid:
        #print("The Agent is valid: ", related_word)
    
    return agent_is_valid, self


def check_causative_verb(verb):
    """
    Function to check if the verb is causative, i.e., if it belongs to the 'cause' or 'CAUSETO' class.
    """
    for synset in wn.synsets(verb, pos=wn.VERB):
        if 'cause' in synset.lemma_names():
            #print("found cause lemma")
            return True
        for lemma in synset.lemmas():
            for frame in lemma.frame_strings():
                if 'CAUSE' in frame or 'CAUSETO' in frame:
                    #print("found cause and causeto frame strings")
                    return True
    return False


def define_polarity(verb, obj):
    """
    Function to define the polarity of the verb + object combination.
    Maybe use Word Sense Disambiguation (WSD) here?
    """
    context = f"{verb} {obj}"
    verb_sense = lesk(context.split(), verb, 'v')
    obj_sense = lesk(context.split(), obj, 'n')
    
    pos_score = neg_score = 0
    
    if verb_sense:
        swn_verb = swn.senti_synset(verb_sense.name())
        pos_score += swn_verb.pos_score()
        neg_score += swn_verb.neg_score()
    
    if obj_sense:
        swn_obj = swn.senti_synset(obj_sense.name())
        pos_score += swn_obj.pos_score()
        neg_score += swn_obj.neg_score()

    afinn_score = afinn.score(context)
    if afinn_score > 0:
        pos_score += afinn_score
    else:
        neg_score += abs(afinn_score)

    subj_pos = sum([1 for token in context.split() if token in opinion_lexicon.positive()])
    subj_neg = sum([1 for token in context.split() if token in opinion_lexicon.negative()])
    
    pos_score += subj_pos
    neg_score += subj_neg

    return 1 if pos_score > neg_score else 2 if neg_score > pos_score else 0


def adjust_sentiment_for_negation(row, polarity, verb):
    """
    Function to adjust the sentiment polarity for negation.
    """
    word, index, head_index = verb
    dependencies = row['dependencies']

    for related in dependencies:
        if len(related) == 3:
            related_word, related_head, related_rel = related
            if related_head == index and related_rel in ['advmod'] and (related_word == 'not' or related_word == 'n’t' or related_word == 'never'):
                #print("Had found Negation")
                if polarity == 1:
                    polarity = 2
                    #print("Final Polarity: ", polarity)
                    return polarity
                if polarity == 2:
                    polarity = 1
                    #print("Final Polarity: ", polarity)
                    return polarity

    #print("Final Polarity: ", polarity)
    
    return polarity



def handle_special_cases_for_xcomp_in_ccomp(row, verb, dependencies, tokens_pos, counter_j, related_word):
    """
    Handle special cases for xcomp connected to ccomp, looking for objects connected to xcomp.
    
    for related_to_xcomp in dependencies:
        if len(related_to_xcomp) == 3:
            related_to_xcomp_word, related_to_xcomp_head, related_to_xcomp_rel = related_to_xcomp
            if related_to_xcomp_head == counter_j and related_to_xcomp_rel in ['obj']:
                print("The Object is valid: ", related_to_xcomp_word)
                # Define polarity of the combination xcomp + object
                polarity = define_polarity(related_word, related_to_xcomp_word)
                polarity = adjust_sentiment_for_negation(row, polarity, related_to_xcomp)
                if polarity != 0:
                    return polarity
    for related_to_xcomp in dependencies:
        if len(related_to_xcomp) == 3:
            related_to_xcomp_word, related_to_xcomp_head, related_to_xcomp_rel = related_to_xcomp
            if related_to_xcomp_head == counter_j and related_to_xcomp_rel in ['iobj']:
                print("The Object is valid: ", related_to_xcomp_word)
                # Define polarity of the combination xcomp + object
                polarity = define_polarity(related_word, related_to_xcomp_word)
                polarity = adjust_sentiment_for_negation(row, polarity, related_to_xcomp)
                if polarity != 0:
                    return polarity
    for related_to_xcomp in dependencies:
        if len(related_to_xcomp) == 3:
            related_to_xcomp_word, related_to_xcomp_head, related_to_xcomp_rel = related_to_xcomp
            if related_to_xcomp_head == counter_j and related_to_xcomp_rel in ['obl']:
                print("The Object is valid: ", related_to_xcomp_word)
                # Define polarity of the combination xcomp + object
                polarity = define_polarity(related_word, related_to_xcomp_word)
                polarity = adjust_sentiment_for_negation(row, polarity, related_to_xcomp)
                if polarity != 0:
                    return polarity

    """
    for i, related in enumerate(dependencies):
        if len(related) == 3:
            related_word, related_head, related_rel = related
            if related_head == verb[1] and related_rel in ['xcomp']:
                #print("Found xcomp to ccomp: ", i, related_word, related_head, related_rel)
                for related_to_xcomp in dependencies:
                    if len(related) == 3:
                        related_to_xcomp_word, related_to_xcomp_head, related_to_xcomp_rel = related_to_xcomp
                        # Maintain order of processing 'obj', 'iobj', and 'obl'
                        if related_to_xcomp_head == (i+1) and related_to_xcomp_rel in ['obj']:
                            polarity = define_polarity(verb[0], related_to_xcomp_word)
                            #print("The Object is valid: ", related_to_xcomp_word)
                            return adjust_sentiment_for_negation(row, polarity, verb)
                for related_to_xcomp in dependencies:
                    if len(related) == 3:
                        related_word, related_head, related_rel = related_to_xcomp
                        if related_to_xcomp_head == (i+1) and related_to_xcomp_rel in ['iobj']:
                            polarity = define_polarity(verb[0], related_to_xcomp_word)
                            #print("The Object is valid: ", related_to_xcomp_word)
                            return adjust_sentiment_for_negation(row, polarity, verb)
                for related_to_xcomp in dependencies:
                    if len(related) == 3:
                        related_to_xcomp_word, related_to_xcomp_head, related_to_xcomp_rel = related_to_xcomp
                        if related_to_xcomp_head == (i+1) and related_to_xcomp_rel in ['obl']:
                            polarity = define_polarity(verb[0], related_to_xcomp_word)
                            #print("The Object is valid: ", related_to_xcomp_word)
                            return adjust_sentiment_for_negation(row, polarity, verb)
    return 0




def process_ccomp_verb(row, verb, dependencies, tokens_pos, roots):
    """
    Process ccomp verbs and handle normal cases and special cases like `obl:agent` and `nsubj:pass`.
    """
    word, index, head_index = verb
    agent_is_valid, self = False, False
    agent_is_obl = False  # Track if the agent comes from an `obl:agent`

    # Find an agent connected to the ccomp verb (normal or obl:agent case)
    for related in dependencies:
        if len(related) == 3:
            related_word, related_head, related_rel = related
            # Check for `nsubj` as agent
            if related_head == index and related_rel in ['nsubj']:
                agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)

            # Special case: `obl:agent` becomes the agent
            elif related_head == index and related_rel in ['obl:agent', 'obl']:
                agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)
                agent_is_obl = True  # Mark that the agent is an `obl`

    # If no valid agent, check for causative verbs
    #if not agent_is_valid and check_causative_verb(word):
        #agent_is_valid = True

    # Object processing priority: `obj`, `iobj`
    if agent_is_valid:
        counter_j = 0
        for related in dependencies:
            if len(related) == 3:
                related_word, related_head, related_rel = related
                counter_j += 1
                # Reset counter for punctuation
                if related_rel == 'punct' and (related_word == "." or related_word == ":") and related_head == roots[0][1]:
                    counter_j = 0
                
                # Normal case: Handle objects connected to the ccomp verb
                if related_head == index and related_rel in ['obj', 'iobj'] and not agent_is_obl:
                    #print("The Object is valid: ", related_word)
                    polarity = define_polarity(word, related_word)
                    polarity = adjust_sentiment_for_negation(row, polarity, verb)
                    if polarity != 0:
                        return f"self - {polarity}" if self else polarity

                # Special case: When `obl:agent` is present, `nsubj:pass` becomes the object
                if agent_is_obl and related_head == index and related_rel == 'nsubj:pass':
                    #print("The Object is valid: ", related_word)
                    polarity = define_polarity(word, related_word)
                    polarity = adjust_sentiment_for_negation(row, polarity, verb)
                    if polarity != 0:
                        return f"self - {polarity}" if self else polarity

                # Handle xcomp connected to ccomp and check objects within xcomp
                polarity = handle_special_cases_for_xcomp_in_ccomp(row, verb, dependencies, tokens_pos, counter_j, related_word)
                if polarity != 0:
                    return f"self - {polarity}" if self else polarity

    return 0



def find_object_and_define_polarity(row, verb, agent_is_valid, tokens_pos):
    """
    Helper function to find the object for a given verb and define its polarity.
    """
    if agent_is_valid:
        dependencies = row['dependencies']

        # xcomp case
        for i, related in enumerate(dependencies):
            if len(related) == 3:
                related_word, related_head, related_rel = related
                if related_head == verb[1] and related_rel in ['xcomp']:
                    #print("Found xcomp to ccomp: ", i, related_word, related_head, related_rel)
                    for related_to_xcomp in dependencies:
                        if len(related) == 3:
                            related_to_xcomp_word, related_to_xcomp_head, related_to_xcomp_rel = related_to_xcomp
                            # Maintain order of processing 'obj', 'iobj', and 'obl'
                            if related_to_xcomp_head == (i+1) and related_to_xcomp_rel in ['obj']:
                                polarity = define_polarity(verb[0], related_to_xcomp_word)
                                #print("The Object is valid: ", related_to_xcomp_word)
                                return adjust_sentiment_for_negation(row, polarity, verb)
                    for related_to_xcomp in dependencies:
                        if len(related) == 3:
                            related_word, related_head, related_rel = related_to_xcomp
                            if related_to_xcomp_head == (i+1) and related_to_xcomp_rel in ['iobj']:
                                polarity = define_polarity(verb[0], related_to_xcomp_word)
                                #print("The Object is valid: ", related_to_xcomp_word)
                                return adjust_sentiment_for_negation(row, polarity, verb)
                    for related_to_xcomp in dependencies:
                        if len(related) == 3:
                            related_to_xcomp_word, related_to_xcomp_head, related_to_xcomp_rel = related_to_xcomp
                            if related_to_xcomp_head == (i+1) and related_to_xcomp_rel in ['obl']:
                                polarity = define_polarity(verb[0], related_to_xcomp_word)
                                #print("The Object is valid: ", related_to_xcomp_word)
                                return adjust_sentiment_for_negation(row, polarity, verb)
        
        for related in dependencies:
            if len(related) == 3:
                related_word, related_head, related_rel = related
                # Maintain order of processing 'obj', 'iobj', and 'obl'
                if related_head == verb[1] and related_rel in ['obj']:
                    polarity = define_polarity(verb[0], related_word)
                    #print("The Object is valid: ", related_word)
                    return adjust_sentiment_for_negation(row, polarity, verb)
        for related in dependencies:
            if len(related) == 3:
                related_word, related_head, related_rel = related
                if related_head == verb[1] and related_rel in ['iobj']:
                    polarity = define_polarity(verb[0], related_word)
                    #print("The Object is valid: ", related_word)
                    return adjust_sentiment_for_negation(row, polarity, verb)
        for related in dependencies:
            if len(related) == 3:
                related_word, related_head, related_rel = related
                if related_head == verb[1] and related_rel in ['obl']:
                    polarity = define_polarity(verb[0], related_word)
                    #print("The Object is valid: ", related_word)
                    return adjust_sentiment_for_negation(row, polarity, verb)
                    
        # Handle objects connected to xcomp
        #polarity = handle_special_cases_for_xcomp_in_ccomp(row, verb, dependencies, tokens_pos, counter_j, related_word)
        #if polarity != 0:
            #return f"self - {polarity}" if self else polarity

    return 0


def process_verb_connections(row, verbs, tokens_pos, self=False):
    """
    Generalized function to process verb connections such as root_verbs, xcomp_verbs, etc.
    """
    result = None
    for verb in verbs:
        word, index, head_index = verb
        agent_is_valid = False

        # Original logic for agent validation
        for related in row['dependencies']:
            if len(related) == 3:
                related_word, related_head, related_rel = related
                #if related_head == index and related_rel in ['nsubj', 'nsubj:pass']:
                if related_head == index and related_rel in ['nsubj']:
                    agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)
                else:
                    # Checking if the agent of the root of that verb is valid
                    if related_head == head_index and related_rel in ['nsubj']: 
                        agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)
        
        # If agent is not valid, check causative verb
        #if not agent_is_valid and check_causative_verb(word):
            #agent_is_valid = True

        # Use original priority order for object detection
        polarity = find_object_and_define_polarity(row, verb, agent_is_valid, tokens_pos)
        if polarity != 0:
            #return f"self - {polarity}" if self else polarity, verb
            return polarity, verb

    return result, None

In [16]:
def step_two_function(row, roots, root_verbs, xcomp_verbs, ccomp_verbs, parataxis_verbs, advcl_verbs):
    """
    Main function to decide on Agent Causality, find the object, decide on Polarity, and classify the row.
    """
    tokens_pos = row['tokens_pos']
    dependencies = row['dependencies']

    # Process root verbs
    result, verb = process_verb_connections(row, root_verbs, tokens_pos)
    if result:
        if result == 1 or result == 2:
            return result, verb
        return result, None

    # Process ccomp verbs with priority handling and special cases
    for verb in ccomp_verbs:
        result, verb = process_ccomp_verb(row, verb, dependencies, tokens_pos, roots)
        if result:
            if result == 1 or result == 2:
                return result, verb
            return result, None

    # Process advcl verbs
    result, verb = process_verb_connections(row, advcl_verbs, tokens_pos)
    if result:
        if result == 1 or result == 2:
            return result, verb
        return result, None
        
    # Process parataxis verbs
    result, verb = process_verb_connections(row, parataxis_verbs, tokens_pos)
    if result:
        if result == 1 or result == 2:
            return result, verb
        return result, None
    
    # Process xcomp verbs
    result, verb = process_verb_connections(row, xcomp_verbs, tokens_pos)
    if result:
        if result == 1 or result == 2:
            return result, verb
        return result, None

    return 0, None

In [17]:
def label_the_row(row):
    result = 0
    # This is the main function to process each row of data and classify the row 

    # 1 - Find all the related verbs in categories in dependency column 'root', 'xcomp', 'ccomp', 'parataxis', 'advcl', 'conj' (is a verb check - foreseeability check - coercion check)
    roots, root_verbs, xcomp_verbs, ccomp_verbs, parataxis_verbs, advcl_verbs = step_one_function(row)
    
    # 2 - If at least one of the lists is not empty - can proceed
    if root_verbs or xcomp_verbs or ccomp_verbs or parataxis_verbs or advcl_verbs:
        
        # 3 - Take a final decision about the label (0 - others, 1 - positive, 2 - negative)
        return step_two_function(row, roots, root_verbs, xcomp_verbs, ccomp_verbs, parataxis_verbs, advcl_verbs)
    
    else:
        return 0, None

In [18]:
# Apply the function to the dataset
shuffled_df['Final_Result'], shuffled_df['Event_Verb'] = shuffled_df.apply(label_the_row, axis=1)
shuffled_df = shuffled_df[['Sentence', 'Label', 'Final_Result', 'Event_Verb'] + [col for col in shuffled_df.columns if col not in ['Sentence', 'Label', 'Final_Result', 'Event_Verb']]]

TypeError: cannot unpack non-iterable int object

In [None]:
shuffled_df.head(10)

In [None]:
shuffled_df['Final_Result'].value_counts()

# Evaluation

In [None]:
# Extract true labels and predicted labels
y_true = shuffled_df['Label']
y_pred = shuffled_df['Final_Result']

In [None]:
# Assuming you have a DataFrame with 'Label' as true labels and 'Final_Result' as predicted labels

# Calculate F1 Scores
f1_micro = f1_score(y_true, y_pred, average='micro')
f1_macro = f1_score(y_true, y_pred, average='macro')
f1_weighted = f1_score(y_true, y_pred, average='weighted')

# Calculate Precision and Recall for completeness (optional)
precision_micro = precision_score(y_true, y_pred, average='micro')
precision_macro = precision_score(y_true, y_pred, average='macro')
recall_micro = recall_score(y_true, y_pred, average='micro')
recall_macro = recall_score(y_true, y_pred, average='macro')

# Create a DataFrame to display the results
results_df = pd.DataFrame({
    'Metric': ['F1 Score', 'Precision', 'Recall'],
    'Micro-average': [f1_micro, precision_micro, recall_micro],
    'Macro-average': [f1_macro, precision_macro, recall_macro],
    'Weighted-average': [f1_weighted, None, None]  # Weighted average only applicable to F1 score here
})

# Display the table
print(results_df)

# You can also use classification report to see more detailed metrics
print(classification_report(y_true, y_pred))

# Export in Excel

In [None]:
# Export the first 300 rows to an Excel file
#shuffled_df.head(100).to_excel('shuffled_df_new_100_rows.xlsx', index=False)

# False Positives

In [None]:
shuffled_df.head(5)

In [None]:
# False positive: Label is 0, but Final_Result is 1 or 2
false_positives = shuffled_df[(shuffled_df['Label'] == 0) & (shuffled_df['Final_Result'].isin([1, 2]))]

# Count the number of false positives
false_positive_count = false_positives.shape[0]

In [None]:
false_positives

In [None]:
false_positive_count

In [None]:
false_positive_count/shuffled_df.shape[0]