# Import libraries

In [1]:
import nltk
import stanza
import ast
from afinn import Afinn
afinn = Afinn()
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import wordnet as wn
from nltk.corpus import verbnet as vn
from nltk.corpus import opinion_lexicon
from nltk.wsd import lesk
from nltk.corpus import wordnet
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import openpyxl

# Preprocessed Data Loading

In [2]:
# Load the data
column_names = ["Sentence", "Label", "tokens_pos", "entities", "senses", "dependencies", "swn_scores", "afinn_score", "subj_scores", "final_sentiment", "negations", "final_sentiment_adj"]

df_train_preprocessed = pd.read_csv('C:/Users/Anastasiia Belkina/MANNHEIM/MASTER_THESIS_CODE/Rule-Based Classifier/df_train_shuffled.txt', sep='\t', names=column_names)
df_valid_preprocessed = pd.read_csv('C:/Users/Anastasiia Belkina/MANNHEIM/MASTER_THESIS_CODE/Rule-Based Classifier/df_valid_shuffled.txt', sep='\t', names=column_names)

# Remove leading and trailing spaces in the "Sentence" column
df_train_preprocessed['Sentence'] = df_train_preprocessed['Sentence'].str.strip()
df_valid_preprocessed['Sentence'] = df_valid_preprocessed['Sentence'].str.strip()

# Delete columns "subj_scores", "final_sentiment", "negations", "final_sentiment_adj"
df_train_ready = df_train_preprocessed.drop(columns = ["senses", "swn_scores", "afinn_score", "subj_scores", "final_sentiment", "negations", "final_sentiment_adj"])
df_valid_ready = df_valid_preprocessed.drop(columns = ["senses", "swn_scores", "afinn_score", "subj_scores", "final_sentiment", "negations", "final_sentiment_adj"])

# Shuffle the data
#df_train_ready = df_train_preprocessed.sample(frac=1).reset_index(drop=True)
#df_valid_ready = df_valid_preprocessed.sample(frac=1).reset_index(drop=True)

# Merging Labels

In [3]:
# Mapping dictionary
label_mapping = {2: 1, 3: 2, 4: 2}

# 0 - neutral, 1 - positive, 2 - negative

df_train_ready_merged = df_train_ready
df_valid_ready_merged = df_valid_ready

# Apply the mapping to the 'Label' column
df_train_ready_merged['Label'] = df_train_ready_merged['Label'].replace(label_mapping)
df_valid_ready_merged['Label'] = df_valid_ready_merged['Label'].replace(label_mapping)

# Turning strings back to lists and tuples

In [4]:
def convert_to_list(dependencies_str):
    # Check if it's a string and if it appears to be in the list of tuples format
    if isinstance(dependencies_str, str) and dependencies_str.startswith("[") and dependencies_str.endswith("]"):
        try:
            # Convert string representation of list back to actual list of tuples
            return ast.literal_eval(dependencies_str)
        except (ValueError, SyntaxError) as e:
            print(f"Error parsing: {dependencies_str}")
            raise e
    elif isinstance(dependencies_str, list):
        # If it's already a list, return as is
        return dependencies_str
    else:
        # If it's another unexpected type, return as is or handle appropriately
        return dependencies_str

def get_sense(tokens):
    #print(tokens)
    senses = []
    for item in tokens:
        #print(item)
        if isinstance(item, tuple) and len(item) == 2:
            token, pos = item
            sense = lesk([token], token)
            senses.append((token, sense))
        else:
            # Handle cases where the token doesn't meet the expected structure
            print(f"Unexpected format: {item}")
            senses.append((item, None))
    return senses

In [5]:
# Apply the function to your datasets
df_train_ready_merged['dependencies'] = df_train_ready_merged['dependencies'].apply(convert_to_list)
df_valid_ready_merged['dependencies'] = df_valid_ready_merged['dependencies'].apply(convert_to_list)
df_train_ready_merged['tokens_pos'] = df_train_ready_merged['tokens_pos'].apply(convert_to_list)
df_valid_ready_merged['tokens_pos'] = df_valid_ready_merged['tokens_pos'].apply(convert_to_list)
df_train_ready_merged['entities'] = df_train_ready_merged['entities'].apply(convert_to_list)
df_valid_ready_merged['entities'] = df_valid_ready_merged['entities'].apply(convert_to_list)
#df_train_ready_merged['senses'] = df_train_ready_merged['tokens_pos'].apply(get_sense)
#df_valid_ready_merged['senses'] = df_valid_ready_merged['tokens_pos'].apply(get_sense)

# Making a small set for tests

In [6]:
df_train_ready_merged_small = df_train_ready_merged.head(10)
df_valid_ready_merged_small = df_valid_ready_merged.head(10)

# Following the Modified Algorithm of Blame/Praise Identification

## Older versions

In [7]:
"""
    # Step 2: Process root verbs
    for i, root in enumerate(roots):
        root_verb, root_index = root
        #print(root_verb, root_index)
        # Check if the root verb is valid (foreseeability and not coercion)
        for token, pos in tokens_pos:
            #print(token, pos)
            if token == root_verb and pos == 'VERB':
                #print("Found VERB")
                if is_foreseeability_verb(root_verb) and not is_coercion_verb(root_verb):
                    #print("F and not C") 
                    root_verbs.append((root_verb, i+1))
        # Check for any conj attached to this root verb and add it if valid
        for j, conj in enumerate(dependencies):
            conj_word, conj_head, conj_rel = conj
            if conj_head == root_index and conj_rel == 'conj':
                for token_conj, pos_conj in tokens_pos:
                    if token_conj == conj_word and 'VERB' in pos_conj:
                        if is_foreseeability_verb(conj_word) and not is_coercion_verb(conj_word):
                            root_verbs.append((conj_word, j+1))
    
    #print(root_verbs)
"""


'\n    # Step 2: Process root verbs\n    for i, root in enumerate(roots):\n        root_verb, root_index = root\n        #print(root_verb, root_index)\n        # Check if the root verb is valid (foreseeability and not coercion)\n        for token, pos in tokens_pos:\n            #print(token, pos)\n            if token == root_verb and pos == \'VERB\':\n                #print("Found VERB")\n                if is_foreseeability_verb(root_verb) and not is_coercion_verb(root_verb):\n                    #print("F and not C") \n                    root_verbs.append((root_verb, i+1))\n        # Check for any conj attached to this root verb and add it if valid\n        for j, conj in enumerate(dependencies):\n            conj_word, conj_head, conj_rel = conj\n            if conj_head == root_index and conj_rel == \'conj\':\n                for token_conj, pos_conj in tokens_pos:\n                    if token_conj == conj_word and \'VERB\' in pos_conj:\n                        if is_foreseeabi

In [8]:
'''
# The main function to process each sentence
def find_valid_verbs(row):
    print("NEW ROW")
    
    dependencies = row['dependencies']
    tokens_pos = row['tokens_pos']

    counter_i = 0
    
    # Lists to store categorized verbs
    root_verbs = []
    xcomp_verbs = []
    ccomp_verbs = []
    parataxis_verbs = []
    advcl_verbs = []
    
    # Step 1: Identify all root verbs and their indices
    #roots = []
    for i, dep in enumerate(dependencies):
        if len(dep) == 3:
            word, head, deprel = dep
            if deprel == 'root':
                #roots.append((word, i + 1))  # Save the root verb and its index (i+1)
                for token, pos in tokens_pos:
                    if token == word and pos == 'VERB':
                        #print("Found VERB")
                        if is_foreseeability_verb(word) and not is_coercion_verb(word):
                            #print("F and not C") 
                            root_verbs.append((word, i+1))
                # Check for any conj attached to this root verb and add it if valid
                for j, conj in enumerate(dependencies):
                    conj_word, conj_head, conj_rel = conj
                    if conj_head == i+1 and conj_rel == 'conj':
                        for token_conj, pos_conj in tokens_pos:
                            if token_conj == conj_word and 'VERB' in pos_conj:
                                if is_foreseeability_verb(conj_word) and not is_coercion_verb(conj_word):
                                    root_verbs.append((conj_word, j+1))
    #print(root_verbs)

    
    # TILL HERE WORKS 
    
    # ВОПРОС С БУМАЖКИ
    
    # Step 3: Process each root verb and find related tags (xcomp, ccomp, conj, parataxis, advcl)
    for root_verb, root_index in root_verbs:
        for i, dep in enumerate(dependencies):
            if len(dep) == 3:
                dep_word, dep_head, dep_rel = dep
                
                # Handle each type of relation (xcomp, ccomp, parataxis, advcl)
                #print(dep_head, root_index, dep_rel)
                if dep_head == root_index and dep_rel in ['xcomp', 'ccomp', 'parataxis', 'advcl']:
                    #print("found one of xcomp, ccomp, parataxis, advcl")
                    #print(dep_word, dep_head, dep_rel)
                    for token, pos in tokens_pos:
                        if token == dep_word and 'VERB' in pos:
                            #print(token, pos)
                            if is_foreseeability_verb(dep_word) and not is_coercion_verb(dep_word):
                                # Add to the appropriate list based on dep_rel
                                if dep_rel == 'xcomp':
                                    xcomp_verbs.append(dep_word)
                                elif dep_rel == 'ccomp':
                                    ccomp_verbs.append(dep_word)
                                elif dep_rel == 'parataxis':
                                    parataxis_verbs.append(dep_word)
                                elif dep_rel == 'advcl':
                                    advcl_verbs.append(dep_word)
                                
                                # Check for any conj attached to this verb and add it
                                for conj_word, conj_head, conj_rel in dependencies:
                                    if conj_head == i + 1 and conj_rel == 'conj':  # Look for conj attached to the current word
                                        # Check if the conj word is a verb and passes the checks
                                        for token, pos in tokens_pos:
                                            if token == conj_word and 'VERB' in pos:
                                                if is_foreseeability_verb(conj_word) and not is_coercion_verb(conj_word):
                                                    if dep_rel == 'xcomp':
                                                        xcomp_verbs.append(conj_word)
                                                    elif dep_rel == 'ccomp':
                                                        ccomp_verbs.append(conj_word)
                                                    elif dep_rel == 'parataxis':
                                                        parataxis_verbs.append(conj_word)
                                                    elif dep_rel == 'advcl':
                                                        advcl_verbs.append(conj_word)
                                #break
    
    # Step 4: If any of the verb lists are not empty, assign to related category
    if root_verbs or xcomp_verbs or ccomp_verbs or parataxis_verbs or advcl_verbs:
        print(root_verbs)
        print(xcomp_verbs)
        print(ccomp_verbs)
        print(parataxis_verbs)
        print(advcl_verbs)
        return 'related'
    else:
        return 'others'
'''

'\n# The main function to process each sentence\ndef find_valid_verbs(row):\n    print("NEW ROW")\n    \n    dependencies = row[\'dependencies\']\n    tokens_pos = row[\'tokens_pos\']\n\n    counter_i = 0\n    \n    # Lists to store categorized verbs\n    root_verbs = []\n    xcomp_verbs = []\n    ccomp_verbs = []\n    parataxis_verbs = []\n    advcl_verbs = []\n    \n    # Step 1: Identify all root verbs and their indices\n    #roots = []\n    for i, dep in enumerate(dependencies):\n        if len(dep) == 3:\n            word, head, deprel = dep\n            if deprel == \'root\':\n                #roots.append((word, i + 1))  # Save the root verb and its index (i+1)\n                for token, pos in tokens_pos:\n                    if token == word and pos == \'VERB\':\n                        #print("Found VERB")\n                        if is_foreseeability_verb(word) and not is_coercion_verb(word):\n                            #print("F and not C") \n                            r

In [9]:
'''
# TRYING WITH COUNTER

# The main function to process each sentence
def find_valid_verbs(row):
    print("NEW ROW")
    
    dependencies = row['dependencies']
    tokens_pos = row['tokens_pos']

    counter_i = 0
    counter_j = 0
    
    # Lists to store categorized verbs
    roots = []
    root_verbs = []
    xcomp_verbs = []
    ccomp_verbs = []
    parataxis_verbs = []
    advcl_verbs = []
    
    # Step 1: Identify all root verbs and their indices
    #roots = []
    for dep in dependencies:
        if len(dep) == 3:
            word, head, deprel = dep
            counter_i = counter_i + 1
            #print(word, counter_i)
            if deprel == 'punct' and (word == "." or word == ":") and head == roots[0][1]:
                #print("Update Counter I")
                counter_i = 0
                
            if deprel == 'root':
                #roots.append((word, i + 1))  # Save the root verb and its index (i+1)
                roots.append((word, counter_i))
                for token, pos in tokens_pos:
                    if token == word and pos == 'VERB':
                        #print("Found VERB")
                        if is_foreseeability_verb(word) and not is_coercion_verb(word):
                            #print("F and not C") 
                            root_verbs.append((word, counter_i))

                counter_j = 0
                # Check for any conj attached to this root verb and add it if valid
                for conj in dependencies:
                    if len(conj) == 3:
                        conj_word, conj_head, conj_rel = conj
                        counter_j = counter_j +1
                        #print(conj_word, counter_j)
                        if conj_rel == 'punct' and (conj_word == "." or conj_word == ":") and conj_head == roots[0][1]:
                            #print("Update Counter J")
                            counter_j = 0
                        if conj_head == counter_i and conj_rel == 'conj':
                            for token_conj, pos_conj in tokens_pos:
                                if token_conj == conj_word and 'VERB' in pos_conj:
                                    if is_foreseeability_verb(conj_word) and not is_coercion_verb(conj_word):
                                        root_verbs.append((conj_word, counter_j))
                    
                    
        
    #print(root_verbs)

    
    # TILL HERE WORKS 
    
    # ВОПРОС С БУМАЖКИ
    
    # Step 3: Process each root verb and find related tags (xcomp, ccomp, conj, parataxis, advcl)
    for root_verb, root_index in root_verbs:
        for i, dep in enumerate(dependencies):
            if len(dep) == 3:
                dep_word, dep_head, dep_rel = dep
                
                # Handle each type of relation (xcomp, ccomp, parataxis, advcl)
                #print(dep_head, root_index, dep_rel)
                if dep_head == root_index and dep_rel in ['xcomp', 'ccomp', 'parataxis', 'advcl']:
                    #print("found one of xcomp, ccomp, parataxis, advcl")
                    #print(dep_word, dep_head, dep_rel)
                    for token, pos in tokens_pos:
                        if token == dep_word and 'VERB' in pos:
                            #print(token, pos)
                            if is_foreseeability_verb(dep_word) and not is_coercion_verb(dep_word):
                                # Add to the appropriate list based on dep_rel
                                if dep_rel == 'xcomp':
                                    xcomp_verbs.append(dep_word)
                                elif dep_rel == 'ccomp':
                                    ccomp_verbs.append(dep_word)
                                elif dep_rel == 'parataxis':
                                    parataxis_verbs.append(dep_word)
                                elif dep_rel == 'advcl':
                                    advcl_verbs.append(dep_word)
                                
                                # Check for any conj attached to this verb and add it
                                for conj_word, conj_head, conj_rel in dependencies:
                                    if conj_head == i + 1 and conj_rel == 'conj':  # Look for conj attached to the current word
                                        # Check if the conj word is a verb and passes the checks
                                        for token, pos in tokens_pos:
                                            if token == conj_word and 'VERB' in pos:
                                                if is_foreseeability_verb(conj_word) and not is_coercion_verb(conj_word):
                                                    if dep_rel == 'xcomp':
                                                        xcomp_verbs.append(conj_word)
                                                    elif dep_rel == 'ccomp':
                                                        ccomp_verbs.append(conj_word)
                                                    elif dep_rel == 'parataxis':
                                                        parataxis_verbs.append(conj_word)
                                                    elif dep_rel == 'advcl':
                                                        advcl_verbs.append(conj_word)
                                #break
    
    # Step 4: If any of the verb lists are not empty, assign to related category
    if root_verbs or xcomp_verbs or ccomp_verbs or parataxis_verbs or advcl_verbs:
        print(roots)
        print(root_verbs)
        print(xcomp_verbs)
        print(ccomp_verbs)
        print(parataxis_verbs)
        print(advcl_verbs)
        return 'related'
    else:
        return 'others'
'''

'\n# TRYING WITH COUNTER\n\n# The main function to process each sentence\ndef find_valid_verbs(row):\n    print("NEW ROW")\n    \n    dependencies = row[\'dependencies\']\n    tokens_pos = row[\'tokens_pos\']\n\n    counter_i = 0\n    counter_j = 0\n    \n    # Lists to store categorized verbs\n    roots = []\n    root_verbs = []\n    xcomp_verbs = []\n    ccomp_verbs = []\n    parataxis_verbs = []\n    advcl_verbs = []\n    \n    # Step 1: Identify all root verbs and their indices\n    #roots = []\n    for dep in dependencies:\n        if len(dep) == 3:\n            word, head, deprel = dep\n            counter_i = counter_i + 1\n            #print(word, counter_i)\n            if deprel == \'punct\' and (word == "." or word == ":") and head == roots[0][1]:\n                #print("Update Counter I")\n                counter_i = 0\n                \n            if deprel == \'root\':\n                #roots.append((word, i + 1))  # Save the root verb and its index (i+1)\n             

In [10]:
'''

# TRYING WITH all tags in one for cycle

# The main function to process each sentence
def find_valid_verbs(row):
       
    dependencies = row['dependencies']
    tokens_pos = row['tokens_pos']

    counter_i = 0
    counter_j = 0
    counter_x = 0
    
    # Lists to store categorized verbs
    roots = []
    root_verbs = []
    xcomp_verbs = []
    ccomp_verbs = []
    parataxis_verbs = []
    advcl_verbs = []
    
    for dep in dependencies:
        if len(dep) == 3:
            word, head, deprel = dep
            counter_i = counter_i + 1
            #print(word, counter_i)
            
            if roots:
                if deprel == 'punct' and (word == "." or word == ":") and head == roots[0][1]:
                    #print("Update Counter I")
                    counter_i = 0
                
            if deprel == 'root':
                #roots.append((word, i + 1))  # Save the root verb and its index (i+1)
                roots.append((word, counter_i))
                for token, pos in tokens_pos:
                    if token == word and pos == 'VERB':
                        #print("Found VERB")
                        if is_foreseeability_verb(word) and not is_coercion_verb(word):
                            #print("F and not C") 
                            root_verbs.append((word, counter_i))

                counter_j = 0
                # Check for any relater words attached to this root verb and add it if valid
                for related in dependencies:
                    if len(related) == 3:
                        related_word, related_head, related_rel = related
                        counter_j = counter_j +1
                        #print(related_word, counter_j)
                        if related_rel == 'punct' and (related_word == "." or related_word == ":") and related_head == roots[0][1]:
                            #print("Update Counter J")
                            counter_j = 0
                        if related_head == counter_i and related_rel in ['xcomp', 'ccomp', 'parataxis', 'advcl', 'conj']:
                            #print("FOUND RELATED: ", related_rel, " - ", related_word)
                            for token_related, pos_related in tokens_pos:
                                if token_related == related_word and 'VERB' in pos_related:
                                    if is_foreseeability_verb(related_word) and not is_coercion_verb(related_word):
                                        #print("RELATED WORD ", related_word, " PASSED ALL CHECKS")
                                        
                                        if related_rel == 'conj':
                                            root_verbs.append((related_word, counter_j))
                                            
                                        elif related_rel == 'xcomp':
                                            xcomp_verbs.append((related_word, counter_j))
                                            # найти conj для этого
                                            counter_x = 0
                                            for conj in dependencies:
                                                if len(conj) == 3:
                                                    conj_word, conj_head, conj_rel = conj
                                                    counter_x = counter_x +1
                                                    if conj_rel == 'punct' and (conj_word == "." or conj_word == ":") and conj_head == roots[0][1]:
                                                        #print("Update Counter J")
                                                        counter_x = 0
                                                    if conj_head == counter_j and conj_rel == 'conj':
                                                        for token_related, pos_related in tokens_pos:
                                                            if token_related == related_word and 'VERB' in pos_related:
                                                                if is_foreseeability_verb(related_word) and not is_coercion_verb(related_word):
                                                                    xcomp_verbs.append((conj_word, counter_x))
                                        
                                        elif related_rel == 'ccomp':
                                            ccomp_verbs.append((related_word, counter_j))
                                            # найти conj для этого
                                            counter_x = 0
                                            for conj in dependencies:
                                                if len(conj) == 3:
                                                    conj_word, conj_head, conj_rel = conj
                                                    counter_x = counter_x +1
                                                    if conj_rel == 'punct' and (conj_word == "." or conj_word == ":") and conj_head == roots[0][1]:
                                                        #print("Update Counter J")
                                                        counter_x = 0
                                                    if conj_head == counter_j and conj_rel == 'conj':
                                                        for token_related, pos_related in tokens_pos:
                                                            if token_related == related_word and 'VERB' in pos_related:
                                                                if is_foreseeability_verb(related_word) and not is_coercion_verb(related_word):
                                                                    ccomp_verbs.append((conj_word, counter_x))
                                        
                                        elif related_rel == 'parataxis':
                                            parataxis_verbs.append((related_word, counter_j))
                                            # найти conj для этого
                                            counter_x = 0
                                            for conj in dependencies:
                                                if len(conj) == 3:
                                                    conj_word, conj_head, conj_rel = conj
                                                    counter_x = counter_x +1
                                                    if conj_rel == 'punct' and (conj_word == "." or conj_word == ":") and conj_head == roots[0][1]:
                                                        #print("Update Counter J")
                                                        counter_x = 0
                                                    if conj_head == counter_j and conj_rel == 'conj':
                                                        for token_related, pos_related in tokens_pos:
                                                            if token_related == related_word and 'VERB' in pos_related:
                                                                if is_foreseeability_verb(related_word) and not is_coercion_verb(related_word):
                                                                    parataxis_verbs.append((conj_word, counter_x))
                                        
                                        elif related_rel == 'advcl':
                                            advcl_verbs.append((related_word, counter_j))
                                            # найти conj для этого
                                            counter_x = 0
                                            for conj in dependencies:
                                                if len(conj) == 3:
                                                    conj_word, conj_head, conj_rel = conj
                                                    counter_x = counter_x +1
                                                    if conj_rel == 'punct' and (conj_word == "." or conj_word == ":") and conj_head == roots[0][1]:
                                                        #print("Update Counter J")
                                                        counter_x = 0
                                                    if conj_head == counter_j and conj_rel == 'conj':
                                                        for token_related, pos_related in tokens_pos:
                                                            if token_related == related_word and 'VERB' in pos_related:
                                                                if is_foreseeability_verb(related_word) and not is_coercion_verb(related_word):
                                                                    advcl_verbs.append((conj_word, counter_x))
    

    #print("NEW ROW")
    
    if root_verbs or xcomp_verbs or ccomp_verbs or parataxis_verbs or advcl_verbs:
        #print(roots, " - roots")
        #print(root_verbs, " - root_verbs")
        #print(xcomp_verbs, " - xcomp_verbs")
        #print(ccomp_verbs, " - ccomp_verbs")
        #print(parataxis_verbs, " - parataxis_verbs")
        #print(advcl_verbs, " - advcl_verbs")
        #print()
        return 'related'
    else:
        return 0

'''

'\n\n# TRYING WITH all tags in one for cycle\n\n# The main function to process each sentence\ndef find_valid_verbs(row):\n       \n    dependencies = row[\'dependencies\']\n    tokens_pos = row[\'tokens_pos\']\n\n    counter_i = 0\n    counter_j = 0\n    counter_x = 0\n    \n    # Lists to store categorized verbs\n    roots = []\n    root_verbs = []\n    xcomp_verbs = []\n    ccomp_verbs = []\n    parataxis_verbs = []\n    advcl_verbs = []\n    \n    for dep in dependencies:\n        if len(dep) == 3:\n            word, head, deprel = dep\n            counter_i = counter_i + 1\n            #print(word, counter_i)\n            \n            if roots:\n                if deprel == \'punct\' and (word == "." or word == ":") and head == roots[0][1]:\n                    #print("Update Counter I")\n                    counter_i = 0\n                \n            if deprel == \'root\':\n                #roots.append((word, i + 1))  # Save the root verb and its index (i+1)\n                ro

## Current Version

In [11]:
# Define functions to check if a verb belongs to Foreseeability or Coercion groups

def is_foreseeability_verb(verb):
    # This function checks whether a verb belongs to a predefined set of foreseeability-related verb classes.
    foreseeability_classes = {'communication', 'creation', 'consumption', 'competition', 'possession', 'motion'}
    synsets = wn.synsets(verb, pos=wn.VERB)  # Fetches all verb synsets for the word
    for synset in synsets:
        lexname = synset.lexname().split('.')[1]  # Extracts the lexical category (i.e., type of action)
        if lexname in foreseeability_classes:  # Checks if the lexical category is in the foreseeability class
            return True  # Returns True if the verb matches any foreseeability category
    return False  # If no match is found, returns False


def is_coercion_verb(verb):
    # This function checks whether a verb belongs to a predefined set of coercion-related VerbNet classes.
    coercion_classes = {'urge-58.1', 'force-59', 'forbid-67'}
    synsets = wn.synsets(verb, pos=wn.VERB)  # Fetches all verb synsets for the word
    for synset in synsets:
        lemma = synset.lemmas()[0]  # Gets the first lemma for each synset
        vn_classes = lemma.key().split('%')[0]  # Extracts the lemma key
        vn_class_ids = vn.classids(vn_classes)  # Fetches the VerbNet classes for the lemma
        if any(vn_class in coercion_classes for vn_class in vn_class_ids):  # Checks for a match in coercion classes
            return True  # If a match is found in coercion classes, return True
    return False  # If no match is found, return False

In [12]:
def step_one_function(row):
    # This is the function to find valid verbs (root, xcomp, ccomp, parataxis, advcl and their conjunctions)
    dependencies = row['dependencies']  # Dependency relations for the sentence
    tokens_pos = row['tokens_pos']  # POS-tagged tokens for the sentence
    
    counter_i = 0  # Counter for tracking the index of words in the dependency structure
    counter_j = 0  # Counter for tracking the index during nested loops
    counter_x = 0  # Counter used to track conj words
    
    # Lists to store categorized verbs

    # (word, own index, main root), if root is root (not conj) - write its own index
    roots = []  # For root verbs
    root_verbs = []  # For valid root verbs (that pass foreseeability and coercion checks)

    # (word, own index, head index)
    xcomp_verbs = []  # For xcomp verbs
    ccomp_verbs = []  # For ccomp verbs
    parataxis_verbs = []  # For parataxis verbs
    advcl_verbs = []  # For advcl verbs
    
    # Iterate through dependencies to identify roots and their related verbs
    for dep in dependencies:
        if len(dep) == 3:
            word, head, deprel = dep  # Unpacking the dependency tuple (word, head, relation)
            counter_i += 1  # Increment the index counter for this word
            
            # If an end of the sentence has been found, reset the counter for punctuation
            if roots:
                if deprel == 'punct' and (word == "." or word == ":") and head == roots[0][1]:
                    counter_i = 0  # Reset counter when punctuation is found after root
                
            # Check if the current word is the root of the sentence
            if deprel == 'root':
                roots.append((word, counter_i, counter_i))  # Add the root verb and its index
                for token, pos in tokens_pos:  # Iterate through POS tokens to find the root as a verb
                    if token == word and pos == 'VERB':
                        if is_foreseeability_verb(word) and not is_coercion_verb(word):
                            root_verbs.append((word, counter_i, counter_i))  # Add root verb if it passes foreseeability and coercion checks
                # looking for related conj
                counter_j = 0
                for related in dependencies:
                    if len(related) == 3:
                        related_word, related_head, related_rel = related  # Unpacking the dependency
                        counter_j += 1  # Increment the index for the related word
                        # Reset the counter for punctuation after root - end of the sentence
                        if related_rel == 'punct' and (related_word == "." or related_word == ":") and related_head == roots[0][1]:
                            counter_j = 0
                        # Look for conj attached to the verb
                        if related_head == counter_i and related_rel in ['conj']:
                            roots.append((related_word, counter_j, counter_i))  # Add the root verb and its index
                            for token_related, pos_related in tokens_pos:  # Find if the related word is a verb
                                if token_related == related_word and 'VERB' in pos_related:
                                    if is_foreseeability_verb(related_word) and not is_coercion_verb(related_word):
                                        root_verbs.append((related_word, counter_j, counter_i))  # Conj relation to root


    
    # Find related verbs (xcomp, ccomp, etc.) for root verbs and their conj

    for verb in roots:
        word, index, head_index = verb
        #counter_i += 1

        counter_j = 0
        for related in dependencies:
            if len(related) == 3:
                related_word, related_head, related_rel = related  # Unpacking the dependency
                counter_j += 1  # Increment the index for the related word
                
                # Reset the counter for punctuation after root - end of the sentence
                if related_rel == 'punct' and (related_word == "." or related_word == ":") and related_head == roots[0][1]:
                    counter_j = 0
                
                # Look for xcomp, ccomp, parataxis or advcl relations attached to the root and its conj
                if related_head == index and related_rel in ['xcomp', 'ccomp', 'parataxis', 'advcl']:
                    #print("found some related word: ", related_rel)
                    for token_related, pos_related in tokens_pos:  # Find if the related word is a verb
                        if token_related == related_word and 'VERB' in pos_related:
                            if is_foreseeability_verb(related_word) and not is_coercion_verb(related_word):
                                #print("related word passed all checks: ", related_rel)
                                # Depending on the relation type, add the related verb to the appropriate list
                                #if related_rel == 'conj':
                                   # root_verbs.append((related_word, counter_j))  # Conj relation to root
                                if related_rel == 'xcomp':
                                    xcomp_verbs.append((related_word, counter_j, index))  # xcomp relation to root
                                    # Handle conj for xcomp verbs
                                    counter_x = 0
                                    for conj in dependencies:
                                        if len(conj) == 3:
                                            conj_word, conj_head, conj_rel = conj
                                            counter_x += 1
                                            if conj_rel == 'punct' and (conj_word == "." or conj_word == ":") and conj_head == roots[0][1]:
                                                counter_x = 0  # Reset counter for punctuation
                                            if conj_head == counter_j and conj_rel == 'conj':
                                                # Check if the conj word is a valid verb
                                                for token_related, pos_related in tokens_pos:
                                                    if token_related == related_word and 'VERB' in pos_related:
                                                        if is_foreseeability_verb(related_word) and not is_coercion_verb(related_word):
                                                            xcomp_verbs.append((conj_word, counter_x, index))  # Conj for xcomp
                                
                                # Handle ccomp, parataxis, advcl similarly for related verbs and their conjunctions
                                elif related_rel == 'ccomp':
                                    ccomp_verbs.append((related_word, counter_j, index))  # ccomp relation
                                    # Handle conj for ccomp
                                    counter_x = 0
                                    for conj in dependencies:
                                        if len(conj) == 3:
                                            conj_word, conj_head, conj_rel = conj
                                            counter_x += 1
                                            if conj_head == counter_j and conj_rel == 'conj':
                                                for token_related, pos_related in tokens_pos:
                                                    if token_related == related_word and 'VERB' in pos_related:
                                                        if is_foreseeability_verb(related_word) and not is_coercion_verb(related_word):
                                                            ccomp_verbs.append((conj_word, counter_x, index))  # Conj for ccomp
                                
                                elif related_rel == 'parataxis':
                                    parataxis_verbs.append((related_word, counter_j, index))  # parataxis relation
                                    # Handle conj for parataxis
                                    counter_x = 0
                                    for conj in dependencies:
                                        if len(conj) == 3:
                                            conj_word, conj_head, conj_rel = conj
                                            counter_x += 1
                                            if conj_head == counter_j and conj_rel == 'conj':
                                                for token_related, pos_related in tokens_pos:
                                                    if token_related == related_word and 'VERB' in pos_related:
                                                        if is_foreseeability_verb(related_word) and not is_coercion_verb(related_word):
                                                            parataxis_verbs.append((conj_word, counter_x, index))  # Conj for parataxis
                                
                                elif related_rel == 'advcl':
                                    advcl_verbs.append((related_word, counter_j, index))  # advcl relation
                                    # Handle conj for advcl
                                    counter_x = 0
                                    for conj in dependencies:
                                        if len(conj) == 3:
                                            conj_word, conj_head, conj_rel = conj
                                            counter_x += 1
                                            if conj_head == counter_j and conj_rel == 'conj':
                                                for token_related, pos_related in tokens_pos:
                                                    if token_related == related_word and 'VERB' in pos_related:
                                                        if is_foreseeability_verb(related_word) and not is_coercion_verb(related_word):
                                                            advcl_verbs.append((conj_word, counter_x, index))  # Conj for advcl
    #print("NEW ROW")
    
    # Return the lists of related verbs
    #if root_verbs or xcomp_verbs or ccomp_verbs or parataxis_verbs or advcl_verbs:
        #print(roots, " - roots")
        #print(root_verbs, " - root_verbs")
        #print(xcomp_verbs, " - xcomp_verbs")
        #print(ccomp_verbs, " - ccomp_verbs")
        #print(parataxis_verbs, " - parataxis_verbs")
        #print(advcl_verbs, " - advcl_verbs")
        #print()
    return roots, root_verbs, xcomp_verbs, ccomp_verbs, parataxis_verbs, advcl_verbs

In [13]:
def check_agent_validity(related_word, row, tokens_pos):
    
    entities = row['entities']
    valid_ent_labels = ["PERSON", "NORP", "ORG", "GPE"]
    valid_additional_words = ["person", "man", "woman", "police", "administration", "immigrants", "president", "minister", "senator", 
                              "representative", "governor", "mayor", "council", "secretary", "ambassador", "chancellor", "parliamentary", "mr.", "ms.", "mrs."]

    self = False
    agent_is_valid = False
    
    for entity, label in entities: 
        if entity in related_word and label in valid_ent_labels: 
            agent_is_valid = True # is it in NER list?  
    if not agent_is_valid and 'PRON' in [pos for token, pos in tokens_pos if token == related_word]: 
        agent_is_valid = True # is it a promoun?
        if related_word.lower() == "i" or related_word.lower() == "we":
            self = True
    if not agent_is_valid and related_word.lower() in valid_additional_words: agent_is_valid = True # is it from the list of additional words?

    return agent_is_valid, self



def check_causative_verb(verb):
    # Check if the verb is in the CAUSE class or has CAUSETO relation in WordNet
    for synset in wn.synsets(verb, pos=wn.VERB):
        if 'cause' in synset.lemma_names():
            return True
        for lemma in synset.lemmas():
            for frame in lemma.frame_strings():
                if 'CAUSE' in frame or 'CAUSETO' in frame:
                    return True
    return False



def define_polarity(verb, obj):
    # Function to define the Polarity of the combination verb + object taking into attention a negation connected to that verb
    # 0 - others, 1 - positive, 2 - negative

    result = 0
    
    # Create a simple context for WSD
    context = f"{verb} {obj}"
    
    # Word Sense Disambiguation for the verb and object
    verb_sense = lesk(context.split(), verb, 'v')
    obj_sense = lesk(context.split(), obj, 'n')
    
    # Calculate polarity using SentiWordNet
    pos_score = 0
    neg_score = 0
    
    if verb_sense:
        swn_verb = swn.senti_synset(verb_sense.name())
        pos_score += swn_verb.pos_score()
        neg_score += swn_verb.neg_score()
    
    if obj_sense:
        swn_obj = swn.senti_synset(obj_sense.name())
        pos_score += swn_obj.pos_score()
        neg_score += swn_obj.neg_score()

    # AFINN score
    afinn_score = afinn.score(context)
    if afinn_score > 0:
        pos_score += afinn_score
    else:
        neg_score += abs(afinn_score)

    # Subjectivity Lexicon score
    tokens = context.split()
    subj_pos = sum([1 for token in tokens if token in opinion_lexicon.positive()])
    subj_neg = sum([1 for token in tokens if token in opinion_lexicon.negative()])
    
    pos_score += subj_pos
    neg_score += subj_neg

    # Determine final polarity
    if pos_score > neg_score:
        return 1  # Positive/Praise
    elif neg_score > pos_score:
        return 2  # Negative/Blame
    else:
        return 0  # Neutral
        
    return 0



def adjust_sentiment_for_negation(row, polarity, verb):
    word, index, head_index = verb # if index == head_index - main root, not conj
    dependencies = row['dependencies']

    for related in dependencies:
        if len(related) == 3:
            related_word, related_head, related_rel = related  # Unpacking the dependency
            if related_head == index and related_rel in ['advmod'] and (related_word == 'not' or related_word == 'n’t'):
                if polarity == 0:
                    return 0
                if polarity == 1:
                    return 2
                if polarity == 2:
                    return 1
    
    return polarity

In [14]:
def step_two_function(row, roots, root_verbs, xcomp_verbs, ccomp_verbs, parataxis_verbs, advcl_verbs):
    
     # This is the function to decide on Agent Causality, find the object, decide on Polarity and classify the row 
    
    dependencies = row['dependencies']  # Dependency relations for the sentence
    tokens_pos = row['tokens_pos']  # POS-tagged tokens for the sentence  

    self = False
    result = None
    agent_is_valid = False
    
    # Сonnection 1: nsubj / nsubj:pass - root_verb - obj / iobj / obl - by priority
    for verb in root_verbs:
        word, index, head_index = verb # if index == head_index - main root, not conj

        # 1 - Find an agent connected to the given verb 
        for related in dependencies:
            if len(related) == 3:
                related_word, related_head, related_rel = related  # Unpacking the dependency
                # subject connected to the word itself or to its root
                if related_head == index and related_rel in ['nsubj', 'nsubj:pass']: # is that an agent?
                    # 2 - Check if it is a relevant agent - NER categories + list of additional terms (secretary and so on)
                    agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)
                else:
                    if related_head == head_index and related_rel in ['nsubj', 'nsubj:pass']: # is that an agent?
                        # 2 - Check if it is a relevant agent - NER categories + list of additional terms (secretary and so on)
                        agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)

        # If agent is not valid, check for causative verbs
        if not agent_is_valid:
            if check_causative_verb(word):
                agent_is_valid = True

        # 3 - Find an object connected to the given verb
        # obj / iobj / obl - by priority
        if agent_is_valid:
            for related in dependencies:
                if len(related) == 3:
                    related_word, related_head, related_rel = related  # Unpacking the dependency
                    if related_head == index and related_rel in ['obj']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity

                    if related_head == index and related_rel in ['iobj']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity

                    if related_head == index and related_rel in ['obl']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity
        

    
    # Сonnection 2: nsubj / nsubj:pass - root_verb - xcomp - obj / iobj / obl - by priority
    for verb in xcomp_verbs:
        word, index, head_index = verb # if index == head_index - main root, not conj

        # 1 - Find an agent connected to the given verb 
        for related in dependencies:
            if len(related) == 3:
                related_word, related_head, related_rel = related  # Unpacking the dependency
                # subject connected to the word itself or to its root
                if related_head == index and related_rel in ['nsubj', 'nsubj:pass']: # is that an agent?
                    # 2 - Check if it is a relevant agent - NER categories + list of additional terms (secretary and so on)
                    agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)
                else:
                    if related_head == head_index and related_rel in ['nsubj', 'nsubj:pass']: # is that an agent?
                        # 2 - Check if it is a relevant agent - NER categories + list of additional terms (secretary and so on)
                        agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)

        # If agent is not valid, check for causative verbs
        if not agent_is_valid:
            if check_causative_verb(word):
                agent_is_valid = True

        # 3 - Find an object connected to the given verb
        # obj / iobj / obl - by priority
        if agent_is_valid:
            for related in dependencies:
                if len(related) == 3:
                    related_word, related_head, related_rel = related  # Unpacking the dependency
                    if related_head == index and related_rel in ['obj']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity

                    if related_head == index and related_rel in ['iobj']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity

                    if related_head == index and related_rel in ['obl']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity


    # Сonnection 3: nsubj / nsubj:pass - parataxis_verbs - (xcomp) - obj / iobj / obl - by priority    
    for verb in parataxis_verbs:
        word, index, head_index = verb # if index == head_index - main root, not conj

        # 1 - Find an agent connected to the given verb 
        for related in dependencies:
            if len(related) == 3:
                related_word, related_head, related_rel = related  # Unpacking the dependency
                # subject connected to the word itself or to its root
                if related_head == index and related_rel in ['nsubj', 'nsubj:pass']: # is that an agent?
                    # 2 - Check if it is a relevant agent - NER categories + list of additional terms (secretary and so on)
                    agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)
                else:
                    if related_head == head_index and related_rel in ['nsubj', 'nsubj:pass']: # is that an agent?
                        # 2 - Check if it is a relevant agent - NER categories + list of additional terms (secretary and so on)
                        agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)

        # If agent is not valid, check for causative verbs
        if not agent_is_valid:
            if check_causative_verb(word):
                agent_is_valid = True

        # 3 - Find an object connected to the given verb
        # obj / iobj / obl - by priority
        if agent_is_valid:
            counter_j = 0
            for related in dependencies:
                if len(related) == 3:
                    related_word, related_head, related_rel = related  # Unpacking the dependency
                    counter_j += 1  # Increment the index for the related word
                    
                    # Reset the counter for punctuation after root - end of the sentence
                    if related_rel == 'punct' and (related_word == "." or related_word == ":") and related_head == roots[0][1]:
                        counter_j = 0
                    if related_head == index and related_rel in ['obj']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity

                    if related_head == index and related_rel in ['iobj']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity

                    if related_head == index and related_rel in ['obl']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity

                    if related_head == index and related_rel in ['xcomp']:
                        #print("Marvel had happened with parataxis")
                        for related_to_xcomp in dependencies:
                            if len(related_to_xcomp) == 3:
                                related_to_xcomp_word, related_to_xcomp_head, related_to_xcomp_rel = related_to_xcomp  # Unpacking the dependency
                                if related_to_xcomp_head == counter_j and related_rel in ['obj']:
                                    # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                                    polarity = define_polarity(related_word, related_to_xcomp_word)
                                    polarity = adjust_sentiment_for_negation(row, polarity, related)
                                    if polarity != 0:
                                        if self:
                                            result = "self - " + str(polarity)
                                            return result
                                        return polarity
            
                                if related_to_xcomp_head == counter_j and related_rel in ['iobj']:
                                    # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                                    polarity = define_polarity(related_word, related_to_xcomp_word)
                                    polarity = adjust_sentiment_for_negation(row, polarity, related)
                                    if polarity != 0:
                                        if self:
                                            result = "self - " + str(polarity)
                                            return result
                                        return polarity
            
                                if related_to_xcomp_head == counter_j and related_rel in ['obl']:
                                    # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                                    polarity = define_polarity(related_word, related_to_xcomp_word)
                                    polarity = adjust_sentiment_for_negation(row, polarity, related)
                                    if polarity != 0:
                                        if self:
                                            result = "self - " + str(polarity)
                                            return result
                                        return polarity



    # Сonnection 4: nsubj / nsubj:pass - advcl_verbs - (xcomp) - obj / iobj / obl - by priority   
    for verb in advcl_verbs:
        word, index, head_index = verb # if index == head_index - main root, not conj
    
        # 1 - Find an agent connected to the given verb 
        for related in dependencies:
            if len(related) == 3:
                related_word, related_head, related_rel = related  # Unpacking the dependency
                # subject connected to the word itself or to its root
                if related_head == index and related_rel in ['nsubj', 'nsubj:pass']: # is that an agent?
                    # 2 - Check if it is a relevant agent - NER categories + list of additional terms (secretary and so on)
                    agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)
                else:
                    if related_head == head_index and related_rel in ['nsubj', 'nsubj:pass']: # is that an agent?
                        # 2 - Check if it is a relevant agent - NER categories + list of additional terms (secretary and so on)
                        agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)

        # If agent is not valid, check for causative verbs
        if not agent_is_valid:
            if check_causative_verb(word):
                agent_is_valid = True

        # 3 - Find an object connected to the given verb
        # obj / iobj / obl - by priority
        if agent_is_valid:
            counter_j = 0
            for related in dependencies:
                if len(related) == 3:
                    related_word, related_head, related_rel = related  # Unpacking the dependency
                    counter_j += 1  # Increment the index for the related word
                    
                    # Reset the counter for punctuation after root - end of the sentence
                    if related_rel == 'punct' and (related_word == "." or related_word == ":") and related_head == roots[0][1]:
                        counter_j = 0
                    if related_head == index and related_rel in ['obj']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity

                    if related_head == index and related_rel in ['iobj']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity

                    if related_head == index and related_rel in ['obl']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity

                    if related_head == index and related_rel in ['xcomp']:
                        #print("Marvel had happened with advcl")
                        for related_to_xcomp in dependencies:
                            if len(related_to_xcomp) == 3:
                                related_to_xcomp_word, related_to_xcomp_head, related_to_xcomp_rel = related_to_xcomp  # Unpacking the dependency
                                if related_to_xcomp_head == counter_j and related_rel in ['obj']:
                                    # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                                    polarity = define_polarity(related_word, related_to_xcomp_word)
                                    polarity = adjust_sentiment_for_negation(row, polarity, related)
                                    if polarity != 0:
                                        if self:
                                            result = "self - " + str(polarity)
                                            return result
                                        return polarity
            
                                if related_to_xcomp_head == counter_j and related_rel in ['iobj']:
                                    # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                                    polarity = define_polarity(related_word, related_to_xcomp_word)
                                    polarity = adjust_sentiment_for_negation(row, polarity, related)
                                    if polarity != 0:
                                        if self:
                                            result = "self - " + str(polarity)
                                            return result
                                        return polarity
            
                                if related_to_xcomp_head == counter_j and related_rel in ['obl']:
                                    # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                                    polarity = define_polarity(related_word, related_to_xcomp_word)
                                    polarity = adjust_sentiment_for_negation(row, polarity, related)
                                    if polarity != 0:
                                        if self:
                                            result = "self - " + str(polarity)
                                            return result
                                        return polarity

    
    
    # Checking for other possible connections
    # (word, own index, main root), if root is root (not conj) - write its own index
    # (word, own index, head index)
    
    # АГЕНТ МБ ПРИСОЕДИНЕН К ROOT, А ОБЪЕКТ К ВСПОМОГАТЕЛЬНОМУ ТЕГУ
    # ИСКАТЬ СУБЪЕКТ И К СЕБЕ И К ROOT ПРИСОЕДИНЕННОМУ
    # ОБЪЕКТ ИЩЕМ ПРИСОЕДИНЕННЫЙ К СЕБЕ

    # Сonnection 5: nsubj - ccomp_verbs - (xcomp) - obj / iobj / obl - by priority   
    for verb in ccomp_verbs: 
        word, index, head_index = verb # if index == head_index - main root, not conj
    
        # 1 - Find an agent connected to the given verb 
        for related in dependencies:
            if len(related) == 3:
                related_word, related_head, related_rel = related  # Unpacking the dependency
                # subject connected to the word itself or to its root
                if related_head == index and related_rel in ['nsubj']: # is that an agent?
                    # 2 - Check if it is a relevant agent - NER categories + list of additional terms (secretary and so on)
                    agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)
                #else:
                    #if related_head == head_index and related_rel in ['nsubj']: # is that an agent?
                        # 2 - Check if it is a relevant agent - NER categories + list of additional terms (secretary and so on)
                        #agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)

        # If agent is not valid, check for causative verbs
        if not agent_is_valid:
            if check_causative_verb(word):
                agent_is_valid = True

        # 3 - Find an object connected to the given verb
        # obj / iobj / obl - by priority
        if agent_is_valid:
            counter_j = 0
            for related in dependencies:
                if len(related) == 3:
                    related_word, related_head, related_rel = related  # Unpacking the dependency
                    counter_j += 1  # Increment the index for the related word
                    
                    # Reset the counter for punctuation after root - end of the sentence
                    if related_rel == 'punct' and (related_word == "." or related_word == ":") and related_head == roots[0][1]:
                        counter_j = 0
                    if related_head == index and related_rel in ['obj']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity

                    if related_head == index and related_rel in ['iobj']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity

                    if related_head == index and related_rel in ['obl']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity

                    if related_head == index and related_rel in ['xcomp']:
                        #print("Marvel had happened with advcl")
                        for related_to_xcomp in dependencies:
                            if len(related_to_xcomp) == 3:
                                related_to_xcomp_word, related_to_xcomp_head, related_to_xcomp_rel = related_to_xcomp  # Unpacking the dependency
                                if related_to_xcomp_head == counter_j and related_rel in ['obj']:
                                    # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                                    polarity = define_polarity(related_word, related_to_xcomp_word)
                                    polarity = adjust_sentiment_for_negation(row, polarity, related)
                                    if polarity != 0:
                                        if self:
                                            result = "self - " + str(polarity)
                                            return result
                                        return polarity
            
                                if related_to_xcomp_head == counter_j and related_rel in ['iobj']:
                                    # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                                    polarity = define_polarity(related_word, related_to_xcomp_word)
                                    polarity = adjust_sentiment_for_negation(row, polarity, related)
                                    if polarity != 0:
                                        if self:
                                            result = "self - " + str(polarity)
                                            return result
                                        return polarity
            
                                if related_to_xcomp_head == counter_j and related_rel in ['obl']:
                                    # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                                    polarity = define_polarity(related_word, related_to_xcomp_word)
                                    polarity = adjust_sentiment_for_negation(row, polarity, related)
                                    if polarity != 0:
                                        if self:
                                            result = "self - " + str(polarity)
                                            return result
                                        return polarity
        



    # Сonnection 6: nsubj:pass - ccomp_verbs - (xcomp) - obl:agent / obl - by priority   
    for verb in ccomp_verbs: 
        word, index, head_index = verb # if index == head_index - main root, not conj
    
        # 1 - Find an agent connected to the given verb 
        for related in dependencies:
            if len(related) == 3:
                related_word, related_head, related_rel = related  # Unpacking the dependency
                # subject connected to the word itself or to its root
                if related_head == index and related_rel in ['obl:agent', 'obl']: # is that an agent?
                    #print("Marvel with ccomp passive happened")
                    # 2 - Check if it is a relevant agent - NER categories + list of additional terms (secretary and so on)
                    agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)
                #else:
                   #if related_head == head_index and related_rel in ['nsubj', 'nsubj:pass']: # is that an agent?
                        # 2 - Check if it is a relevant agent - NER categories + list of additional terms (secretary and so on)
                       #agent_is_valid, self = check_agent_validity(related_word, row, tokens_pos)

        # If agent is not valid, check for causative verbs
        if not agent_is_valid:
            if check_causative_verb(word):
                agent_is_valid = True

        # 3 - Find an object connected to the given verb
        # obj / iobj / obl - by priority
        if agent_is_valid:
            #print("Even Agent is valid")
            counter_j = 0
            for related in dependencies:
                if len(related) == 3:
                    related_word, related_head, related_rel = related  # Unpacking the dependency
                    counter_j += 1  # Increment the index for the related word
                    
                    # Reset the counter for punctuation after root - end of the sentence
                    if related_rel == 'punct' and (related_word == "." or related_word == ":") and related_head == roots[0][1]:
                        counter_j = 0
                        
                    if related_head == index and related_rel in ['nsubj:pass']:
                        # 4 - Define the Polarity of the combination verb + object taking into attention a negation connected to that verb
                        polarity = define_polarity(word, related_word)
                        polarity = adjust_sentiment_for_negation(row, polarity, verb)
                        if polarity != 0:
                            if self:
                                result = "self - " + str(polarity)
                                return result
                            return polarity

    return 0

In [15]:
def find_valid_verbs(row):
    
    # This is the main function to process each row of data and classify the row 

    # 1 - Find all the related verbs in categories in dependency column 'root', 'xcomp', 'ccomp', 'parataxis', 'advcl', 'conj' (is a verb check - foreseeability check - coercion check)
    roots, root_verbs, xcomp_verbs, ccomp_verbs, parataxis_verbs, advcl_verbs = step_one_function(row)
    
    # 2 - If at least one of the lists is not empty - can proceed
    if root_verbs or xcomp_verbs or ccomp_verbs or parataxis_verbs or advcl_verbs:
        
        # 3 - Take a final decision about the label (0 - others, 1 - positive, 2 - negative)
        return step_two_function(row, roots, root_verbs, xcomp_verbs, ccomp_verbs, parataxis_verbs, advcl_verbs)
    
    else:
        return 0

In [16]:
# Apply the function to the small dataset
df_train_ready_merged_small['Final_Result'] = df_train_ready_merged_small.apply(find_valid_verbs, axis=1)
df_train_ready_merged_small = df_train_ready_merged_small[['Sentence', 'Label', 'Final_Result'] + [col for col in df_train_ready_merged_small.columns if col not in ['Sentence', 'Label', 'Final_Result']]]
df_valid_ready_merged_small['Final_Result'] = df_valid_ready_merged_small.apply(find_valid_verbs, axis=1)
df_valid_ready_merged_small = df_train_ready_merged_small[['Sentence', 'Label', 'Final_Result'] + [col for col in df_train_ready_merged_small.columns if col not in ['Sentence', 'Label', 'Final_Result']]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_ready_merged_small['Final_Result'] = df_train_ready_merged_small.apply(find_valid_verbs, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_valid_ready_merged_small['Final_Result'] = df_valid_ready_merged_small.apply(find_valid_verbs, axis=1)


In [17]:
df_train_ready_merged_small

Unnamed: 0,Sentence,Label,Final_Result,tokens_pos,entities,dependencies
0,George is not supporting Clinton.,2,2,"[(George, PROPN), (is, AUX), (not, PART), (sup...","[(George, PERSON), (Clinton, PERSON)]","[(George, 4, nsubj), (is, 4, aux), (not, 4, ad..."
1,Ryan has endorsed Trump and told reporters thi...,1,1,"[(Ryan, PROPN), (has, AUX), (endorsed, VERB), ...","[(Ryan, PERSON), (Trump, PERSON), (this past w...","[(Ryan, 3, nsubj), (has, 3, aux), (endorsed, 0..."
2,"John McGraw, 78, was charged with assault and ...",2,0,"[(John, PROPN), (McGraw, PROPN), (,, PUNCT), (...","[(John McGraw, PERSON), (78, DATE), (Thursday,...","[(John, 7, nsubj:pass), (McGraw, 1, flat), (,,..."
3,The Filipino fighter unleashed a dazzling comb...,1,0,"[(The, DET), (Filipino, ADJ), (fighter, NOUN),...","[(Filipino, NORP), (Margarito, PERSON), (Maywe...","[(The, 3, det), (Filipino, 3, amod), (fighter,..."
4,But the Marlins have failed to make the postse...,0,0,"[(But, CCONJ), (the, DET), (Marlins, PROPN), (...","[(Marlins, ORG), (Loria, PERSON)]","[(But, 5, cc), (the, 3, det), (Marlins, 5, nsu..."
5,"So shortly after 2 a.m., campaign chairman Joh...",0,0,"[(So, ADV), (shortly, ADV), (after, ADP), (2, ...","[(2 a.m., TIME), (John Podesta, PERSON), (Clin...","[(So, 11, advmod), (shortly, 4, advmod), (afte..."
6,The Church has not answered the allegations ot...,0,1,"[(The, DET), (Church, PROPN), (has, AUX), (not...","[(Church, ORG), (Navajo, GPE)]","[(The, 2, det), (Church, 5, nsubj), (has, 5, a..."
7,"Bruno Beschizza, the conservative mayor of has...",0,0,"[(Bruno, PROPN), (Beschizza, PROPN), (,, PUNCT...","[(Bruno Beschizza, PERSON), (Théo, PERSON)]","[(Bruno, 9, nsubj), (Beschizza, 1, flat), (,, ..."
8,"On criminal justice issues, Harris faults Carp...",2,2,"[(On, ADP), (criminal, ADJ), (justice, NOUN), ...","[(Harris, PERSON), (Carper, PERSON), (the 1990...","[(On, 4, case), (criminal, 3, amod), (justice,..."
9,Even the hologame that Finn starts up on the s...,0,0,"[(Even, ADV), (the, DET), (hologame, NOUN), (t...","[(Finn, PERSON), (Chewbacca, PERSON), (A New H...","[(Even, 3, advmod), (the, 3, det), (hologame, ..."


In [18]:
# Apply the function to the dataset
df_train_ready_merged['Final_Result'] = df_train_ready_merged.apply(find_valid_verbs, axis=1)
df_train_ready_merged = df_train_ready_merged[['Sentence', 'Label', 'Final_Result'] + [col for col in df_train_ready_merged_small.columns if col not in ['Sentence', 'Label', 'Final_Result']]]
df_valid_ready_merged['Final_Result'] = df_valid_ready_merged.apply(find_valid_verbs, axis=1)
df_valid_ready_merged = df_valid_ready_merged[['Sentence', 'Label', 'Final_Result'] + [col for col in df_train_ready_merged_small.columns if col not in ['Sentence', 'Label', 'Final_Result']]]

In [19]:
df_train_ready_merged['Final_Result'].value_counts()

Final_Result
0           3701
2            721
1            562
self - 1      31
self - 2      17
Name: count, dtype: int64

In [20]:
df_valid_ready_merged['Final_Result'].value_counts()

Final_Result
0           416
2            82
1            49
self - 2      2
self - 1      1
Name: count, dtype: int64

## Save to Excel random Samples

In [21]:
# Take 20 random rows from each DataFrame
df_train_sample = df_train_ready_merged.sample(n=200, random_state=42)
df_valid_sample = df_valid_ready_merged.sample(n=200, random_state=42)

# Save them to an Excel file with different sheets
with pd.ExcelWriter('sampled_data_2.xlsx') as writer:
    df_train_sample.to_excel(writer, sheet_name='Train_Sample', index=False)
    df_valid_sample.to_excel(writer, sheet_name='Valid_Sample', index=False)

# Evaluation

## Map values in Final_Result column to numbers

In [22]:
# Mapping dictionary
label_mapping = {"self - 1": 1, "self - 2": 2}

# 0 - neutral, 1 - praise, 2 - blame

# Apply the mapping to the 'Final_Result' column
df_train_ready_merged['Final_Result'] = df_train_ready_merged['Final_Result'].replace(label_mapping)
df_valid_ready_merged['Final_Result'] = df_valid_ready_merged['Final_Result'].replace(label_mapping)

  df_train_ready_merged['Final_Result'] = df_train_ready_merged['Final_Result'].replace(label_mapping)
  df_valid_ready_merged['Final_Result'] = df_valid_ready_merged['Final_Result'].replace(label_mapping)


In [23]:
df_train_ready_merged['Final_Result'].value_counts()

Final_Result
0    3701
2     738
1     593
Name: count, dtype: int64

In [24]:
df_valid_ready_merged['Final_Result'].value_counts()

Final_Result
0    416
2     84
1     50
Name: count, dtype: int64

In [25]:
df_train_eval = df_train_ready_merged[['Sentence', 'Label', 'Final_Result']]
df_valid_eval = df_valid_ready_merged[['Sentence', 'Label', 'Final_Result']]

### train

In [26]:
# Extract true labels and predicted labels
y_true_train = df_train_eval['Label']
y_pred_train = df_train_eval['Final_Result']

In [27]:
# Assuming you have a DataFrame with 'Label' as true labels and 'Final_Result' as predicted labels

# Calculate F1 Scores
f1_micro = f1_score(y_true_train, y_pred_train, average='micro')
f1_macro = f1_score(y_true_train, y_pred_train, average='macro')
f1_weighted = f1_score(y_true_train, y_pred_train, average='weighted')

# Calculate Precision and Recall for completeness (optional)
precision_micro = precision_score(y_true_train, y_pred_train, average='micro')
precision_macro = precision_score(y_true_train, y_pred_train, average='macro')
recall_micro = recall_score(y_true_train, y_pred_train, average='micro')
recall_macro = recall_score(y_true_train, y_pred_train, average='macro')

# Create a DataFrame to display the results
results_df = pd.DataFrame({
    'Metric': ['F1 Score', 'Precision', 'Recall'],
    'Micro-average': [f1_micro, precision_micro, recall_micro],
    'Macro-average': [f1_macro, precision_macro, recall_macro],
    'Weighted-average': [f1_weighted, None, None]  # Weighted average only applicable to F1 score here
})

# Display the table
print(results_df)

# You can also use classification report to see more detailed metrics
print(classification_report(y_true_train, y_pred_train))

      Metric  Micro-average  Macro-average  Weighted-average
0   F1 Score       0.566375       0.459475          0.535049
1  Precision       0.566375       0.512161               NaN
2     Recall       0.566375       0.454808               NaN
              precision    recall  f1-score   support

           0       0.60      0.81      0.69      2733
           1       0.37      0.28      0.32       798
           2       0.57      0.28      0.37      1501

    accuracy                           0.57      5032
   macro avg       0.51      0.45      0.46      5032
weighted avg       0.55      0.57      0.54      5032



### valid

In [28]:
# Extract true labels and predicted labels
y_true_valid = df_valid_eval['Label']
y_pred_valid = df_valid_eval['Final_Result']

In [29]:
# Assuming you have a DataFrame with 'Label' as true labels and 'Final_Result' as predicted labels

# Calculate F1 Scores
f1_micro = f1_score(y_true_valid, y_pred_valid, average='micro')
f1_macro = f1_score(y_true_valid, y_pred_valid, average='macro')
f1_weighted = f1_score(y_true_valid, y_pred_valid, average='weighted')

# Calculate Precision and Recall for completeness (optional)
precision_micro = precision_score(y_true_valid, y_pred_valid, average='micro')
precision_macro = precision_score(y_true_valid, y_pred_valid, average='macro')
recall_micro = recall_score(y_true_valid, y_pred_valid, average='micro')
recall_macro = recall_score(y_true_valid, y_pred_valid, average='macro')

# Create a DataFrame to display the results
results_df = pd.DataFrame({
    'Metric': ['F1 Score', 'Precision', 'Recall'],
    'Micro-average': [f1_micro, precision_micro, recall_micro],
    'Macro-average': [f1_macro, precision_macro, recall_macro],
    'Weighted-average': [f1_weighted, None, None]  # Weighted average only applicable to F1 score here
})

# Display the table
print(results_df)

# You can also use classification report to see more detailed metrics
print(classification_report(y_true_valid, y_pred_valid))

      Metric  Micro-average  Macro-average  Weighted-average
0   F1 Score       0.574545       0.466072          0.541533
1  Precision       0.574545       0.527924               NaN
2     Recall       0.574545       0.456872               NaN
              precision    recall  f1-score   support

           0       0.60      0.81      0.69       305
           1       0.44      0.28      0.34        78
           2       0.55      0.28      0.37       167

    accuracy                           0.57       550
   macro avg       0.53      0.46      0.47       550
weighted avg       0.56      0.57      0.54       550



# NEED TO CHANGE LABELS OF TEST DATA FILE AND PREPROCESS IT