In [14]:
# run on python 3.9 or lesser - as sentence piece throws an error while installing frame semantic transformer
# ! pip install frame_semantic_transformer

In [79]:
import ssl 
import nltk
import pandas as pd
import matplotlib.pyplot as plt
import spacy

from frame_semantic_transformer import FrameSemanticTransformer

nlp = spacy.load("en_core_web_sm")

In [16]:
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

nltk.download('wordnet')
nltk.download('framenet_v17')

[nltk_data] Downloading package wordnet to /Users/avtk/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package framenet_v17 to
[nltk_data]     /Users/avtk/nltk_data...
[nltk_data]   Package framenet_v17 is already up-to-date!


True

In [75]:
import pickle
import logging 

logger = logging.getLogger()

# Function to process each article and return frames
def process_article(article_text:str):
    """Function to process each article 
        and tokenize sentences

    Args:
        article_text (_type_): string

    Returns:
        _type_: list
    """
    # Tokenize the article into sentences
    try:
        doc = nlp(article_text)
        sentences = [sent.text for sent in doc.sents]
        sentences = [sent for sent in sentences if sent.strip() != ""]
        return sentences
    except Exception as e: 
        logger.error(e)
    else:
        return []

def get_frame_semantics(filename: str, region: str):
    """Function to get frame semantics 

    Args:
        df (pd.DataFrame): dataframe for region
        filename (str): name of file
    """

    pickle_obj = {"index": [], "frame_semantics": []}
    
    df = pd.read_csv(filename)
    frame_transformer = FrameSemanticTransformer()

    try:
        for row in df.iterrows():
            index = row[0]
            print(row[0])
            doc_semantics = []
            article_text = row[1].maintext
            sentences = process_article(article_text)
            print(len(sentences))
            doc_semantics = [frame_transformer.detect_frames(sentence) for sentence in sentences]
            print(doc_semantics)
    except Exception as e: 
        logger.error(e)
    else: 
        pickle_obj["index"].append(index)
        pickle_obj["frame_semantics"].append(doc_semantics) 
        pickle.dump(pickle_obj, file = open(f"../data/processed/doc_semantics/frame_semantics_{region}.pickle", "wb"))
    

In [76]:
region = "MiddleEast"
df = pd.read_csv(f"../data/raw/filtered_data/{region}.csv")

get_frame_semantics(f"../data/raw/filtered_data/{region}.csv", region)


0
3
[DetectFramesResult(sentence='CNN —\nDr. Mohammed Ghneim has not left his hospital in Gaza City in four weeks.', trigger_locations=[34, 43, 73], frames=[FrameResult(name='Departing', trigger_location=34, frame_elements=[FrameElementResult(name='Theme', text='Dr. Mohammed Ghneim'), FrameElementResult(name='Source', text='his hospital in Gaza City'), FrameElementResult(name='Time', text='in four weeks')]), FrameResult(name='Buildings', trigger_location=43, frame_elements=[FrameElementResult(name='Building', text='hospital')]), FrameResult(name='Calendric_unit', trigger_location=73, frame_elements=[FrameElementResult(name='Count', text='four'), FrameElementResult(name='Unit', text='weeks')])]), DetectFramesResult(sentence='He can’t remember the last time he slept or ate, and his blue scrubs are stained in the blood of patients who’ve died in his arms.\n', trigger_locations=[9, 35, 44, 113, 125], frames=[FrameResult(name='Memory', trigger_location=9, frame_elements=[FrameElementResult(

In [77]:
region = "MiddleEast"
sem_frames = pickle.load(open(f"../data/processed/doc_semantics/frame_semantics_{region}.pickle", "rb"))
len(sem_frames["frame_semantics"])

1

In [78]:
sem_frames

{'index': [0],
 'frame_semantics': [[DetectFramesResult(sentence='CNN —\nDr. Mohammed Ghneim has not left his hospital in Gaza City in four weeks.', trigger_locations=[34, 43, 73], frames=[FrameResult(name='Departing', trigger_location=34, frame_elements=[FrameElementResult(name='Theme', text='Dr. Mohammed Ghneim'), FrameElementResult(name='Source', text='his hospital in Gaza City'), FrameElementResult(name='Time', text='in four weeks')]), FrameResult(name='Buildings', trigger_location=43, frame_elements=[FrameElementResult(name='Building', text='hospital')]), FrameResult(name='Calendric_unit', trigger_location=73, frame_elements=[FrameElementResult(name='Count', text='four'), FrameElementResult(name='Unit', text='weeks')])]),
   DetectFramesResult(sentence='He can’t remember the last time he slept or ate, and his blue scrubs are stained in the blood of patients who’ve died in his arms.\n', trigger_locations=[9, 35, 44, 113, 125], frames=[FrameResult(name='Memory', trigger_location=9, 

In [None]:
region = "UK"
sem_frames = pickle.load(open(f"../data/processed/frame_semantics/frame_semantics_{region}.pickle", "rb"))
len(sem_frames["frame_semantics"])

9116

In [None]:
len(sem_frames['frame_semantics'])

9116

In [None]:
frames = []
for title_frames in sem_frames['frame_semantics']:
    for f_results in title_frames.frames:
        frames.append(f_results.name)

len(frames)

34028

In [None]:
def find_frequently_occurring_frames(frames:list):
    """Function to find frequently occurring frames

    Args:
        frames (list): list of frames

    Returns:
        dict: dictionary of frames
    """
    frame_dict = {}
    for frame in frames:
        if frame in frame_dict:
            frame_dict[frame] += 1
        else:
            frame_dict[frame] = 1
    return frame_dict


In [None]:
frame_dict = find_frequently_occurring_frames(frames)
frame_df = pd.DataFrame(frame_dict.items(), columns=['frame', 'count'])
frame_df = frame_df.sort_values('count', ascending=False)

with pd.option_context('display.max_colwidth', None):
  display(frame_df)

print(frame_df.frame.values) 
frame_df.head(40)


Unnamed: 0,frame,count
4,Statement,1908
2,Hostile_encounter,1182
18,Leadership,1138
62,Attack,814
31,Calendric_unit,730
...,...,...
482,Experimentation,1
483,Color_qualities,1
686,Ground_up,1
499,Encoding,1


['Statement' 'Hostile_encounter' 'Leadership' 'Attack' 'Calendric_unit'
 'Killing' 'Kinship' 'Judgment_communication' 'Political_locales'
 'Request' 'People' 'Buildings' 'Relative_time' 'Event' 'Arriving'
 'Aggregate' 'Assistance' 'Time_vector' 'Discussion' 'Causation' 'Age'
 'Education_teaching' 'Emotion_directed' 'Terrorism' 'Taking_sides'
 'Commitment' 'People_by_vocation' 'Measure_duration' 'Increment' 'Death'
 'Text' 'Desiring' 'Reveal_secret' 'People_by_age' 'Locale_by_use'
 'Awareness' 'Military' 'Evidence' 'Protest' 'Intentionally_act'
 'Personal_relationship' 'Arrest' 'Cardinal_numbers' 'Attempt_suasion'
 'Make_agreement_on_action' 'Vehicle' 'Change_of_leadership' 'Telling'
 'Catastrophe' 'Releasing' 'Attempt' 'Weapon' 'Experiencer_focus'
 'Ordinal_numbers' 'Physical_artworks' 'Removing' 'Finish_competition'
 'Cause_harm' 'Using' 'Body_parts' 'Labeling' 'Being_at_risk'
 'Criminal_investigation' 'Cause_motion' 'Locative_relation' 'Origin'
 'Social_event' 'Roadways' 'Quitting' '

Unnamed: 0,frame,count
4,Statement,1908
2,Hostile_encounter,1182
18,Leadership,1138
62,Attack,814
31,Calendric_unit,730
68,Killing,652
22,Kinship,611
20,Judgment_communication,566
66,Political_locales,556
81,Request,473


In [None]:
def find_frames_coocurring_with_death():
    """Function to find frames co-occurring with death
    """
    death_frames = []
    for title_frames in sem_frames['frame_semantics']:
        for f_results in title_frames.frames:
            if f_results.name == "Attack":
                print(f_results)
                death_frames.extend([f.name for f in title_frames.frames])
    return death_frames


def frquency_count_assailants_attack_frame():
    """Function to find frequency count of assailants in attack frame
    """
    assailants = {}
    for title_frames in sem_frames['frame_semantics']:
        for f_results in title_frames.frames:
            if f_results.name == "Attack":
                for f in f_results.frame_elements:
                    if f.name == "Assailant":
                        assailants[f.text] = assailants.get(f.text, 0) + 1
    return assailants

assailants = frquency_count_assailants_attack_frame()

In [None]:
assailants_df = pd.DataFrame(assailants.items(), columns=['assailant', 'count'])
assailants_df

Unnamed: 0,assailant,count
0,Sex and the City star Cynthia Nixon,1
1,Hamas',14
2,Israeli,71
3,Hamas,74
4,Hamas terrorist,11
...,...,...
158,al-Shifa,1
159,German cops,1
160,their,2
161,The UN,1


In [None]:
death_frames = find_frames_coocurring_with_death()
print(death_frames)

FrameResult(name='Attack', trigger_location=8, frame_elements=[FrameElementResult(name='Assailant', text='Israeli'), FrameElementResult(name='Victim', text='on southern Gaza')])
FrameResult(name='Attack', trigger_location=52, frame_elements=[FrameElementResult(name='Manner', text='deadly'), FrameElementResult(name='Depictive', text='drone'), FrameElementResult(name='Place', text='in Jordan')])
FrameResult(name='Attack', trigger_location=41, frame_elements=[])
FrameResult(name='Attack', trigger_location=33, frame_elements=[FrameElementResult(name='Assailant', text='Hamas')])
FrameResult(name='Attack', trigger_location=48, frame_elements=[FrameElementResult(name='Weapon', text='missile'), FrameElementResult(name='Victim', text='at Al-Asad Airbase in Iraq')])
FrameResult(name='Attack', trigger_location=26, frame_elements=[FrameElementResult(name='Assailant', text='Hamas')])
FrameResult(name='Attack', trigger_location=41, frame_elements=[FrameElementResult(name='Assailant', text='Hamas ter

In [None]:
frame_transformer = FrameSemanticTransformer() 
result = frame_transformer.detect_frames("The attack on the World Trade Center killed a lot of people. Thousands of people died in the attack.")

In [None]:
result.frames

[FrameResult(name='Attack', trigger_location=4, frame_elements=[FrameElementResult(name='Victim', text='on the World Trade Center')]),
 FrameResult(name='Killing', trigger_location=37, frame_elements=[FrameElementResult(name='Cause', text='The attack on the World Trade Center'), FrameElementResult(name='Victim', text='a lot of people')]),
 FrameResult(name='People', trigger_location=53, frame_elements=[FrameElementResult(name='Person', text='people')]),
 FrameResult(name='Quantified_mass', trigger_location=61, frame_elements=[FrameElementResult(name='Quantity', text='Thousands'), FrameElementResult(name='Individuals', text='of people')]),
 FrameResult(name='People', trigger_location=74, frame_elements=[FrameElementResult(name='Person', text='people')]),
 FrameResult(name='Death', trigger_location=81, frame_elements=[FrameElementResult(name='Protagonist', text='Thousands of people'), FrameElementResult(name='Containing_event', text='in the attack')]),
 FrameResult(name='Attack', trigger

In [None]:
""" Function to run frame semantic transformer on a bulk of documents"""

def run_frame_semantic_transformer_bulk(df: pd.DataFrame, region: str):
    """Function to run frame semantic transformer on a bulk of documents

    Args:
        df (pd.DataFrame): Dataframe for region
        region (str): Region name
    """
    frame_transformer = FrameSemanticTransformer(batch_size=16) 
    iterator = chunks(df, 16)
    pickle_obj = {"indices": [], "frame_semantics": []} 
    for chunk in iterator:
        try:
            result = frame_transformer.detect_frames_bulk(chunk)
        except Exception as e: 
            logger.error(e)
        else: 
            indices = list(chunk.index)
            pickle_obj["indices"].extend(indices)
            pickle_obj["frame_semantics"].extend(result) 
            pickle.dump(pickle_obj, file = open(f"./data/processed/frame_semantics_{region}.pickle", "wb"))