In [7]:
import json
import os

import logging
from enum import Enum

from sklearn.metrics import precision_recall_fscore_support
# Emit INFO-and-above records as "<timestamp> - <level> - <message>" lines
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [6]:
class EventType(Enum):
    """Event categories used as classification labels.

    Each member's value is a display label that ends with the member's own
    abbreviation in parentheses, e.g. ``"Acquisition (A)"``.
    ``evaluate_events`` matches dataset labels against these values and
    falls back to ``O`` for any label it does not recognise.
    """
    A = "Acquisition (A)"
    CT = "Clinical Trial (CT)"
    # Dividend-related events
    RD = "Regular Dividend (RD)"
    DC = "Dividend Cut (DC)"
    DI = "Dividend Increase (DI)"
    GI = "Guidance Increase (GI)"
    NC = "New Contract (NC)"
    RSS = "Reverse Stock Split (RSS)"
    SD = "Special Dividend (SD)"
    SR = "Stock Repurchase (SR)"
    SS = "Stock Split (SS)"
    # Catch-all label: no event, or an event outside the categories above
    O = "Other/None (O)"

In [2]:
# Results file is resolved relative to the current working directory
gpt_results_path = os.path.join(
    os.getcwd(),
    "EDT_dataset",
    "Event_detection",
    "gpt_extracted_events.json",
)
print(gpt_results_path)

/dcs/large/u5579267/EventExtraction/EDT_dataset/Event_detection/gpt_extracted_events.json


In [3]:
# JSON text is UTF-8 by specification; pin the encoding so decoding does not
# depend on the platform's default locale.
with open(gpt_results_path, "r", encoding="utf-8") as file:
    results = json.load(file)

# Show the first record to check the schema before evaluating
print(results[0])

{'sentence': 'Windtree Therapeutics Announces Reverse Stock Split WARRINGTON , Pa. , April 28 , 2020 / / Windtree Therapeutics , Inc . ( OTCQB: WINT ) , a biotechnology and medical device company focused on developing drug product candidates and medical device technologies to address acute cardiovascular and pulmonary diseases , today announced a 1-for-3 reverse stock split of its issued and outstanding common stock . The Company\'s common stock will begin trading on a split-adjusted basis at the opening of the OTCQB Market on Wednesday , April 29 , 2020 under the symbol "WINTD" . After 20 trading days , the symbol will revert to WINT . Effective with the reverse split , a new CUSIP number of 97382D 303 has been assigned to the Company\'s common stock . The number of shares of common stock authorized under the Company\'s Amended and Restated Certificate of Incorporation is unchanged at 120 million shares . The Company is pursuing a strategy intended to result in the listing of its comm

In [15]:
def _normalize_event_label(label):
    """Return ``label`` if it is a known ``EventType`` value, else the Other/None label."""
    return next(
        (event.value for event in EventType if event.value == label),
        EventType.O.value,
    )


def _predicted_event_type(extracted_events):
    """Return the raw predicted label from a record's ``extracted_events`` field.

    A list is reduced to its first element (only the first extracted event is
    scored). An empty list, a non-dict value, or a dict without an
    ``"event_type"`` key yields ``None``, which the caller maps to Other/None.
    """
    if isinstance(extracted_events, list):
        extracted_events = extracted_events[0] if extracted_events else None
    if isinstance(extracted_events, dict):
        return extracted_events.get("event_type")
    return None


def evaluate_events(results):
    """Score predicted event types against the gold labels.

    Args:
        results: Iterable of records. Each record must provide
            ``"actual_events"`` (a sequence of gold labels, possibly empty)
            and ``"extracted_events"`` (a dict with an ``"event_type"`` key,
            or a list of such dicts).

    Returns:
        dict with ``"exact_match"`` (fraction of gold labels whose prediction
        is identical) and ``"f1"`` (support-weighted F1). Both are ``0.0``
        when there is nothing to score.

    Notes:
        Every gold label of a record is paired with that record's single
        predicted label. Labels that are not ``EventType`` values, as well as
        missing predictions, are counted as ``EventType.O``.
    """
    logging.info("Evaluating extracted events")

    y_true = []
    y_pred = []

    for result in results:
        # A record without gold events is scored as one Other/None example.
        actual_events = result["actual_events"] or [EventType.O.value]

        # The prediction is the same for every gold label of this record, so
        # resolve it once instead of on each inner iteration.
        predicted_label = _normalize_event_label(
            _predicted_event_type(result["extracted_events"]))

        # Iterate over the gold labels only: pairing them with the prediction
        # dict's values would cap the number of scored labels at the number
        # of keys in that dict and silently drop the rest.
        for actual in actual_events:
            y_true.append(_normalize_event_label(actual))
            y_pred.append(predicted_label)

    if not y_true:
        # Avoid ZeroDivisionError (and an sklearn call on empty input).
        logging.warning("No events to evaluate; returning zero scores")
        return {"exact_match": 0.0, "f1": 0.0}

    # Calculate Exact Match (EM)
    exact_matches = sum(1 for yt, yp in zip(y_true, y_pred) if yt == yp)
    em_score = exact_matches / len(y_true)

    # Calculate F1 score
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average='weighted')

    logging.info(
        f"Evaluation metrics - Exact Match (EM): {em_score}, F1 Score: {f1}")
    logging.info("=" * 50)
    return {"exact_match": em_score, "f1": f1}

In [16]:
# Score the loaded GPT extractions against the gold labels
evaluation_metrics = evaluate_events(results)

# Dict with "exact_match" and "f1" keys
print(evaluation_metrics)

2024-07-08 18:23:59,928 - INFO - Evaluating extracted events
2024-07-08 18:23:59,934 - INFO - Evaluation metrics - Exact Match (EM): 1.0, F1 Score: 1.0


{'exact_match': 1.0, 'f1': 1.0}
