## Utils

In [1]:
import json
import os

In [2]:
import torch

In [3]:
from sklearn.metrics import precision_recall_fscore_support

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
def generate_event_information(model, tokenizer, prompt, max_input_length=500, max_new_tokens=50):
    # Tokenize the input prompt, truncating to the maximum input length
    inputs = tokenizer(prompt, return_tensors="pt",
                       truncation=True, max_length=max_input_length)

    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    outputs = model.generate(
        inputs['input_ids'], max_new_tokens=max_new_tokens)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return generated_text

In [6]:
def get_model_response(model, tokenizer, prompt):
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    outputs = model.generate(**inputs, max_length=1024)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

## Google Flan T5 - Few Shot Learning

In [8]:
import re
import json
from transformers import pipeline

def extract_company_name(sentence):
    """Extracts the company name from the sentence based on common patterns."""
    # Regular expression to find company names
    pattern = r"\b[A-Z][a-zA-Z]*\s(?:Inc|Corp|Ltd|LLC|Group|Company|PLC|Corporation|Incorporated|N\.A\.)\b"
    match = re.search(pattern, sentence)
    if match:
        return match.group()
    return None

2024-07-09 00:01:57.611866: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-07-09 00:02:03.354422: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /local/java/cuda-11.6.0/lib64/:/local/java/cudnn-linux-x86_64-8.5.0.96_cuda11-archive/lib/:/local/java/cuda-12.2/lib64/:/local/java/cudnn-linux-x86_64-8.9.4.25_cuda12-archive/lib/:/local/java/cuda-12.2/extras/CUPTI/lib64/
2024-07-09 00:02:03.355780: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinf

In [9]:
def extract_events_zero_shot(model_name, sentence, event_types):
    """Performs zero-shot event extraction and company name extraction.

    Args:
        model_name (str): Name of the Hugging Face model (e.g., "google/flan-t5-xl").
        sentence (str): The sentence to extract events from.
        event_types (list): List of possible event types.

    Returns:
        dict: A dictionary containing the extracted event type and company name.
    """

    extractor = pipeline("zero-shot-classification", model=model_name)
    candidate_labels = event_types + ["Company"]

    result = extractor(sentence, candidate_labels)

    # Find the event type with the highest score
    event_type = max(result["labels"], key=lambda label: result["scores"][result["labels"].index(label)])
    if event_type not in event_types:
        event_type = "Other/None"  # If no valid event type found

    company_name = extract_company_name(sentence)
    if not company_name:
        company_name = "Unknown"  # "Unknown" if no company name is found

    return {"event_type": event_type, "company": company_name}

model_name = "facebook/bart-large-mnli"
sentence = "CBTX Inc. Declares Quarterly Dividend and Suspends Repurchase Program. HOUSTON, March 18, 2020 ( ) CBTX, Inc., the bank holding company for CommunityBank of Texas N.A., today announced that its Board of Directors declared a quarterly cash dividend in the amount of $0.10 per share of common stock. The dividend will be payable on April 15, 2020 to shareholders of record as of the close of business on April 1, 2020. In addition, CBTX, Inc. today announced that it has temporarily suspended its share repurchase program in light of the challenges presented by the COVID-19 pandemic and surrounding events. CBTX, Inc. believes that it remains strong and well-capitalized, and the Company may reinstate the share repurchase program in the future. The Company repurchased 240,445 shares of common stock during the first quarter of 2020 for an aggregate purchase price of approximately $5.4 million under its repurchase program."

event_types = [
    "Acquisition (A)", "Clinical Trial (CT)", "Regular Dividend (RD)",
    "Dividend Cut (DC)", "Dividend Increase (DI)", "Guidance Increase (GI)",
    "New Contract (NC)", "Reverse Stock Split (RSS)", "Special Dividend (SD)",
    "Stock Repurchase (SR)", "Stock Split (SS)", "Other/None (O)"
]

extracted_info = extract_events_zero_shot(model_name, sentence, event_types)
print(json.dumps(extracted_info, indent=2))


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{
  "event_type": "Other/None",
  "company": "CBTX Inc"
}


In [10]:
model_name = "google/flan-t5-xl"
sentence = "CBTX Inc. Declares Quarterly Dividend and Suspends Repurchase Program. HOUSTON, March 18, 2020 ( ) CBTX, Inc., the bank holding company for CommunityBank of Texas N.A., today announced that its Board of Directors declared a quarterly cash dividend in the amount of $0.10 per share of common stock. The dividend will be payable on April 15, 2020 to shareholders of record as of the close of business on April 1, 2020. In addition, CBTX, Inc. today announced that it has temporarily suspended its share repurchase program in light of the challenges presented by the COVID-19 pandemic and surrounding events. CBTX, Inc. believes that it remains strong and well-capitalized, and the Company may reinstate the share repurchase program in the future. The Company repurchased 240,445 shares of common stock during the first quarter of 2020 for an aggregate purchase price of approximately $5.4 million under its repurchase program."

event_types = [
    "Acquisition (A)", "Clinical Trial (CT)", "Regular Dividend (RD)",
    "Dividend Cut (DC)", "Dividend Increase (DI)", "Guidance Increase (GI)",
    "New Contract (NC)", "Reverse Stock Split (RSS)", "Special Dividend (SD)",
    "Stock Repurchase (SR)", "Stock Split (SS)", "Other/None (O)"
]

extracted_info = extract_events_zero_shot(model_name, sentence, event_types)
print(json.dumps(extracted_info, indent=2))


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google/flan-t5-xl and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


{
  "event_type": "Reverse Stock Split (RSS)",
  "company": "CBTX Inc"
}


In [11]:
import json

In [15]:
def process_sentence(model_name, sentence):
        event_types = [
            "Acquisition (A)", "Clinical Trial (CT)", "Regular Dividend (RD)",
            "Dividend Cut (DC)", "Dividend Increase (DI)", "Guidance Increase (GI)",
            "New Contract (NC)", "Reverse Stock Split (RSS)", "Special Dividend (SD)",
            "Stock Repurchase (SR)", "Stock Split (SS)", "Other/None (O)"
        ]

        extracted_events = extract_events_zero_shot(model_name, sentence, event_types)

        return extracted_events

In [17]:
from enum import Enum

class EventType(Enum):
    A = "Acquisition (A)"
    CT = "Clinical Trial (CT)"
    RD = "Regular Dividend (RD)"
    DC = "Dividend Cut (DC)"
    DI = "Dividend Increase (DI)"
    GI = "Guidance Increase (GI)"
    NC = "New Contract (NC)"
    RSS = "Reverse Stock Split (RSS)"
    SD = "Special Dividend (SD)"
    SR = "Stock Repurchase (SR)"
    SS = "Stock Split (SS)"
    O = "Other/None (O)"

In [18]:
def evaluate_events(results):
    y_true = []
    y_pred = []

    for result in results:
        actual_events = result["actual_events"]
        extracted_event = result["extracted_events"]

        if not actual_events:
            actual_events = [EventType.O.value]

        for actual, extract in zip(actual_events, extracted_event.values()):
            print(f"Event type: {actual}")
            actual_event_enum = next(
                (e for e in EventType if e.value == actual), EventType.O)
            y_true.append(actual_event_enum.value)
            print(f"y true : {y_true}")

            extracted_event_enum = next(
                (e for e in EventType if e.value == extracted_event["event_type"]), EventType.O)

            y_pred.append(extracted_event_enum.value)
            print(f"y pred : {y_pred}")

        print("-" * 50)

    # Calculate Exact Match (EM)
    exact_matches = sum(1 for yt, yp in zip(y_true, y_pred) if yt == yp)
    em_score = exact_matches / len(y_true)

    # Calculate F1 score
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average='weighted')

    print(f"Evaluation metrics - Exact Match (EM): {em_score}, F1 Score: {f1}")
    return {"exact_match": em_score, "f1": f1}

In [19]:
evaluate_events(results)

Event type: O
y true : ['Other/None (O)']
y pred : ['Clinical Trial (CT)']
--------------------------------------------------
Event type: O
y true : ['Other/None (O)', 'Other/None (O)']
y pred : ['Clinical Trial (CT)', 'Reverse Stock Split (RSS)']
--------------------------------------------------
Event type: O
y true : ['Other/None (O)', 'Other/None (O)', 'Other/None (O)']
y pred : ['Clinical Trial (CT)', 'Reverse Stock Split (RSS)', 'Other/None (O)']
--------------------------------------------------
Event type: O
y true : ['Other/None (O)', 'Other/None (O)', 'Other/None (O)', 'Other/None (O)']
y pred : ['Clinical Trial (CT)', 'Reverse Stock Split (RSS)', 'Other/None (O)', 'Dividend Increase (DI)']
--------------------------------------------------
Event type: O
y true : ['Other/None (O)', 'Other/None (O)', 'Other/None (O)', 'Other/None (O)', 'Other/None (O)']
y pred : ['Clinical Trial (CT)', 'Reverse Stock Split (RSS)', 'Other/None (O)', 'Dividend Increase (DI)', 'Dividend Increase 

  _warn_prf(average, modifier, msg_start, len(result))


{'exact_match': 0.350210970464135, 'f1': 0.51875}

## GPT-2 - Fewshot Learning

In [21]:
model_name = "openai-community/gpt2"
sentence = "CBTX Inc. Declares Quarterly Dividend and Suspends Repurchase Program. HOUSTON, March 18, 2020 ( ) CBTX, Inc., the bank holding company for CommunityBank of Texas N.A., today announced that its Board of Directors declared a quarterly cash dividend in the amount of $0.10 per share of common stock. The dividend will be payable on April 15, 2020 to shareholders of record as of the close of business on April 1, 2020. In addition, CBTX, Inc. today announced that it has temporarily suspended its share repurchase program in light of the challenges presented by the COVID-19 pandemic and surrounding events. CBTX, Inc. believes that it remains strong and well-capitalized, and the Company may reinstate the share repurchase program in the future. The Company repurchased 240,445 shares of common stock during the first quarter of 2020 for an aggregate purchase price of approximately $5.4 million under its repurchase program."

event_types = [
    "Acquisition (A)", "Clinical Trial (CT)", "Regular Dividend (RD)",
    "Dividend Cut (DC)", "Dividend Increase (DI)", "Guidance Increase (GI)",
    "New Contract (NC)", "Reverse Stock Split (RSS)", "Special Dividend (SD)",
    "Stock Repurchase (SR)", "Stock Split (SS)", "Other/None (O)"
]

extracted_info = extract_events_zero_shot(model_name, sentence, event_types)
print(json.dumps(extracted_info, indent=2))


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at openai-community/gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.
Tokenizer was not supporting padding necessary for zero-shot, attempting to use  `pad_token=eos_token`


{
  "event_type": "Other/None (O)",
  "company": "CBTX Inc"
}


### Zeroshot Learning - Code Prompt

In [None]:
prompt="""
Event = {
    "event_type": str #options: [Acquisition (A), Clinical Trial (CT), Regular Dividend (RD), Dividend Cut (DC), Dividend Increase (DI), Guidance Increase (GI), New Contract (NC), Reverse Stock Split (RSS), Special Dividend (SD), Stock Repurchase (SR), Stock Split (SS), Other/None (O)]
    "company": str
}

events: List[Event] = extract events in the sentence: "CBTX Inc. Declares Quarterly Dividend and Suspends Repurchase Program. HOUSTON, March 18, 2020 ( ) CBTX, Inc., the bank holding company for CommunityBank of Texas N.A., today announced that its Board of Directors declared a quarterly cash dividend in the amount of $0.10 per share of common stock. The dividend will be payable on April 15, 2020 to shareholders of record as of the close of business on April 1, 2020. In addition, CBTX, Inc. today announced that it has temporarily suspended its share repurchase program in light of the challenges presented by the COVID-19 pandemic and surrounding events. CBTX, Inc. believes that it remains strong and well-capitalized, and the Company may reinstate the share repurchase program in the future. The Company repurchased 240,445 shares of common stock during the first quarter of 2020 for an aggregate purchase price of approximately $5.4 million under its repurchase program."
print(json.dumps(events))
"""

codePrompt_response = get_model_response(flan_model_test, flan_tokenizer_test, prompt)

In [None]:
prompt = """
# Task: Extract Events from Text
# Input:
text = "CBTX Inc. Declares Quarterly Dividend and Suspends Repurchase Program. HOUSTON, March 18, 2020 ( ) CBTX, Inc., the bank holding company for CommunityBank of Texas N.A., today announced that its Board of Directors declared a quarterly cash dividend in the amount of $0.10 per share of common stock. The dividend will be payable on April 15, 2020 to shareholders of record as of the close of business on April 1, 2020. In addition, CBTX, Inc. today announced that it has temporarily suspended its share repurchase program in light of the challenges presented by the COVID-19 pandemic and surrounding events. CBTX, Inc. believes that it remains strong and well-capitalized, and the Company may reinstate the share repurchase program in the future. The Company repurchased 240,445 shares of common stock during the first quarter of 2020 for an aggregate purchase price of approximately $5.4 million under its repurchase program."

Instructions:
1. dataclass `Event` to represents extracted events:

from dataclasses import dataclass
from typing import List

@dataclass
class Event:
    event_type: str  # Choose from: [A, CT, RD, DC, DI, GI, NC, RSS, SD, SR, SS, O]
    company: str

2. Function `extract_events` takes the input text and returns a list of Event objects:

def extract_events(text: str) -> List[Event]:
    # - Identify financial/corporate events in the text.
    # - Classify events based on the provided types.
    # - Extract the primary company associated with each event.
    return events

3. Extract events from the provided text and print the result as JSON:

events = extract_events(text)
import json
print(json.dumps(events))
"""

codePrompt_response = get_model_response(flan_model_test, flan_tokenizer_test, prompt)

In [None]:
print(codePrompt_response)

### 3. Explanation Prompt

In [22]:
explanation_prompt="""
Task: Event Extraction

Instructions:

1. Identify any financial or corporate events within the sentence.
2. Classify each identified event using the following types:
   - Acquisition (A): A company purchases another company or a significant portion of it.
   - Clinical Trial (CT): A company conducts a research study to test new medical treatments or drugs.
   - Regular Dividend (RD): A company distributes a portion of its earnings to shareholders regularly.
   - Dividend Cut (DC): A company reduces the amount of dividend it pays out to shareholders.
   - Dividend Increase (DI): A company increases the amount of dividend it pays out to shareholders.
   - Guidance Increase (GI): A company raises its future earnings or revenue forecast.
   - New Contract (NC): A company secures a new agreement for providing goods or services.
   - Reverse Stock Split (RSS): A company reduces the number of its outstanding shares to increase the share price.
   - Special Dividend (SD): A company makes a one-time distribution of additional earnings to shareholders.
   - Stock Repurchase (SR): A company buys back its own shares from the marketplace.
   - Stock Split (SS): A company increases the number of its outstanding shares by dividing its current shares.
   - Other/None (O): Events that do not fit into any of the specified categories.
3. Extract the primary corporate entity (company name) directly associated with each event. Use contextual clues and focus on entities performing financial actions. Prioritise the company name. A company associated is the primary corporate entity directly involved in the financial or corporate event being reported.

4. Output a JSON array of dictionaries, each containing:
    - "event_type": [Use the exact event classification code from the provided list, e.g., "RD" for Regular Dividend]
    - "company": [Identify the primary company performing the financial action, often the publicly traded parent company]

5. Ensure the output is valid JSON. Double-check for proper formatting, brackets, commas, and quotation marks.

## Important: The final output should be a JSON array, not an explanation of the instructions.


Example 1:
Input: "Company A announces a regular dividend and a new stock repurchase program."
Output: [{"event_type": "RD", "company": "Company A"}, {"event_type": "SR", "company": "Company A"}]

Example 2:
Input: "MegaCorp (parent company of Subsidiary B) declares a quarterly cash dividend payable to shareholders of record."
Output: [{"event_type": "RD", "company": "MegaCorp"}]

Example 3:
Input: "BigPharma Inc., the parent company of BioTech Labs, announced a successful Phase III clinical trial."
Output: [{"event_type": "CT", "company": "BigPharma Inc."}]

Extract the event from the following sentence:
Input: "CBTX Inc. Declares Quarterly Dividend and Suspends Repurchase Program. HOUSTON, March 18, 2020 ( ) CBTX, Inc., the bank holding company for CommunityBank of Texas N.A., today announced that its Board of Directors declared a quarterly cash dividend in the amount of $0.10 per share of common stock. The dividend will be payable on April 15, 2020 to shareholders of record as of the close of business on April 1, 2020. In addition, CBTX, Inc. today announced that it has temporarily suspended its share repurchase program in light of the challenges presented by the COVID-19 pandemic and surrounding events. CBTX, Inc. believes that it remains strong and well-capitalized, and the Company may reinstate the share repurchase program in the future. The Company repurchased 240,445 shares of common stock during the first quarter of 2020 for an aggregate purchase price of approximately $5.4 million under its repurchase program."
"""

explanation_response = get_model_response(flan_model_test, flan_tokenizer_test, explanation_prompt)

In [23]:
print(explanation_response)

Output: ["event_type": "RD", "company": "CommunityBank of Texas N.A."], ["event_type": "RD", "company": "CommunityBank of Texas N.A."], ["event_type": "RD", "company": "CommunityBank of Texas N.A."]


### 4. Pipeline Prompt

In [None]:
pipeline_prompt="""

Stage 1: 
Extract financial or corporate events in the sentence, as well as the primary corporate entity (company) involved in the event. Return the output in JSON format as this: [{"event_type": event type, "company": "company name"}].
Event type options: Acquisition (A), Clinical Trial (CT), Regular Dividend (RD), Dividend Cut (DC), Dividend Increase (DI), Guidance Increase (GI), New Contract (NC), Reverse Stock Split (RSS), Special Dividend (SD), Stock Repurchase (SR), Stock Split (SS), Other/None (O).

Instructions:
1. Clearly identify the main events described in the sentence.
2. Classify each event using the provided list of event types.
3. Extract the company name directly associated with each event, prioritizing the main corporate entity mentioned in the sentence.
4. Ensure the output is a valid JSON array with square brackets.

Sentence: "CBTX Inc. Declares Quarterly Dividend and Suspends Repurchase Program. HOUSTON, March 18, 2020 ( ) CBTX, Inc., the bank holding company for CommunityBank of Texas N.A., today announced that its Board of Directors declared a quarterly cash dividend in the amount of $0.10 per share of common stock. The dividend will be payable on April 15, 2020 to shareholders of record as of the close of business on April 1, 2020. In addition, CBTX, Inc. today announced that it has temporarily suspended its share repurchase program in light of the challenges presented by the COVID-19 pandemic and surrounding events. CBTX, Inc. believes that it remains strong and well-capitalized, and the Company may reinstate the share repurchase program in the future. The Company repurchased 240,445 shares of common stock during the first quarter of 2020 for an aggregate purchase price of approximately $5.4 million under its repurchase program."

Output (Stage 1): 

Stage 2: Answer the question related to the given sentence and given event information.

Instructions:
1. Analyze the extracted events from Stage 1.
2. Iterate over each event, and for each:
    - Determine the corresponding question from the list below based on the event type.
    - Find the answer to the question within the provided sentence.
    - Extract the exact span of text that answers the question.
    - If no answer is found, return "N/A".
3. Output the answers as a JSON array, in the format: `[{"event_type": event type, "question": question, "answer": answer}]`

Questions for each event type:
Acquisition (A): What company was acquired?
Clinical Trial (CT): What was the result of the clinical trial?
Regular Dividend (RD): What is the amount of the regular dividend?
Dividend Cut (DC): By how much was the dividend cut?
Dividend Increase (DI): By how much was the dividend increased?
Guidance Increase (GI): What is the new guidance value?
New Contract (NC): What is the nature of the new contract?
Reverse Stock Split (RSS): What is the ratio of the reverse stock split?
Special Dividend (SD): What is the amount of the special dividend?
Stock Repurchase (SR): How many shares were repurchased?
Stock Split (SS): What is the ratio of the stock split?

Sentence: "CBTX Inc. Declares Quarterly Dividend and Suspends Repurchase Program. HOUSTON, March 18, 2020 ( ) CBTX, Inc., the bank holding company for CommunityBank of Texas N.A., today announced that its Board of Directors declared a quarterly cash dividend in the amount of $0.10 per share of common stock. The dividend will be payable on April 15, 2020 to shareholders of record as of the close of business on April 1, 2020. In addition, CBTX, Inc. today announced that it has temporarily suspended its share repurchase program in light of the challenges presented by the COVID-19 pandemic and surrounding events. CBTX, Inc. believes that it remains strong and well-capitalized, and the Company may reinstate the share repurchase program in the future. The Company repurchased 240,445 shares of common stock during the first quarter of 2020 for an aggregate purchase price of approximately $5.4 million under its repurchase program."
Complete Stage 1 and then Stage 2 based on Stage 1's output.
"""

pipeline_response = get_model_response(flan_model_test, flan_tokenizer_test, pipeline_prompt)

In [None]:
print(pipeline_response)

## GPT

In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load the model and tokenizer
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")


In [None]:
prompt = """Extract event information from the following sentences and return events in JSON format as: \
[{"event_type": "event", "company": "corporate_entity"}]. \
The event_type should be one of the following: Acquisition (A), Clinical Trial (CT), Regular Dividend (RD), Dividend Cut (DC), \
Dividend Increase (DI), Guidance Increase (GI), New Contract (NC), Reverse Stock Split (RSS), Special Dividend (SD), \
Stock Repurchase (SR), Stock Split (SS). If there is no event, use "O". The corporate_entity is the company associated with the event.

Example:
Sentence: "Company XYZ announces a new contract for the next five years."
Output: [{"event_type": "New Contract", "company": "Company XYZ"}]

Sentence: "ABC Corp. has announced a dividend increase."
Output: [{"event_type": "Dividend Increase", "company": "ABC Corp."}]

Sentence: "Tech Innovations Inc. is planning to repurchase its stock."
Output: [{"event_type": "Stock Repurchase", "company": "Tech Innovations Inc."}]

Sentence: "CBTX Inc. Declares Quarterly Dividend and Suspends Repurchase Program. HOUSTON, March 18, 2020 ( ) CBTX, Inc., \
the bank holding company for CommunityBank of Texas N.A., today announced that its Board of Directors declared a quarterly \
cash dividend in the amount of $0.10 per share of common stock. The dividend will be payable on April 15, 2020 to shareholders \
of record as of the close of business on April 1, 2020. In addition, CBTX, Inc. today announced that it has temporarily suspended \
its share repurchase program in light of the challenges presented by the COVID-19 pandemic and surrounding events. CBTX, Inc. \
believes that it remains strong and well-capitalized, and the Company may reinstate the share repurchase program in the future. \
The Company repurchased 240,445 shares of common stock during the first quarter of 2020 for an aggregate purchase price of \
approximately $5.4 million under its repurchase program."""

response = get_model_response(gpt2_model, gpt2_tokenizer, prompt)
print("Generated Text:", response)

## Llama 3B

In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

llama_tokenizer = AutoTokenizer.from_pretrained("openlm-research/open_llama_3b_v2")
llama_model = AutoModelForCausalLM.from_pretrained("openlm-research/open_llama_3b_v2")

In [None]:
prompt="""
Task: Event Extraction

Instructions:

1. Identify any financial or corporate events within the sentence.
2. Classify each identified event using the following types:
   - Acquisition (A)
   - Clinical Trial (CT)
   - Regular Dividend (RD)
   - Dividend Cut (DC)
   - Dividend Increase (DI)
   - Guidance Increase (GI)
   - New Contract (NC)
   - Reverse Stock Split (RSS)
   - Special Dividend (SD)
   - Stock Repurchase (SR)
   - Stock Split (SS)
   - Other/None (O)
3. Extract the primary corporate entity (company name) directly associated with each event. Use contextual clues and focus on entities performing financial actions. Prioritise the company name.

4. Output a JSON array of dictionaries, each containing:
    - "event_type": [Use the exact event classification code from the provided list, e.g., "RD" for Regular Dividend]
    - "company": [Identify the primary company performing the financial action, often the publicly traded parent company]

5. Ensure the output is valid JSON. Double-check for proper formatting, brackets, commas, and quotation marks.

Example 1:
Input: "Company A announces a regular dividend and a new stock repurchase program."
Output: [{"event_type": "RD", "company": "Company A"}, {"event_type": "SR", "company": "Company A"}]

Example 2:
Input: "MegaCorp (parent company of Subsidiary B) declares a quarterly cash dividend payable to shareholders of record."
Output: [{"event_type": "RD", "company": "MegaCorp"}]

Example 3:
Input: "BigPharma Inc., the parent company of BioTech Labs, announced a successful Phase III clinical trial."
Output: [{"event_type": "CT", "company": "BigPharma Inc."}]

Extract the event from the following sentence:
Input: "CBTX Inc. Declares Quarterly Dividend and Suspends Repurchase Program. HOUSTON, March 18, 2020 ( ) CBTX, Inc., the bank holding company for CommunityBank of Texas N.A., today announced that its Board of Directors declared a quarterly cash dividend in the amount of $0.10 per share of common stock. The dividend will be payable on April 15, 2020 to shareholders of record as of the close of business on April 1, 2020. In addition, CBTX, Inc. today announced that it has temporarily suspended its share repurchase program in light of the challenges presented by the COVID-19 pandemic and surrounding events. CBTX, Inc. believes that it remains strong and well-capitalized, and the Company may reinstate the share repurchase program in the future. The Company repurchased 240,445 shares of common stock during the first quarter of 2020 for an aggregate purchase price of approximately $5.4 million under its repurchase program."
"""

response = get_model_response(llama_model, llama_tokenizer, prompt)

In [None]:
print(response)

## FinGPT

In [None]:
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM

config = PeftConfig.from_pretrained("FinGPT/fingpt-forecaster_dow30_llama2-7b_lora")
base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
model = PeftModel.from_pretrained(base_model, "FinGPT/fingpt-forecaster_dow30_llama2-7b_lora")