In [1]:
import torch
from scipy.spatial.distance import cosine
from transformers import AutoModel, AutoTokenizer
import numpy as np
from typing import List

# Show floats in plain fixed-point form (no scientific notation) so the
# similarity matrices printed below are easy to compare by eye.
np.set_printoptions(formatter={'float_kind': '{:f}'.format}, suppress=True)

# Tokenizer and encoder are loaded from the same checkpoint; both are read as
# module-level globals by `prompt_similarity`.
SIMCSE_CHECKPOINT = "princeton-nlp/sup-simcse-roberta-large"
tokenizer = AutoTokenizer.from_pretrained(SIMCSE_CHECKPOINT)
model = AutoModel.from_pretrained(SIMCSE_CHECKPOINT)


def prompt_similarity(prompts: List[str]) -> np.ndarray:
    """Return the pairwise cosine-similarity matrix of the prompts' embeddings.

    Every prompt is tokenized with the module-level ``tokenizer`` and encoded
    with the module-level ``model``; the model's ``pooler_output`` is used as
    the sentence embedding.

    Args:
        prompts: The texts to compare.

    Returns:
        A float64 array of shape ``(len(prompts), len(prompts))`` whose entry
        ``[i][j]`` is the cosine similarity between prompt ``i`` and prompt
        ``j``. Values lie in [-1, 1]; higher means more similar. An empty
        ``prompts`` list yields an empty ``(0, 0)`` array.
    """
    # Nothing to embed: return an empty matrix instead of calling the tokenizer.
    if len(prompts) == 0:
        return np.zeros((0, 0))

    # Tokenize input texts (padded to a common length so they form one batch)
    inputs = tokenizer(prompts, padding=True, truncation=True, return_tensors="pt")

    # Get the embeddings. Only the pooled output is needed, so the per-layer
    # hidden states are not requested.
    with torch.no_grad():
        embeddings = model(**inputs, return_dict=True).pooler_output

    # Calculate all cosine similarities at once: scale every embedding to unit
    # length, then a single matrix product gives every pairwise dot product.
    vectors = embeddings.detach().cpu().numpy().astype(np.float64)
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    unit_vectors = vectors / norms
    cosine_sim = unit_vectors @ unit_vectors.T

    # Rounding can push values marginally outside [-1, 1]; clamp them back.
    return np.clip(cosine_sim, -1.0, 1.0)

ModuleNotFoundError: No module named 'torch'

In [2]:
# Baseline fpb instruction; it sits at index 0 of `fpb_prompts`, so row 0 of
# the similarity matrix compares every paraphrase against it.
original_fpb_prompts = [
    "Analyze the sentiment of this statement extracted from a financial news article. Provide your answer as either negative, positive, or neutral.",
]

# Baseline first, followed by the paraphrased variants.
fpb_prompts = original_fpb_prompts + [
    "Evaluate the sentiment in this excerpt from a financial news report. Classify the sentiment as positive, negative, or neutral.",
    "Determine the sentiment of this financial journalism snippet. Categorize the sentiment as neutral, positive, or negative.",
    "Interpret the sentiment in this extract from a piece of financial news. Label the sentiment as negative, neutral, or positive.",
    "Assess the sentiment in this financial news excerpt. Judge the sentiment as positive, neutral, or negative.",
    "Examine the sentiment of this financial news snippet. Analyse the sentiment as neutral, negative, or positive.",
    "Gauge the sentiment of this financial news excerpt, classifying it as negative, positive, or neutral.",
    "Categorize the sentiment in this statement from a financial article as negative, positive, or neutral.",
    "Identify the underlying sentiment in this financial news snippet, labeling it as negative, positive, or neutral.",
    "Discern the sentiment of this financial news quote, indicating whether it's negative, positive, or neutral.",
    "Pinpoint the sentiment expressed in this financial article extract, characterizing it as negative, positive, or neutral.",
    "Gauge the sentiment behind this financial news extract. Respond with a classification of negative, positive, or neutral.",
    "Review the sentiment of this financial journalism excerpt. Categorize your findings as negative, positive, or neutral.",
    "Decipher the sentiment in this statement from a finance article. Label it as negative, positive, or neutral.",
    "Parse the sentiment contained within this financial news passage. Designate it as negative, positive, or neutral.",
    "Scrutinize the sentiment of this finance-related news extract. Identify it as negative, positive, or neutral.",
    "Analyze the sentiment in this financial news passage, classifying them as negative, positive, or neutral.",
    "Unpack the sentiment conveyed by this finance-related news snippet, labeling it negative, positive, or neutral.",
    "Decode the sentiment of this financial journalism excerpt, categorizing it as negative, positive, or neutral.",
    "Break down the sentiment expressed in this financial article sample, designating it negative, positive, or neutral.",
    "Distill the sentiment from this financial news fragment, identifying it as negative, positive, or neutral.",
]

# Render the prompts as one line of double-quoted, comma-separated entries
# (with a trailing comma) so the text can be pasted elsewhere as list items.
fpb_prompts_str = ', '.join(f'"{prompt}"' for prompt in fpb_prompts) + ','

# Bind the matrix to a name, then leave it as the cell's last expression so the
# notebook still displays it.
fpb_similarity = prompt_similarity(fpb_prompts)
fpb_similarity

array([[1.000000, 0.909519, 0.893049, 0.906792, 0.901928, 0.898932],
       [0.909519, 1.000000, 0.914067, 0.947287, 0.973954, 0.944108],
       [0.893049, 0.914067, 1.000000, 0.904539, 0.926729, 0.929165],
       [0.906792, 0.947287, 0.904539, 1.000000, 0.941445, 0.928774],
       [0.901928, 0.973954, 0.926729, 0.941445, 1.000000, 0.963443],
       [0.898932, 0.944108, 0.929165, 0.928774, 0.963443, 1.000000]])

In [4]:
# Baseline fiqasa instruction; `{category_0}` is a template placeholder that is
# left unformatted here.
original_fiqasa_prompts = [
    "What is the sentiment of the following financial {category_0}: Positive, Negative, or Neutral?",
]

# Baseline first (index 0), followed by the paraphrased variants.
# Every entry must end with a comma: adjacent string literals without one are
# silently concatenated by Python into a single prompt.
fiqasa_prompts = [
    *original_fiqasa_prompts,
    "Analyze the sentiment of this financial {category_0}. Classify the sentiment as Positive, Negative, or Neutral.",
    "Evaluate the sentiment in the given financial {category_0}. Categorize the sentiment as Positive, Negative, or Neutral.",
    "Determine the sentiment in this financial {category_0}. Label the sentiment as Positive, Negative, or Neutral.",
    "Assess the sentiment of the presented financial {category_0}. Judge the sentiment as Positive, Negative, or Neutral.",
    "Interpret the sentiment of the following financial {category_0}. Analyse the sentiment as Positive, Negative, or Neutral.",
    "Classify the sentiment of the following financial {category_0} as either Positive, Negative, or Neutral.",
    "Identify the sentiment in this financial {category_0}: Is it Positive, Negative, or Neutral?",
    "Gauge the sentiment of the provided financial {category_0}: Positive, Negative, or Neutral?",
    "Categorize the sentiment of this financial {category_0} as Positive, Negative, or Neutral.",
    "Discern the sentiment within the following financial {category_0}: Positive, Negative, or Neutral?",
    "Label the sentiment of the given financial {category_0} as Positive, Negative, or Neutral.",
    "Characterize the sentiment expressed in this financial {category_0}: Positive, Negative, or Neutral?",
    "Examine the sentiment of the following financial {category_0} and indicate if it's Positive, Negative, or Neutral.",
    "Rate the sentiment of this financial {category_0}: Does it fall under Positive, Negative, or Neutral?",
    "Deduce the sentiment from the provided financial {category_0}: Positive, Negative, or Neutral?",
    "Decode the emotional tone of this financial {category_0}: Is it Positive, Negative, or Neutral?",
    "Pinpoint the sentiment conveyed in the given financial {category_0}: Positive, Negative, or Neutral?",
    "Unpack the sentiment behind this financial {category_0}, classifying it as Positive, Negative, or Neutral.",
    "Distill the sentiment from the presented financial {category_0} into Positive, Negative, or Neutral.",
    "Extract the underlying sentiment of this financial {category_0}, tagging it as Positive, Negative, or Neutral.",
]

# Bind the matrix to a name, then leave it as the cell's last expression so the
# notebook still displays it.
fiqasa_similarity = prompt_similarity(fiqasa_prompts)
fiqasa_similarity

array([[1.000000, 0.843331, 0.867225, 0.874794, 0.861184, 0.888743],
       [0.843331, 1.000000, 0.947031, 0.917127, 0.901704, 0.937951],
       [0.867225, 0.947031, 1.000000, 0.956369, 0.942756, 0.962282],
       [0.874794, 0.917127, 0.956369, 1.000000, 0.937610, 0.939479],
       [0.861184, 0.901704, 0.942756, 0.937610, 1.000000, 0.935899],
       [0.888743, 0.937951, 0.962282, 0.939479, 0.935899, 1.000000]])

In [8]:
# Baseline headline prompts. Each one opens with a hint sentence that is
# followed by the actual yes/no question.
original_headline_prompts_uncleaned = [
    "Look for indications that the price of gold is increasing. In the news headline, can you identify a {category_0} pertaining to gold? Your response should be Yes or No.",
    "Consider whether the headline mentions the price of gold. Is the news headline discussing a {category_0} related to gold? Please respond with Yes or No.",
    "Look for any mention of future gold prices. Does the news headline suggest a {category_0} for gold? Your response should be Yes or No.",
    "Look for any references to future events concerning gold. In the context of gold, is there a {category_0} implied in the news headline? Respond with Yes or No.",
    "Consider if the headline compares gold with other assets. Does the news headline mention anything about a {category_0} in gold commodities? Please answer Yes or No.",
]

# "Cleaning" drops the leading hint sentence: keep only the text after the
# first ". " separator.
original_headline_prompts_cleaned = list(
    map(lambda text: text.split('. ', 1)[1], original_headline_prompts_uncleaned)
)

print(original_headline_prompts_cleaned)

# Uncleaned baselines followed by paraphrases.
headline_prompts_uncleaned = original_headline_prompts_uncleaned + [
    "Is {category_0} a topic covered in the news headline? Respond with either Yes or No.",
    "Does the news title address {category_0}? Your answer should be limited to Yes or No.",
    "Can information about {category_0} be found in the news headline? Reply with Yes or No only.",
    "Evaluate if the news headline contains information about {category_0}. Provide a Yes or No response.",
    "Determine whether {category_0} is referenced in the news headline. Answer exclusively with Yes or No.",
]

# Cleaned baselines followed by paraphrases.
headline_prompts_cleaned = original_headline_prompts_cleaned + [
    "Is {category_0} a topic covered in the news headline? Respond with either Yes or No.",
    "Does the news headline address {category_0}? Your answer should be only Yes or No.",
    "Can information about {category_0} be found in the news headline? Reply with Yes or No only.",
    "Evaluate if the news headline contains information about {category_0}. Provide a Yes or No response.",
    "Determine whether {category_0} is referenced in the news headline. Answer exclusively with Yes or No.",
    "Is a gold-related {category_0} evident in the news headline? Answer with Yes or No.",
    "Can you spot a {category_0} concerning gold within the news headline? Respond Yes or No.",
    "Does the headline indicate a {category_0} involving gold? Reply with Yes or No.",
    "Is there any hint of a gold {category_0} in the news headline? Answer Yes or No.",
    "Are there signs of a {category_0} linked to gold in the headline? Respond Yes or No.",
    "Can a gold-associated {category_0} be detected in the news headline? Say Yes or No.",
    "Does the headline allude to a {category_0} in the gold market? Answer Yes or No.",
    "Is a {category_0} pertaining to gold present in the news headline? Respond with Yes or No.",
    "Can you find any mention of a gold-related {category_0} in the headline? Reply Yes or No.",
    "Does the news headline point to a {category_0} in gold? Answer Yes or No.",
    "Is there evidence of a gold {category_0} within the headline? Respond Yes or No.",
    "Can you identify a {category_0} connected to gold in the news headline? Say Yes or No.",
    "Does the headline suggest any {category_0} activity in gold? Answer with Yes or No.",
    "Is a {category_0} regarding gold noticeable in the news headline? Reply Yes or No.",
    "Can you discern a gold-specific {category_0} from the headline? Respond Yes or No.",
]

# Compute the matrix for the cleaned headline prompts, then print it.
headline_similarity = prompt_similarity(headline_prompts_cleaned)
print(headline_similarity)

['In the news headline, can you identify a {category_0} pertaining to gold? Your response should be Yes or No.', 'Is the news headline discussing a {category_0} related to gold? Please respond with Yes or No.', 'Does the news headline suggest a {category_0} for gold? Your response should be Yes or No.', 'In the context of gold, is there a {category_0} implied in the news headline? Respond with Yes or No.', 'Does the news headline mention anything about a {category_0} in gold commodities? Please answer Yes or No.']


NameError: name 'prompt_similarity' is not defined

In [6]:
# Baseline finqa instruction; it sits at index 0 of `finqa_prompts`.
original_finqa_prompts = [
    "Please answer the given financial question based on the context.",
]

# Baseline first, followed by the paraphrased variants.
finqa_prompts = original_finqa_prompts + [
    "Using the provided context, respond to the presented financial inquiry.",
    "Address the given financial question using the provided context.",
    "Based on the supplied context, offer an answer to the specified financial query.",
    "Referring to the contextual details, tackle the stated financial question.",
    "Utilize the given context to formulate a response to the posed financial inquiry.",
    "Drawing from the context provided, resolve the financial question at hand.",
    "Analyze the given context to answer the financial question presented.",
    "Use the contextual information to craft a response to the financial query.",
    "Considering the provided context, address the financial question posed.",
    "Examine the context and respond to the financial question accordingly.",
    "With the given context as a guide, answer the financial question presented.",
    "Leverage the provided context to tackle the financial inquiry at hand.",
    "Apply the contextual information to respond to the given financial question.",
    "In light of the supplied context, address the financial query presented.",
    "Process the given context to formulate an answer to the financial question.",
    "Interpret the provided context to respond to the financial inquiry posed.",
    "Using the contextual details given, offer insight into the financial question.",
    "Extract relevant information from the context to answer the financial query.",
    "Synthesize the provided context to address the financial question at hand.",
    "Derive an answer to the financial question based on the given contextual information.",
]

# Bind the matrix to a name, then leave it as the cell's last expression so the
# notebook still displays it.
finqa_similarity = prompt_similarity(finqa_prompts)
finqa_similarity

array([[1.000000, 0.899550, 0.937493, 0.928961, 0.901106, 0.887300],
       [0.899550, 1.000000, 0.930142, 0.939982, 0.866632, 0.952860],
       [0.937493, 0.930142, 1.000000, 0.925721, 0.937809, 0.914615],
       [0.928961, 0.939982, 0.925721, 1.000000, 0.861223, 0.917173],
       [0.901106, 0.866632, 0.937809, 0.861223, 1.000000, 0.855960],
       [0.887300, 0.952860, 0.914615, 0.917173, 0.855960, 1.000000]])

In [6]:
# Baseline convfinqa instruction; it sits at index 0 of `convfinqa_prompts`.
original_convfinqa_prompts = [
    "In the context of this series of interconnected finance-related queries and the additional information provided by the pretext, table data, and posttext from a company's financial filings, please provide a response to the final question. This may require extracting information from the context and performing mathematical calculations. Please take into account the information provided in the preceding questions and their answers when formulating your response:",
]

# Baseline first, followed by the paraphrased variants.
convfinqa_prompts = original_convfinqa_prompts + [
    "Based on the sequence of linked financial inquiries and the supplementary details given in the pretext, tables, and posttext from corporate fiscal reports, answer the concluding question. This may involve data extraction and numerical computations. Consider all prior questions and their responses when crafting your answer.",
    "Utilizing the chain of related financial questions and the contextual data from company monetary documents (including pretext, tables, and posttext), respond to the final inquiry. Information synthesis and mathematical analysis may be necessary. Incorporate insights from previous questions and answers in your response.",
    "Drawing upon the series of interconnected fiscal queries and the accompanying context from business financial statements (pretext, tables, and posttext), address the last question. This might require extracting relevant details and performing calculations. Integrate knowledge from preceding questions and their solutions in your reply.",
    "With reference to the linked set of finance-related questions and the supporting information from corporate economic filings (pretext, tables, and posttext), provide an answer to the ultimate query. Data extraction and arithmetic operations may be needed. Factor in the content of earlier questions and their answers when formulating your response.",
    "Considering the progression of interrelated financial inquiries and the background provided by company fiscal reports (including pretext, tables, and posttext), tackle the final question. This may involve information extraction and mathematical processing. Leverage insights from previous questions and their responses in crafting your answer.",
    "Analyze the chain of finance-related queries, along with the pretext, tables, and posttext from corporate fiscal data, to answer the concluding question. Extract relevant information, perform calculations as needed, and consider prior questions and answers in your response.",
    "Synthesize the sequence of financial inquiries and the provided company monetary context (including pretext, tables, and posttext) to address the final query. This may involve data interpretation and numerical analysis. Incorporate insights from previous questions in your answer.",
    "Evaluate the series of linked fiscal questions alongside the given corporate financial details (pretext, tables, and posttext) to respond to the ultimate inquiry. Extract pertinent information, calculate if necessary, and integrate knowledge from preceding questions.",
    "Interpret the progression of interrelated financial queries and the accompanying business economic data (pretext, tables, and posttext) to tackle the last question. This might require information extraction and mathematical operations. Build upon earlier questions and answers in your response.",
    "Assess the connected set of finance-related questions and the supporting corporate monetary context (pretext, tables, and posttext) to formulate an answer to the final inquiry. Extract relevant data, perform calculations where needed, and consider prior questions in your reply.",
    "Examine the sequence of fiscal inquiries and the provided company financial information (including pretext, tables, and posttext) to address the concluding question. This may involve data analysis and arithmetic operations. Incorporate insights from previous questions and answers.",
    "Process the chain of interconnected financial queries and the supplementary corporate economic details (pretext, tables, and posttext) to respond to the ultimate question. Extract relevant information, calculate as necessary, and build upon prior inquiries in your answer.",
    "Dissect the series of linked finance-related questions and the given business fiscal context (pretext, tables, and posttext) to tackle the final inquiry. This might require data extraction and mathematical analysis. Consider preceding questions and their responses in your reply.",
    "Scrutinize the progression of interrelated monetary queries and the provided company financial data (pretext, tables, and posttext) to address the concluding question. Extract pertinent information, perform calculations if needed, and integrate insights from earlier questions.",
    "Analyze the sequence of connected fiscal inquiries and the accompanying corporate economic context (pretext, tables, and posttext) to formulate a response to the last question. This may involve information synthesis and numerical computations. Build upon previous questions and answers.",
    "Evaluate the chain of linked financial questions and the given business monetary details (including pretext, tables, and posttext) to answer the ultimate query. Extract relevant data, calculate where necessary, and consider prior inquiries in your response.",
    "Interpret the series of interrelated finance-related questions and the provided company fiscal information (pretext, tables, and posttext) to address the final inquiry. This might require data analysis and arithmetic operations. Incorporate insights from preceding questions.",
    "Assess the progression of connected economic queries and the supplementary corporate financial context (pretext, tables, and posttext) to tackle the concluding question. Extract pertinent information, perform calculations as needed, and build upon earlier questions and answers.",
    "Examine the sequence of linked monetary inquiries and the given business fiscal details (pretext, tables, and posttext) to respond to the ultimate question. This may involve information extraction and mathematical analysis. Consider prior questions in your reply.",
    "Dissect the chain of interconnected financial queries and the provided company economic data (including pretext, tables, and posttext) to formulate an answer to the last inquiry. Extract relevant information, calculate if necessary, and integrate insights from preceding questions and their responses.",
]

# Bind the matrix to a name, then leave it as the cell's last expression so the
# notebook still displays it.
convfinqa_similarity = prompt_similarity(convfinqa_prompts)
convfinqa_similarity

array([[1.000000, 0.927973, 0.927861, 0.923399, 0.941312, 0.908030],
       [0.927973, 1.000000, 0.938559, 0.934513, 0.944100, 0.947454],
       [0.927861, 0.938559, 1.000000, 0.922127, 0.926963, 0.933825],
       [0.923399, 0.934513, 0.922127, 1.000000, 0.918454, 0.925733],
       [0.941312, 0.944100, 0.926963, 0.918454, 1.000000, 0.912816],
       [0.908030, 0.947454, 0.933825, 0.925733, 0.912816, 1.000000]])

In [7]:
# Baseline sm prompts; `{stock}` and `{date}` are template placeholders that
# are left unformatted here.
original_sm_prompts = [
    "Scrutinize the data and tweets to envisage if the closing price of {stock} will swell or contract at {date}. Please affirm either Rise or Fall.",
    "Reflect on the provided data and tweets to anticipate if the closing price of {stock} is going to increase or decrease at {date}. Kindly respond with either Rise or Fall.",
    "By reviewing the data and tweets, can we predict if the closing price of {stock} will go upwards or downwards at {date}? Please indicate either Rise or Fall.",
    "Given the data and tweets, could you project whether the closing price of {stock} will grow or shrink at {date}? Please specify either Rise or Fall.",
    "With the help of the data and tweets given, can you forecast whether the closing price of {stock} will climb or drop at {date}? Please state either Rise or Fall.",
    "Utilize the data and tweets at hand to foresee if the closing price of {stock} will elevate or diminish at {date}. Only answer with Rise or Fall.",
    "Contemplate the data and tweets to guess whether the closing price of {stock} will surge or decline at {date}. Please declare either Rise or Fall.",
    "Analyze the information and social media posts to determine if the closing price of {stock} will ascend or descend at {date}. Please respond with either Rise or Fall.",
    "Assess the data and tweets to estimate whether the closing price of {stock} will escalate or deflate at {date}. Respond with either Rise or Fall.",
    "Examine the data and tweets to deduce if the closing price of {stock} will boost or lower at {date}. Kindly confirm either Rise or Fall.",
]

# The ten baselines first, followed by two groups of paraphrased variants.
sm_prompts = original_sm_prompts + [
    "Based on the provided data and tweets, predict whether the closing price of {stock} will increase or decrease at {date}. Answer only with Rise or Fall.",
    "Using the given information and social media posts, forecast if the closing price of {stock} will go up or down at {date}. Respond strictly with Rise or Fall.",
    "Analyze the supplied data and tweets to determine if the closing price of {stock} will go up or down at {date}. Please indicate either Rise or Fall.",
    "Considering the presented data and tweets, project whether the closing price of {stock} will increase or decrease at {date}. Specify only Rise or Fall.",
    "Evaluate the given data and tweets to anticipate if the closing price of {stock} will rise or fall at {date}. Your response should be either Rise or Fall.",

    "Based on the data and tweets, predict if the closing price of {stock} will increase or decrease at {date}. Answer with Rise or Fall.",
    "Using the provided data and tweets, forecast whether {stock}'s closing price will go up or down at {date}. Respond with Rise or Fall.",
    "Analyze the given data and tweets to determine if the closing price of {stock} will rise or fall at {date}. Please answer Rise or Fall.",
    "Considering the data and tweets, project if the closing price of {stock} will increase or decrease at {date}. Indicate either Rise or Fall.",
    "Evaluate the data and tweets to anticipate whether the closing price of {stock} will go up or down at {date}. Reply with Rise or Fall.",
    "From the available data and tweets, predict if {stock}'s closing price will rise or fall at {date}. Respond only with Rise or Fall.",
    "Examine the data and tweets to forecast whether {stock}'s closing price will increase or decrease at {date}. Answer Rise or Fall.",
    "Review the provided data and tweets to determine if {stock}'s closing price will go up or down at {date}. Please state Rise or Fall.",
    "Assess the data and tweets to predict whether {stock}'s closing price will rise or fall at {date}. Respond with Rise or Fall.",
    "Based on the given data and tweets, anticipate if the closing price of {stock} will increase or decrease at {date}. Answer only Rise or Fall.",
    "Analyze the data and tweets to project whether {stock}'s closing price will go up or down at {date}. Please respond with Rise or Fall.",
    "Using the data and tweets provided, forecast if the closing price of {stock} will rise or fall at {date}. Indicate either Rise or Fall.",
    "Consider the data and tweets to predict whether {stock}'s closing price will increase or decrease at {date}. Reply with Rise or Fall.",
    "Evaluate the given data and tweets to determine if the closing price of {stock} will go up or down at {date}. Answer only Rise or Fall.",
    "From the data and tweets, anticipate whether {stock}'s closing price will rise or fall at {date}. Respond with Rise or Fall.",
]

# Bind the matrix to a name, then leave it as the cell's last expression so the
# notebook still displays it.
sm_similarity = prompt_similarity(sm_prompts)
sm_similarity

array([[1.000000, 0.955850, 0.944874, 0.919834, 0.905714, 0.905646,
        0.954724, 0.924876, 0.957627, 0.972608, 0.896925, 0.892579,
        0.969424, 0.881446, 0.946869],
       [0.955850, 1.000000, 0.946848, 0.897201, 0.912896, 0.897128,
        0.944752, 0.939417, 0.963707, 0.953221, 0.896777, 0.906355,
        0.967034, 0.863856, 0.963446],
       [0.944874, 0.946848, 1.000000, 0.946291, 0.955771, 0.881044,
        0.948732, 0.909002, 0.937470, 0.945784, 0.910760, 0.878400,
        0.953557, 0.876254, 0.929391],
       [0.919834, 0.897201, 0.946291, 1.000000, 0.969172, 0.848415,
        0.916497, 0.858914, 0.904049, 0.897525, 0.884658, 0.843236,
        0.909806, 0.888212, 0.897797],
       [0.905714, 0.912896, 0.955771, 0.969172, 1.000000, 0.867082,
        0.916962, 0.871228, 0.907488, 0.909863, 0.906701, 0.867612,
        0.918000, 0.867380, 0.923053],
       [0.905646, 0.897128, 0.881044, 0.848415, 0.867082, 1.000000,
        0.885246, 0.867205, 0.905303, 0.914329, 0.955049,

In [7]:
# Baseline ner instruction; it sits at index 0 of `ner_prompts`.
original_ner_prompts = [
    "In the sentences extracted from financial agreements in U.S. SEC filings, identify the named entities that represent a person ('PER'), an organization ('ORG'), or a location ('LOC'). The required answer format is: 'entity name, entity type'.",
]

# Baseline first, followed by the paraphrased variants.
ner_prompts = original_ner_prompts + [
    "In the text snippets taken from financial agreements in United States Securities and Exchange Commission documents, detect entities categorized as person ('PER'), organizations ('ORG'), or locations ('LOC'). Present results as: 'entity name, entity type'.",
    "Locate and classify named entities within excerpts from financial agreements found in SEC filings. Identify persons ('PER'), organizations ('ORG'), and locations ('LOC'). Format your response as: 'entity name, entity type'.",
    "Analyze passages from financial contracts submitted to the U.S. SEC. Pinpoint entities that are persons ('PER'), organizations ('ORG'), or locations ('LOC'). List findings as: 'entity name, entity type'.",
    "Extract and categorize named entities from financial agreement segments in Securities and Exchange Commission reports. Tag persons ('PER'), organizations ('ORG'), and locations ('LOC'). Output should be: 'entity name, entity type'.",
    "Within financial contract excerpts from SEC-filed documents, recognize entities representing persons ('PER'), organizations ('ORG'), or locations ('LOC'). Structure answers as: 'entity name, entity type'.",
    "Analyze sentences from U.S. SEC filing financial agreements to detect named entities representing a person ('PER'), an organization ('ORG'), or a location ('LOC'). Present findings as: 'entity name, entity type'.",
    "Examine financial agreement text extracted from SEC filings in the United States to pinpoint named entities categorized as a person ('PER'), an organization ('ORG'), or a location ('LOC'). Output should be: 'entity name, entity type'.",
    "Within financial agreement excerpts from U.S. Securities and Exchange Commission documents, locate and classify named entities as a person ('PER'), an organization ('ORG'), or a location ('LOC'). Format answers as: 'entity name, entity type'.",
    "Scrutinize sentences taken from U.S. SEC filing financial contracts to isolate named entities falling under the categories of a person ('PER'), an organization ('ORG'), or a location ('LOC'). Provide results in the format: 'entity name, entity type'.",
    "In financial agreement text sourced from SEC filings in the U.S., spot and tag named entities that qualify as a person ('PER'), an organization ('ORG'), or a location ('LOC'). Present findings as: 'entity name, entity type'.",
    "Parse through sentences extracted from U.S. Securities and Exchange Commission financial agreements to recognize named entities classified as a person ('PER'), an organization ('ORG'), or a location ('LOC'). List results as: 'entity name, entity type'.",
    "From financial agreement sentences in United States SEC filings, extract and categorize named entities representing a person ('PER'), an organization ('ORG'), or a location ('LOC'). Format output as: 'entity name, entity type'.",
    "Comb through U.S. SEC filing financial agreement text to identify named entities that fall into the categories of a person ('PER'), an organization ('ORG'), or a location ('LOC'). Present findings in the format: 'entity name, entity type'.",
    "Survey sentences from financial agreements found in U.S. Securities and Exchange Commission filings, highlighting named entities that represent a person ('PER'), an organization ('ORG'), or a location ('LOC'). Output should be: 'entity name, entity type'.",
    "In financial agreement excerpts from U.S. SEC documents, detect and label named entities that correspond to a person ('PER'), an organization ('ORG'), or a location ('LOC'). Provide answers as: 'entity name, entity type'.",
    "Scan sentences drawn from U.S. SEC financial agreements to spot named entities that qualify as a person ('PER'), an organization ('ORG'), or a location ('LOC'). Present results in the format: 'entity name, entity type'.",
    "Within financial contract text from United States Securities and Exchange Commission filings, pinpoint named entities categorized as a person ('PER'), an organization ('ORG'), or a location ('LOC'). List findings as: 'entity name, entity type'.",
    "Analyze financial agreement sentences extracted from U.S. SEC documents to identify and classify named entities as a person ('PER'), an organization ('ORG'), or a location ('LOC'). Format responses as: 'entity name, entity type'.",
    "Examine text from financial agreements in United States Securities and Exchange Commission filings, isolating named entities that represent a person ('PER'), an organization ('ORG'), or a location ('LOC'). Provide output in the form: 'entity name, entity type'.",
    "In sentences culled from U.S. SEC filing financial agreements, locate and tag named entities falling under the categories of a person ('PER'), an organization ('ORG'), or a location ('LOC'). Present results as: 'entity name, entity type'.",
]

# Bind the matrix to a name, then leave it as the cell's last expression so the
# notebook still displays it.
ner_similarity = prompt_similarity(ner_prompts)
ner_similarity

array([[1.000000, 0.850413, 0.937246, 0.846564, 0.831735, 0.861964],
       [0.850413, 1.000000, 0.876471, 0.887140, 0.848095, 0.884218],
       [0.937246, 0.876471, 1.000000, 0.886993, 0.864024, 0.857527],
       [0.846564, 0.887140, 0.886993, 1.000000, 0.841287, 0.827772],
       [0.831735, 0.848095, 0.864024, 0.841287, 1.000000, 0.779139],
       [0.861964, 0.884218, 0.857527, 0.827772, 0.779139, 1.000000]])

In [4]:
# NOTE(review): this cell calls `model.encode()` and `model.similarity()`, which no
# other cell in this notebook uses. `model` is presumably a sentence-embedding model
# loaded in a cell that is not part of the notebook any more, rather than the
# AutoModel assigned to `model` in the first cell -- confirm, and load it explicitly
# before this cell so the notebook survives Restart & Run All.

# 1. The sentences to encode: a baseline sentiment instruction followed by five
#    paraphrases of it.
sentences = [
    "Analyze the sentiment of this statement extracted from a financial news article. Provide your answer as either negative, positive, or neutral.",
    "Evaluate the sentiment conveyed in this excerpt from a financial news report. Classify your response as positive, negative, or neutral.",
    "Determine the sentiment expressed in this financial journalism snippet. Categorize your assessment as neutral, positive, or negative.",
    "Interpret the sentiment present in this extract from a piece of business news. Present your conclusion as negative, neutral, or positive.",
    "Assess the sentiment in this market news excerpt. Indicate your judgment as positive, neutral, or negative.",
    "Examine the sentiment of this financial news snippet. State your analysis as neutral, negative, or positive."
]

# 2. Calculate embeddings by calling model.encode()
embeddings = model.encode(sentences)
print(embeddings.shape)
# The recorded run of this cell printed (6, 768): one row per sentence.

# 3. Calculate the embedding similarities (every sentence against every sentence)
similarities = model.similarity(embeddings, embeddings)
print(similarities)
# The recorded run of this cell printed a 6x6 tensor with 1.0000 on the diagonal;
# entry [i][j] is the similarity between sentences i and j.

(6, 768)
tensor([[1.0000, 0.8659, 0.7905, 0.7924, 0.7770, 0.8535],
        [0.8659, 1.0000, 0.8995, 0.8159, 0.8928, 0.9277],
        [0.7905, 0.8995, 1.0000, 0.8015, 0.8330, 0.9178],
        [0.7924, 0.8159, 0.8015, 1.0000, 0.7917, 0.8053],
        [0.7770, 0.8928, 0.8330, 0.7917, 1.0000, 0.9020],
        [0.8535, 0.9277, 0.9178, 0.8053, 0.9020, 1.0000]])


# simcse

In [6]:
# NOTE(review): this rebinds the global name `model`. `prompt_similarity` (defined in
# the first cell) looks `model` up at call time, so any call made after this cell has
# run will receive this SimCSE object instead of the AutoModel loaded in the first
# cell -- confirm that is intended, or bind the SimCSE wrapper to a distinct name.
# NOTE(review): the recorded run failed with ModuleNotFoundError, i.e. the `simcse`
# package was not installed in that kernel's environment.
from simcse import SimCSE
model = SimCSE("princeton-nlp/sup-simcse-bert-base-uncased")

ModuleNotFoundError: No module named 'simcse'