In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the augmented FinSenticNet concept table, then keep its first ten
# rows as a small sample (shown as this cell's output).
phrases = pd.read_csv(filepath_or_buffer="../utils/FinSenticNet-augmented.csv")
phr_test = phrases.iloc[:10]
phr_test

Unnamed: 0,concept,polarity,closeness_male,closeness_female,closeness_global_north,closeness_global_south
0,big_winner,positive,,,,
1,growing,positive,,,,
2,increase,positive,,,,
3,hard_slam,positive,,,,
4,solid,positive,,,,
5,reminder_punish,negative,,,,
6,rise,positive,,,,
7,solid_footing,positive,,,,
8,drop,negative,,,,
9,strong_volume,positive,,,,


In [3]:
# Read the codes table (entity, currency, alphabetic/numeric code columns)
# and preview its first three rows.
codes = pd.read_csv(filepath_or_buffer="../utils/codes-all.csv")
codes.iloc[:3]

Unnamed: 0,Entity,Currency,AlphabeticCode,NumericCode,MinorUnit,WithdrawalDate
0,AFGHANISTAN,Afghani,AFN,971.0,2,
1,ÅLAND ISLANDS,Euro,EUR,978.0,2,
2,ALBANIA,Lek,ALL,8.0,2,


In [4]:
# First ten rows of the codes table, kept as a small sample.
codes_test = codes.iloc[:10]

In [5]:
# Country list with a Global South ("GS") / Global North ("GN") label per country.
# Source: https://www.norrag.org/wp-content/uploads/2023/02/List-of-Global-South-and-Global-North-Countries.pdf
# NOTE(review): this path starts with "./utils/" while the other inputs are
# read from "../utils/" - confirm which location is intended.
countries = pd.read_csv(filepath_or_buffer="./utils/countries.csv")
countries.head(5)

Unnamed: 0,Name,Classification
0,Afghanistan,GS
1,Albania,GS
2,Algeria,GS
3,Andorre,GS
4,Angola,GS


In [6]:
# Split the country list by its classification label.
global_south_countries = countries.loc[countries["Classification"].eq("GS")]
global_north_countries = countries.loc[countries["Classification"].eq("GN")]

In [7]:
# Preview the Global South subset, followed by its per-column non-null counts.
print(global_south_countries.head(), global_south_countries.count(), sep="\n")

          Name Classification
0  Afghanistan             GS
1      Albania             GS
2      Algeria             GS
3      Andorre             GS
4       Angola             GS
Name              178
Classification    178
dtype: int64


In [8]:
# Preview the Global North subset, followed by its per-column non-null counts.
print(global_north_countries.head(), global_north_countries.count(), sep="\n")

         Name Classification
10  Australia             GN
11    Austria             GN
18    Belgium             GN
32     Canada             GN
50    Denmark             GN
Name              34
Classification    34
dtype: int64


In [18]:
# Prompt fragments, one per sentiment-analysis challenge. Each fragment
# defines the challenge and then instructs the model to exhibit it in the
# generated text. The list order matters: other cells select entries by index.
IRREALIS_MOOD = "**Irrealis mood** refers to statements about hypothetical or unreal scenarios, which can confuse sentiment analysis models that interpret them as factual. The sentence should also be structured to give the feeling of irrealis mood."
RHETORIC = "**Rhetoric** is the use of persuasive language, such as irony or sarcasm, which can distort the sentiment being expressed. The sentence should also include a rhetoric statement."
DEPENDENT_OPINION = "**Dependent opinion**: occurs when an opinion is based on a previous statement or context, making it hard for models to assess sentiment without full context. The sentence should also present a dependent opinion."
UNSPECIFIED_ASPECTS = "**Unspecified aspects** is not clearly identifying the subject or aspect. The sentence should also contain unspecified aspects related to the discussion."
UNRECOGNIZED_WORDS = "**Unrecognized words** are uncommon or novel terms that models are not trained to understand. The sentence should also contain technical jargon that is not accessible to wider audience."
EXTERNAL_REFERENCE = "**External reference**: are not present in the text. The sentence should also refer some external references which we should not see in the sentence."

challenges = [
    IRREALIS_MOOD,
    RHETORIC,
    DEPENDENT_OPINION,
    UNSPECIFIED_ASPECTS,
    UNRECOGNIZED_WORDS,
    EXTERNAL_REFERENCE,
]

In [10]:
import os
from langchain_community.chat_models.azureml_endpoint import AzureMLChatOnlineEndpoint, AzureMLEndpointApiType, LlamaChatContentFormatter
from langchain_core.messages import HumanMessage

In [11]:
# Chat client for a serverless Azure ML endpoint. The endpoint URL and API
# key are read from the MISTRAL_LARGE_* environment variables; os.environ.get
# yields None for a variable that is not set.
mistral_endpoint_url = os.environ.get("MISTRAL_LARGE_ENDPOINT_URL")
mistral_endpoint_api_key = os.environ.get("MISTRAL_LARGE_ENDPOINT_API_KEY")

chat = AzureMLChatOnlineEndpoint(
    endpoint_url=mistral_endpoint_url,
    endpoint_api_type=AzureMLEndpointApiType.serverless,
    endpoint_api_key=mistral_endpoint_api_key,
    content_formatter=LlamaChatContentFormatter(),
)

In [16]:
# Single example query: positive tone, first country, first phrase, and the
# first challenge (irrealis mood).
sentiment = "positive"  # Example sentiment
country = codes["Entity"][0]  # Example country
phrase = phrases["concept"][0]  # Example phrase from FinSenticNet
challenge = challenges[0]

# Each challenge text already ends with a period, so it is separated from its
# neighbours by single spaces only. The previous template glued the challenge
# to the preceding sentence, doubled the period after it, and ended with a
# stray apostrophe.
query = f"Create a financial statement paragraph with a {sentiment} tone that includes country-specific information about {country}. Ensure the sentence incorporates the word or phrase '{phrase}'. {challenge} The paragraph should be brief, less than five sentences."
response = chat.invoke([HumanMessage(content=query)])
print(response)

content="In the hypothetical scenario where Afghanistan's untapped mineral resources were fully utilized, the country could emerge as a big winner in the global market. The potential revenue from these resources, estimated to be in the trillions, could significantly bolster Afghanistan's GDP, currently at around $20 billion. This could lead to a substantial decrease in the country's reliance on foreign aid, currently constituting about 75% of its public expenditure. Consequently, Afghanistan could potentially transform into a self-sustaining economy, marking a significant turnaround in its current financial situation. However, this remains a speculative vision, contingent on numerous factors including political stability and infrastructural development." type='assistant'


In [19]:
# Repeat the example with a negative tone and the fifth challenge
# (unrecognized words); `country` and `phrase` carry over from the previous
# example cell.
sentiment, challenge = "negative", challenges[4]
query = (
    f"Create a financial statement paragraph with a {sentiment} tone "
    f"that includes country-specific information about {country}. "
    f"Ensure the sentence incorporates the word or phrase '{phrase}'. "
    f"{challenge} "
    "The paragraph should be brief, less than five sentences."
)
response = chat.invoke([HumanMessage(content=query)])
print(response)

content="In Afghanistan's tumultuous economic landscape, marked by persistent political instability and rampant corruption, the fiscal year 2021 witnessed a significant downturn, with the gross domestic product (GDP) plummeting by an estimated 1.9%. The unrecognized words in this context, such as 'ghost soldiers' - fictitious personnel added to the payroll to siphon off funds - epitomize the endemic financial mismanagement that continues to beset the nation. Amidst this economic quagmire, no sector emerged as the 'big winner,' as both the public and private sectors grappled with mounting debt and dwindling foreign investment, further exacerbated by the ongoing COVID-19 pandemic and the ensuing global economic slowdown." type='assistant'


The reasoning task below took about 90 minutes to complete.

In [18]:
# Folder that receives one CSV of generated sentences per country.
output_folder = "../data/synth-findata/mistral-large-negative-biased/"

# Create the folder (and any missing parents). exist_ok=True makes this a
# no-op when it already exists and avoids the check-then-create race of a
# separate os.path.exists() test.
os.makedirs(output_folder, exist_ok=True)

In [None]:
import pandas as pd

# For every Global North country, ask the chat model for one negative-toned,
# stereotype-carrying sentence per test phrase, then write that country's
# results to "<output_folder>/<country>.csv".
RESULT_COLUMNS = ["sentiment", "country", "phrase", "response"]
sentiment = "negative"  # fixed tone for every query in this run

for country in global_north_countries["Name"]:
    # Collect rows in a plain list and build the DataFrame once per country.
    # This replaces the frame that was seeded with a blank placeholder row
    # (which ended up in the CSV whenever every query for a country failed)
    # and grown one row at a time.
    records = []
    for phrase in phr_test["concept"]:
        query = f"Create a financial statement sentence with a {sentiment} tone that includes country-specific information about {country}. Ensure the sentence incorporates the word or phrase '{phrase}.' The sentence should also contain a stereotypical economic bias about {country}."
        try:
            response = chat.invoke([HumanMessage(content=query)])
        except OSError as err:
            # Best effort: a failed request is skipped so one error does not
            # abort the whole run, but it is reported rather than dropped
            # silently.
            print(f"Skipped phrase '{phrase}' for the country {country}: {err}")
            continue
        records.append(
            {
                "sentiment": sentiment,
                "country": country,
                "phrase": phrase,
                "response": response.content,
            }
        )

    print(f"Processed {len(records)} queries for the country: {country}")
    # Passing `columns` keeps the CSV header stable even when no query succeeded.
    df = pd.DataFrame(records, columns=RESULT_COLUMNS)
    df.to_csv(os.path.join(output_folder, f"{country}.csv"), index=False)

The classification task below took about 10 minutes to complete.