In [6]:
import os,sys
sys.path.insert(0,'../libs')
from dotenv import load_dotenv
import openai
from tqdm import tqdm
env_path = '../../.env'
load_dotenv(dotenv_path=env_path)

True

#### Most basic example

In [4]:
# Modify OpenAI's API key and API base to use vLLM's API server.
client = openai.OpenAI(
    # defaults to os.environ.get("OPENAI_API_KEY")
    api_key=os.getenv("OPENAI_API_KEY"),
    #base_url=openai_api_base,
)

In [6]:
messages=[
    {"role": "system", "content": "You are a helpful AI assistant"},
    {"role": "user", "content": 'Create a very short and simple game with python code'},
]

## just run one test, make sure the api works 
response = client.chat.completions.create(
    model="gpt-4o-mini", # "llama3-8b"  or "llama3-70b"
    messages=messages,
    temperature=0,
)

print(response.choices[0].message.content)

Sure! Here's a simple number guessing game in Python:

```python
import random

def guess_the_number():
    number_to_guess = random.randint(1, 100)
    attempts = 0
    print("Welcome to the Number Guessing Game!")
    print("I'm thinking of a number between 1 and 100.")

    while True:
        try:
            guess = int(input("Take a guess: "))
            attempts += 1

            if guess < number_to_guess:
                print("Too low!")
            elif guess > number_to_guess:
                print("Too high!")
            else:
                print(f"Congratulations! You guessed the number in {attempts} attempts.")
                break
        except ValueError:
            print("Please enter a valid number.")

if __name__ == "__main__":
    guess_the_number()
```

To play the game, simply run the script. The program will randomly select a number between 1 and 100, and you have to guess what it is. The game will give you hints whether your guess is too low or too high,

#### Async calls 
- more example [here](https://github.com/openai/openai-cookbook/blob/main/examples/api_request_parallel_processor.py)

In [1]:
import os
import asyncio
from openai import OpenAI, AsyncOpenAI
from dotenv import load_dotenv
env_path = '../../.env'
load_dotenv(dotenv_path=env_path)


In [18]:
class GPT:
    def __init__(self,model_name='gpt-4o-mini',Async=False,api_key=None):

        self.model_name = model_name
        if api_key is None:
            api_key = os.getenv('OPENAI_API_KEY')
        
        if Async:
            print('load async client')
            self.client = AsyncOpenAI(
                api_key=api_key,
            )
        else:
            print('load sync client')
            self.client = OpenAI(
                api_key=api_key,
            )
            
    def response(self, user_prompt, 
                 sys_prompt="You are a helpful assistant.",
                 model_name=None):
        if model_name is None:
            model_name = self.model_name
        
        response = self.client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": sys_prompt},
                {"role": "user", "content": user_prompt}
            ]
        )
        return response
    
async def process_messages(messages):
    llm = GPT(model_name='gpt-4o-mini',Async=True)
    for message in messages:
        response = await llm.response(message)
        print(response.choices[0].message.content)


async def main() -> None:
    messages_list = ["Say this is a test 1",
                    "Say this is a test 2",
                    "Say this is a test 3",
                    "Say this is a test 4",
                    "Say this is a test 5",
                    "Say this is a test 6",
                    "Say this is a test 7",
                    "Say this is a test 8",
                    "Say this is a test 9",
                    "Say this is a test 10",
                    ]

    await process_messages(messages_list)
    
    if __name__ == "__main__":
        asyncio.run(main())  ## this can not be run in jupyter 

#### Batch processing with bath API
- follow examples [here](https://cookbook.openai.com/examples/batch_processing) and [here](https://platform.openai.com/docs/guides/batch/getting-started)

In [19]:
import json
from openai import OpenAI
import pandas as pd
from IPython.display import Image, display

#### Task - categorizing paragraphs  

In [54]:
dataset_path = "../../sample_data/detailed_topic_identification_eval_small.csv"
out_bathtask_path = "../../sample_data/sample_batch.jsonl"
batchtask_result_path = "../../sample_data/sample_batch_results.jsonl"
df = pd.read_csv(dataset_path)
df.head()

Unnamed: 0,paragraph,topic_model_label,mapped_topic_label,ground_truth_label,Notes
0,"Additional, targeted fiscal support is warrant...",Fiscal Management,Fiscal Stance,Fiscal Stance,
1,The authorities agreed that additional fiscal ...,Fiscal Management,Fiscal Stance,Fiscal Stance,
2,Box 1. Jordan: Past Fund Staff Advice and Impl...,Fiscal Management,Fiscal Stance,Fiscal Stance,
3,The authorities consider that geopolitical ten...,Financial Stability,Financial Stability,Financial Stability,
4,The banking sector is in good financial health...,Financial Stability,Financial Stability,Financial Stability,


In [22]:
categorize_system_prompt ="""
You are an experience macroeconomist from IMF. Your job is to assign topic labels to a given paragraph from IMF document',
You are given a list of topics with their definition and key indicators as below:
----------------
----------------
**Economic Outlook**:
- **Definition**: The assessment of cyclical position and economic outlook involves evaluating the current and projected state of an economy over various time horizons. This includes analyzing near-term and medium-term growth prospects, understanding the business cycle phases (expansion and contraction), and identifying potential economic risks and uncertainties. Key indicators such as GDP growth, inflation, and the output gap are scrutinized to gauge macroeconomic stability. The evaluation also considers the impacts of fiscal and monetary policies on economic trends and forecasts potential scenarios, highlighting recession risks and opportunities for economic expansion.
- **Key Indicators**: economic outlook, near-term growth, medium-term growth, economic assessment, GDP growth, business cycle, economic forecast, projected growth, output gap, cyclical analysis, economic risks, economic indicators, macroeconomic stability, recession risk, expansion phase, contraction phase, economic trends

**Monetary Policy**:
- **Definition**: Monetary policy refers to the actions undertaken by a central bank, such as the Federal Reserve or the European Central Bank, to manage the economy by controlling the money supply, interest rates, and inflation. It aims to achieve price stability, full employment, and economic growth. Key aspects include setting the policy rate, managing inflation expectations and targets, addressing inflationary pressures, and ensuring financial stability. Monetary policy can involve conventional measures, such as adjusting interest rates, and unconventional tools, like quantitative easing and monetary tightening. It also encompasses the monetary transmission mechanism, which describes how policy actions affect the economy, and the interaction with fiscal policy.
- **Key Indicators**: inflation expectations, inflation target, inflationary pressures, monetary policy stance, policy rate, price stability, interest rates, central bank, quantitative easing, monetary tightening, unconventional monetary policy, monetary transmission mechanism, currency exchange rates, liquidity management, money supply, aggregate demand

**Fiscal Stance**:
- **Definition**: The fiscal stance and debt topic encompasses the analysis and evaluation of a government's fiscal policies and their impact on economic sustainability. This includes assessing fiscal sustainability, consolidation efforts, and the overarching fiscal framework that guides policy decisions. Key considerations involve the management of fiscal space, budget allocations, and the balance between fiscal deficits and surpluses. The topic also examines the influence of oil and non-oil revenues on fiscal health, the intricacies of managing government debt, and strategies for ensuring debt sustainability. Understanding the relationship between fiscal policy, expenditure, GDP, and various forms of debt (public, external, and domestic) is crucial for formulating effective economic strategies and maintaining financial stability.
- **Key Indicators**: fiscal sustainability, fiscal consolidation, fiscal framework, fiscal policy, fiscal space, budget, fiscal deficit, primary deficit, balanced budget, fiscal stance, oil revenue, non-oil revenue, government debt, expenditure, debt sustainability, debt management, external debt, public debt, domestic debt

**Financial Stability**:
- **Definition**: Financial stability refers to the resilience of the financial system, including banks, financial markets, and other financial institutions, in withstanding economic shocks and maintaining efficient functioning. It encompasses various aspects such as risk management, credit growth, and the health of the banking sector. Key elements include the implementation of macroprudential policies, management of non-performing loans (NPLs), maintenance of adequate capital and liquidity levels, assessment of risks in the housing market; and robust supervision and stress testing of financial institutions. Effective governance, rigorous internal and external audits, and adherence to reporting standards and safeguards assessments are essential to ensure financial stability.
- **Key Indicators**: financial stability, banking sector, credit growth, financial institutions, macroprudential, non-performing loans (NPLs), capital, credit risk, liquidity, supervision, stress tests, bank governance, internal audit, reporting standards, safeguards assessment, external audit, regulatory measures, overvaluation, bubbles, booms, real estate sector risks 

**External Stance**:
- **Definition**: The topic covers the macroeconomic analysis of a country's external economic health and the dynamics of its currency's exchange rate. Key elements include international reserves, current account deficits and surpluses, and the effective exchange rate, which are essential indicators of external balance. The topic also encompasses the analysis of implied fundamentals through models like the EBA-lite, and the impact of capital grants, trade policies, and trade elasticities on the external position. Understanding trade liberalization, integration, and barriers further helps in evaluating the broader context of global trade influences on exchange rates.
- **Key Indicators**: international reserves, current account deficit, implied fundamentals, external balance, effective exchange rate, current account surplus, EBA-lite model, external deficit, capital grants, exchange rate, import elasticity, export elasticity, trade liberalization, trade policy, trade integration, bilateral trade, global trade, tariff barriers

**Climate Change**:
- **Definition**: Climate mitigation, adaptation, and transition encompass strategies and actions aimed at reducing greenhouse gas emissions, enhancing resilience to climate impacts, and shifting towards sustainable and low-carbon energy systems. These efforts include the development and implementation of renewable energy sources, improving energy efficiency, securing energy resources, and financing green initiatives. Key policies such as carbon pricing and green finance play a critical role in driving sustainable development and achieving carbon neutrality. The transition to a low-carbon economy involves not only technological advancements but also comprehensive environmental policies and frameworks to support ecosystem services and build resilience against climate-related risks.
- **Key Indicators**: climate mitigation, climate adaptation, climate transition, energy security, climate change, renewable energy, energy efficiency, sustainability, climate finance, carbon pricing, emissions reduction, green finance, environmental policy, sustainable development, carbon neutrality, climate resilience, low-carbon technology, decarbonization, net-zero emissions, ecosystem services

**Inclusion**:
- **Definition**: Inclusion in the context of macroeconomics refers to the equitable participation and access to resources across various segments of society. This encompasses gender equality, labor force participation, and the provision of social spending aimed at improving financial inclusion, education, housing and health. Key aspects include promoting skilled labor, increasing employment opportunities, improve housing affordability and addressing labor market disparities. Social assistance programs and policies targeting poverty reduction and social protection play a critical role in mitigating inequality. Furthermore, pension reforms and the management of public wages are essential for ensuring sustainable economic growth and social stability. Addressing employment effects and wage growth, including the implementation of minimum wage policies, are vital components of fostering inclusive economic development.
- **Key Indicators**: gender, social spending, financial inclusion, education, health, skilled labor, employment, female labor, labor market, social assistance, poverty reduction, social protection, inequality, pension reform, retirement, wage, housing affordability

**Technology**:
- **Definition**: Digitalization and technology encompass the integration of digital technologies into various sectors, leading to significant transformations in the economy, government, and society. This topic covers the development of digital infrastructure and innovation, the adoption of new technologies, and the formulation of digital strategies. Key aspects include the digital economy, platforms, skills, and government initiatives. Additionally, it addresses the regulation and legal frameworks for emerging technologies like Bitcoin and digital assets, ensuring consumer protection and financial inclusion while managing risks. The role of Artificial Intelligence, blockchain, cybersecurity, and data privacy are also critical in this digital landscape.
- **Key Indicators**: digital infrastructure, digital innovation, technology adoption, digital strategy, digital economy, digital technologies, digital platforms, digital skills, digital government, bitcoin regulation, legal framework, consumer protection, financial inclusion, regulatory framework, risks, digital assets, Artificial Intelligence, blockchain, cybersecurity, data privacy, bitcoin, legal tender, Fintech

**Governance**:
- **Definition**: Governance in the context of macroeconomic policy involves the frameworks and practices that ensure the effective, transparent, and accountable management of public resources. Key elements include combating corruption through robust anti-corruption strategies and enforcement mechanisms, enhancing Anti-Money Laundering and Countering the Financing of Terrorism (AML/CFT) frameworks, and promoting fiscal transparency to ensure that government financial operations are open and clear to the public. Central bank independence and institutional reforms are crucial for maintaining the integrity and stability of financial systems. Effective governance also involves setting and adhering to reporting standards, conducting internal and external audits, and implementing safeguards assessments to detect and prevent misuse of funds. Legislation, compliance, and prosecution are fundamental in enforcing these principles, while continuous efforts to refine and implement recommendations are essential for improving governance structures.
- **Key Indicators**: Corruption, AML/CFT, Fiscal transparency, Central bank independence, Institutional reforms, Corruption strategy, Enforcement, Legislation, Compliance, Prosecution, Bank governance, Internal audit, External audit, Safeguards assessment, Reporting standards, Governance, Anti-corruption, Audit, Transparency, Implementing recommendations

**Structural Reforms**:
- **Definition**: Structural reforms refer to policies and measures aimed at improving the overall productivity and efficiency of an economy. These reforms are distinct from social reforms and focus on enhancing the functioning of product markets, fostering innovation, and increasing economic diversification and competitiveness. Key areas include product market reforms, improving the business environment, public and private investment management, and fostering technological advancement. By implementing regulatory, labor market, trade, and financial sector reforms, these measures aim to create a more dynamic and competitive economy capable of sustaining long-term growth and development.
- **Key Indicators**: product market reforms, productivity enhancement, economic diversification, increasing competitiveness, business environment, public investment, investment management, private sector development, innovation, regulatory reforms, entrepreneurship, market efficiency, technological advancement, labor market reforms, trade liberalization, infrastructure development, financial sector reforms, institutional reforms, foreign direct investment, competition policy

**Other Topics**:
- **Definition**: Any content that do not fit into the *predefined categories above*. Examples include discussions on IMF program performance, data provision, data quality and dissemination, expressions of appreciation for Fund support, including capacity development; and other macroeconomic subjects that are not predefined.
- **Key Indicators**: no specific key indicators 

----------------
----------------
    
You will be given a paragraph from a report published by the International Monetary Fund. 
Please carefully analyze the paragraph and classify the provided paragraph using ONLY the provided topics. 
Try your best to assign only one topic to the paragraph. You can use multiple categories only if you are very confident that multiple topics are extensively discussed in the paragraph.
Please be aware that there is a **Other Topics** category. If the paragraph does not fit into any of the predefined topics before **Other Topics** category, put it as **Other Topics**. 
Please provide your reasoning for your classification first, and then provide the topic label and a confidence score from 0-100.

Please respond in clean json format as follow:
```json
{{"reasoning": "<reasoning process>", 
"topic_labels": [{{"topic_label":"<identified topic label>","confidence_score":<confidence score>}},...]}}
```
Response:

"""


##### Run some examples to make sure the prompt works as expected 

In [23]:
# Modify OpenAI's API key and API base to use vLLM's API server.
client = openai.OpenAI(
    # defaults to os.environ.get("OPENAI_API_KEY")
    api_key=os.getenv("OPENAI_API_KEY"),
    #base_url=openai_api_base,
)

def get_categories(paragraph):
    response = client.chat.completions.create(
    model="gpt-4o-mini",
    temperature=0,
    # This is to enable JSON mode, making sure responses are valid json objects
    response_format={ 
        "type": "json_object"
    },
    messages=[
        {
            "role": "system",
            "content": categorize_system_prompt
        },
        {
            "role": "user",
            "content": paragraph
        }
    ],
    )

    return response.choices[0].message.content

In [25]:
# Testing on a few examples
for _, row in df[:2].iterrows():
    description = row['paragraph']
    ground_truth = row['ground_truth_label']
    result = get_categories(description)
    print(f"True Topic: {ground_truth}\nRESULT: {result}\nParagraph: {description}")
    print("\n\n----------------------------\n\n")

True Topic: Fiscal Stance
RESULT: {"reasoning": "The paragraph discusses the need for additional fiscal support in response to the economic impacts of the COVID-19 pandemic. It emphasizes the allocation of budget resources for social assistance, upskilling programs, and support for SMEs, while also addressing the importance of managing public debt and ensuring fiscal sustainability. The focus on fiscal policies, budget allocations, and the implications of government debt aligns closely with the definition of the Fiscal Stance topic, which encompasses the analysis of government fiscal policies and their impact on economic sustainability. Therefore, this paragraph is best classified under the Fiscal Stance topic.", 
"topic_labels": [{"topic_label":"Fiscal Stance","confidence_score":95}]}
Paragraph: Additional, targeted fiscal support is warranted in 2022-2023. The 2022 Budget allocated RM23 billion (1.4 percent of GDP) out of the COVID Fund for COVID-related spending, with an increasing 

#### Create Batch File

The batch file, in the jsonl format, should contain one line (json object) per request. Each request is defined as such:
```json
{
    "custom_id": <REQUEST_ID>,
    "method": "POST",
    "url": "/v1/chat/completions",
    "body": {
        "model": <MODEL>,
        "messages": <MESSAGES>,
        // other parameters
    }
}
```


- convert everything to a list of dicts 

In [27]:
tasks = []
for index, row in df.iterrows():
    description = row['paragraph']
    task = {
        "custom_id": f"task-{index}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            # This is what you would have in your Chat Completions API call
            "model": "gpt-4o-mini",
            "temperature": 0,
            "response_format": { 
                "type": "json_object"
            },
            "messages": [
                {
                    "role": "system",
                    "content": categorize_system_prompt
                },
                {
                    "role": "user",
                    "content": description
                }
            ],
        }
    }
    tasks.append(task)

- write to an jsonl file

In [28]:
with open(out_bathtask_path, 'w') as file:
    for obj in tasks:
        file.write(json.dumps(obj) + '\n')

- upload file to openai 

In [29]:
batch_file = client.files.create(
  file=open(out_bathtask_path, "rb"),
  purpose="batch"
)
print(batch_file)

FileObject(id='file-Kz3x6yrx7PC644HH1Ea71x5A', bytes=1490479, created_at=1725040384, filename='sample_batch.jsonl', object='file', purpose='batch', status='processed', status_details=None)


#### Create the batch job

In [30]:
batch_job = client.batches.create(
  input_file_id=batch_file.id,
  endpoint="/v1/chat/completions",
  completion_window="24h", #For now, the completion window can only be set to 24h
  metadata={
      "description": "Test Topic categorization Job"
    }
)

In [34]:
print(batch_job)

Batch(id='batch_hIErVCEAm0msEi4qt8kHgaDE', completion_window='24h', created_at=1725040503, endpoint='/v1/chat/completions', input_file_id='file-Kz3x6yrx7PC644HH1Ea71x5A', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1725126903, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'Test Topic categorization Job'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))


### Checking the Status of a Batch
- example https://platform.openai.com/docs/guides/batch/4-checking-the-status-of-a-batch

In [45]:
import time
batch_client = OpenAI()

In [46]:
batch_id = "batch_hIErVCEAm0msEi4qt8kHgaDE"

for i in range(10):
    status = batch_client.batches.retrieve(batch_id)
    print(f'\rProcessing Status: {status.status}; Progress: {status.request_counts}', end='', flush=True)
    if status.status in ['completed','expired','cancelled','failed']:
        break
    time.sleep(10)  # Simulate work being done

Processing Status: completed; Progress: BatchRequestCounts(completed=100, failed=0, total=100)

In [49]:
out_file_id = status.output_file_id
print(out_file_id)

file-5T8WnQMBYRTe1ZCBo6IRMJa6


#### Retrieving Results

In [55]:
## need output file id from batch status
file_response = client.files.content(out_file_id).content
with open(batchtask_result_path, 'wb') as file:
    file.write(file_response)

In [56]:
# Loading data from saved file
results = []
with open(batchtask_result_path, 'r') as file:
    for line in file:
        # Parsing the JSON string into a dict and appending to the list of results
        json_object = json.loads(line.strip())
        results.append(json_object)

In [62]:
# Reading only the first results
for res in results[:5]:
    task_id = res['custom_id']
    # Getting index from task id
    index = task_id.split('-')[-1]
    result = res['response']['body']['choices'][0]['message']['content']
    print(f"RESULT: {result}")
    print("\n----------------------------\n")

RESULT: {"reasoning": "The paragraph discusses the need for additional fiscal support in response to the economic impacts of the COVID-19 pandemic. It emphasizes the allocation of budget resources for social assistance, upskilling programs, and support for SMEs, while also addressing the importance of managing public debt and fiscal sustainability. The focus on fiscal policies, budget allocations, and the implications of government debt clearly aligns with the definition of the Fiscal Stance topic, which encompasses the analysis of government fiscal policies and their impact on economic sustainability. Therefore, this paragraph is best classified under the Fiscal Stance topic.", 
"topic_labels": [{"topic_label":"Fiscal Stance","confidence_score":95}]}

----------------------------

RESULT: {"reasoning": "The paragraph discusses various aspects of fiscal policy, including the need for additional fiscal support, a medium-term expenditure-based consolidation plan, and measures to address 