In [1]:
import os
# Move two levels up from the notebook's folder so that the relative
# "examples/..." paths used further down resolve.
# The starting directory is remembered the first time this cell runs, so
# re-running the cell does not climb two more levels on every execution.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, "../../"))
os.getcwd()

'/llm_research'

In [2]:
from langchain.prompts import PromptTemplate


def compile_prompt(prompt_dir='examples/news_articles_classification/prompt'):
    """Build the system and human prompt templates and save them as JSON files.

    The system template carries the three-step labelling procedure and has a
    single ``{country}`` placeholder. The human template carries the article
    and the output instructions and has the placeholders ``{content}``,
    ``{output_instructions}`` and ``{instructions}``.

    Args:
        prompt_dir: Directory that receives ``system_message.json`` and
            ``human_message.json``. Defaults to the location used by the rest
            of this notebook.

    Returns:
        None. The two templates are written to disk via ``PromptTemplate.save``.
    """
    # The trailing backslash right after the opening quotes (and before the
    # closing ones) keeps the template free of leading/trailing newlines.
    system_template = """\
You are an experienced economist working on constructing {country}'s Economic Policy Uncertainty Index (EPU index). Your goal is to classify whether a news article introduces the "policy-related economic uncertainty" for {country}.

The label for the news article that surely introduce policy-related economic uncertainty is 1, while the one that 'does not' introduces it is 0. Be careful with the label definition and make the classification based on this definition.

Please follow the below steps strictly.

Step 1:
What country is this news article mainly related to? If it is 'not' mainly related to {country}, simply classify it with label 0, and there is no need to consider either Step 2 nor Step 3. The relevance is defined, for example, by examining whether the people or companies mentioned in the news are correlated with {country} or if the events in the news actually happen within {country}.

Step 2:
In this step, the news should be related to {country}, and further check whether the news article is related to the {country}'s economic uncertainty, considering future economic conditions, trends, or outcomes. If the news article is 'not' related to the {country}'s economic uncertainty, then it should also be classified as 0.

Step 3:
In this step, the news should be related to the {country}'s economic uncertainty, and further check whether the economic uncertainty is policy-related. For this type of news article that specifically introduces {country}'s policy-related economic uncertainty, it should be classified as 1. One possible example is the news introduces uncertainty as a consequence of changes or ambiguity in government policies, regulations, or fiscal measures.

Notice: After making the classification, please also provide a thorough explanation.\
"""

    system_prompt_template = PromptTemplate.from_template(system_template)
    system_prompt_template.save(f'{prompt_dir}/system_message.json')

    # Only label 0 articles fail one of the steps above (label 1 articles pass
    # all three), so the "which step fails" request is addressed to label 0.
    human_template = """\
News article:
{content}

Question: 
Think step by step and determine which label should the above news article be classified as? Recall that 1 for introducing policy-related economic uncertainty and 0 for the opposite case.

Output Instructions:
1. Your answer should be a json string and be able to parsed into a dictionary with the specified keys. Follow the schema below carefully.
{output_instructions}

2. In addition to correctly classifying the label, provide the reasons based on the previously mentioned steps. Specifically, for label 0 news, indicate at which steps it fails.

3. Besides, don't forget to escape a single quote in the reason section.

{instructions}
"""
    human_prompt_template = PromptTemplate.from_template(human_template)
    human_prompt_template.save(f'{prompt_dir}/human_message.json')

In [3]:
# Write the system and human prompt templates to their .json files under
# examples/news_articles_classification/prompt/.
compile_prompt()

In [4]:
from pydantic import BaseModel, Field


# Schema of the structured answer expected for one news article.
# Documented with comments rather than a class docstring on purpose: a
# docstring on a pydantic model is emitted as the schema's description and
# would therefore alter any text generated from this schema.
class LLMResponse(BaseModel):
    # Predicted label: 1 if the article introduces policy-related economic
    # uncertainty, 0 otherwise.
    pred: int = Field(
        description=" ".join((
            # Each sentence fragment is its own tuple element so that join()
            # inserts the separating space (adjacent literals without a comma
            # would be glued together with no space).
            "If the news article 'does' introduce policy-related economic uncertainty",
            "then this field should be 1. Otherwise, it's 0.",
        ))
    )
    # Free-text justification for `pred`.
    reason: str = Field(
        description=" ".join((
            "Reason for why this news article introduce (label 1) or not introduce (label 0) policy-related economic uncertainty.",
            "Your reason should be consistent with your prediction field.",
            "Use no more than 100 words.",
        ))
    )

In [5]:
import orjson
from pathlib import Path

In [6]:
# Parse the JSONL training set: one JSON object per non-empty line.
# The file is read as bytes and split on "\n" only, so decoding does not
# depend on the platform's default text encoding, and a trailing newline or a
# blank line is skipped instead of being handed to orjson.loads as an empty
# document (which raises).
training_samples = [
    orjson.loads(raw_line)
    for raw_line in map(
        bytes.strip,
        Path("examples/news_articles_classification/training_samples_200.jsonl").read_bytes().split(b"\n"),
    )
    if raw_line
]

In [7]:
# Display the first parsed record to inspect the available fields.
training_samples[0]

{'unique_id_current': 'SF_2_2004_12',
 'EU': 0,
 'EPU': 0,
 'category_codes': '',
 'mention_foreign': 0,
 'mention_foreign_list': '',
 'mainly_foreign': 0,
 'who': 0,
 'actions': 0,
 'effects': 0,
 'title': "Schott gets a little practice for A's stadium hunt",
 'content': 'A\'s owner Steve Schott is building a ballpark in the South Bay, in the heart of the Giants\' fan base, with no concern about the Giants\' territorial rights to the area.\n\nHe\'s even funding it himself, and it\'s expected to open next year.\n\nWhat a windfall for the local nine.\n\nNot so fast, A\'s fans. Its capacity is 1,500, and its location is the campus of Santa Clara University, Schott\'s alma mater. It\'s not exactly the revenue-generating jewel that would save the A\'s from an eternity of living on the cheap.\n\nSchott\'s $4 million donation was a generous gift to the school -- its baseball team no longer has to share a field with soccer players -- but the A\'s still don\'t have a ballpark plan of their own

In [8]:
import importlib
import llm_research

In [9]:
# Build the prompt object from the response schema and the two saved template
# files (system message first, human message second).
# `country` presumably fills the {country} placeholder of the system
# template — confirm against llm_research.Prompt.
prompt = llm_research.Prompt(
    LLMResponse,
    'examples/news_articles_classification/prompt/system_message.json',
    'examples/news_articles_classification/prompt/human_message.json',
    country = "US"
)

In [10]:
# Configure the model wrapper. The keyword names match Ollama generation
# options (context window size, maximum tokens to predict, sampling seed);
# presumably they are forwarded unchanged — confirm against
# llm_research.OllamaLLM.
model = llm_research.OllamaLLM(
    model = 'phi4:14b-q4_K_M',
    num_ctx = 16384,
    num_predict = -1, 
    seed = 1126,
)

In [11]:
# Start a request session on the model. The two strings look like an
# experiment/dataset name and a run name — TODO confirm their meaning against
# llm_research.
model.init_request(
    "news_examples_200",
    "phi4_zero_shot"
)

In [12]:
# Run the prompt over the training JSONL file; the recorded progress bar shows
# 200 items taking roughly 29 minutes, so this is the expensive cell.
# NOTE(review): the second init_request argument is "phi4_zero_shot", yet a
# few-shot examples file is passed here — confirm whether those examples are
# actually used in this run.
model.request_batch(
    prompt,
    "examples/news_articles_classification/training_samples_200.jsonl",
    "examples/news_articles_classification/fewshot_examples.jsonl",
    sleep=0
)

  memory = ConversationBufferMemory(return_messages=True)
100%|██████████| 200/200 [29:11<00:00,  8.76s/it]


In [55]:
# Close the session started by init_request() — presumably this finalizes the
# collected results; confirm against llm_research.
# NOTE(review): this cell's recorded execution count is out of sequence with
# the cells above; verify the notebook with Restart Kernel -> Run All.
model.end_request()