In [None]:
import voyageai
import pandas as pd
import numpy as np
import os
from openai import OpenAI
import math
from sentence_transformers import util
from dotenv import load_dotenv
# Load variables from a .env file (python-dotenv) so the os.getenv lookups below can see them.
load_dotenv()

# API keys are read from the environment rather than hard-coded in the notebook.
OPENAI_api_key = os.getenv("OPENAI_API_KEY")
DEEPSEEK_api_key = os.getenv("DEEPSEEK_API_KEY")

# Both providers are driven through the same OpenAI client class;
# the DeepSeek client only differs by its key and base URL.
client_open_ai = OpenAI(api_key=OPENAI_api_key)
client_deepseek = OpenAI(api_key=DEEPSEEK_api_key, base_url="https://api.deepseek.com")

# Part A
##### Working with word vectors.

In [17]:
def get_embedding(text, client_open_ai=client_open_ai, model="text-embedding-3-small"):
    """
    Request an embedding vector for ``text`` from the embeddings endpoint.

    Args:
        text: The input passed to the embeddings API.
        client_open_ai: Client exposing ``embeddings.create``; defaults to the
            notebook-level OpenAI client.
        model (str): Name of the embedding model to use.

    Returns:
        The ``embedding`` attribute of the first item in the response, or
        ``np.nan`` if anything in the request or response handling raises.
    """
    try:
        # Both the API call and the response unpacking stay inside the try so
        # that any failure is reported and mapped to NaN.
        api_result = client_open_ai.embeddings.create(model=model, input=text)
        vector = api_result.data[0].embedding
    except Exception as e:
        print(f"An error occurred: {e}")
        vector = np.nan
    return vector
    

def subtract_lists(list1, list2):
    """
    Compute the element-wise difference ``list1[i] - list2[i]``.

    Elements are paired with ``zip``, so if the inputs differ in length the
    result has as many entries as the shorter one.

    Args:
        list1 (list of numbers): Values to subtract from.
        list2 (list of numbers): Values to subtract.

    Returns:
        list: The pairwise differences, in order.
    """
    differences = []
    for minuend, subtrahend in zip(list1, list2):
        differences.append(minuend - subtrahend)
    return differences


In [18]:
# Embed the four terms of the King/Queen analogy.
king = get_embedding('King')
male = get_embedding("Male")
queen = get_embedding('Queen')
female = get_embedding("Female")

# Cosine similarity between the offsets (King - Male) and (Queen - Female).
king_minus_male = subtract_lists(king, male)
queen_minus_female = subtract_lists(queen, female)
royal_similarity = util.cos_sim(king_minus_male, queen_minus_female)
print(royal_similarity[0][0].item())

0.7163187265396118


In [20]:
# Same analogy test for parents; `male` and `female` come from the previous cell.
father = get_embedding('father')
mother = get_embedding('mother')

# Cosine similarity between the offsets (father - Male) and (mother - Female).
father_minus_male = subtract_lists(father, male)
mother_minus_female = subtract_lists(mother, female)
parent_similarity = util.cos_sim(father_minus_male, mother_minus_female)
print(parent_similarity[0][0].item())

0.7201025485992432


In [21]:
# Baseline: direct cosine similarity between two unrelated words.
steel = get_embedding('steel')
sunflower = get_embedding('sunflower')

unrelated_similarity = util.cos_sim(steel, sunflower)
print(unrelated_similarity[0][0].item())

0.18572013080120087


# Part B

##### Working with business news

In [35]:
# Read the business news file.
# The path has a .csv extension, so parse it as CSV first. The original cell
# called pd.read_excel on this path; that reader is kept as a fallback in case
# the file is really an Excel workbook saved under a .csv name
# (NOTE(review): confirm the actual format and drop whichever branch is unused).
news_path = 'business_news/ft_unhedged.csv'
try:
    business_news_df = pd.read_csv(news_path)
except (UnicodeDecodeError, pd.errors.ParserError):
    business_news_df = pd.read_excel(news_path)

business_news_df

Unnamed: 0,id,news
0,1,The US and UK struck the first Trump-era trade...
1,2,China’s services PMI fell to a 7-month low ye...
2,3,"April’s ISM services survey, out yesterday, ca..."
3,4,Warren Buffett announced over the weekend that...
4,5,"Yesterday, on the first of May, Donald Trump s..."
5,6,"Somewhere behind the Trump tariff story, the A..."
6,7,Donald Trump went to Michigan today to unveil ...
7,8,Chinese officials stated yesterday that they c...
8,10,While the financial world has been laser-focus...


In [36]:
# Apply the embedding function to the 'news' column.
# get_embedding returns NaN for any text whose request fails.
business_news_df["embedding"] = business_news_df["news"].apply(get_embedding)

# Drop rows with failed embeddings (NaN) and renumber the rows so that the ids
# and the embedding matrix built below line up by position.
df = business_news_df.dropna(subset=["embedding"]).reset_index(drop=True)

# Create a DataFrame from the embeddings of the rows that survived the filter
# (one column per embedding dimension).
embedding_df = pd.DataFrame(df["embedding"].to_list())

# Combine the ID column with the embeddings
final_df = pd.concat([df["id"], embedding_df], axis=1)

# Optional: Save to CSV
# final_df.to_csv("embeddings.csv", index=False)


In [37]:
# Display the combined table: the article id followed by one column per embedding dimension.
final_df

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,...,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535
0,1,-0.05746,0.025041,0.003901,0.009033,-0.010354,0.025041,0.039394,0.049653,0.029611,...,0.020292,0.026088,-0.020316,0.001333,0.01734,0.024517,-0.017983,0.004267,-0.012973,0.005061
1,2,0.002099,0.055218,0.047037,-0.004365,0.040249,0.029806,-0.002834,0.043448,0.014914,...,-0.022583,0.004346,-0.004275,-0.011542,0.00923,-0.013141,-0.016274,0.005001,0.014022,0.003342
2,3,0.006,0.041663,0.028199,-0.031177,0.034257,0.021251,-0.056501,0.024242,0.039092,...,-0.03927,0.029421,-0.024356,-0.012369,-0.0134,0.031432,-0.009818,-0.029879,0.000628,-0.013985
3,4,0.059756,-0.011805,0.031902,0.045707,-0.029634,-0.025049,-0.014683,0.072439,0.023232,...,0.014012,0.018659,0.028219,0.022329,0.014171,0.011317,-0.019195,0.001198,0.006518,-0.004674
4,5,-0.018496,0.039727,0.006947,0.023001,-0.033208,-0.011177,0.019449,-0.026591,0.000969,...,0.032573,0.02421,0.021524,-0.000543,0.009773,-0.010548,0.002866,-0.012422,-0.023954,0.002739
5,6,0.016694,0.021272,0.037671,0.01917,0.042972,-0.017845,0.007537,0.082357,0.01069,...,0.003303,0.0566,-0.019813,0.036627,0.010047,0.021366,-0.004679,-0.035717,0.012611,0.025944
6,7,-0.003829,0.004239,0.008997,0.040341,0.008402,0.008778,0.010029,0.040466,0.030056,...,-0.001496,0.017142,0.013676,0.006275,0.002989,0.014477,0.019057,0.004035,-0.00717,0.016604
7,8,0.001048,-0.021556,0.028534,0.026771,0.028509,0.04475,0.015384,0.032656,0.001968,...,-0.004008,-0.018228,0.032433,-0.015521,0.022164,0.014851,-0.023617,0.020066,0.007667,0.022561
8,10,-0.029115,0.006219,0.032568,0.058911,-0.003581,0.029272,0.042273,0.044288,0.019842,...,-0.000844,-0.013982,0.003308,0.022235,0.007691,0.012968,-0.00249,6.7e-05,0.019384,0.005889


# Part C

##### Analyzing economic fundamentals using structured prompts

In [None]:
def _parse_analysis_response(response_text):
    """Extract ``[analysis, direction]`` from a reply in the requested format.

    The expected reply looks like
    ``{(ANALYSIS: some text), (DIRECTION: STRENGTHEN)}``.

    Raises:
        ValueError: If either marker, or the parenthesis closing the
            direction, cannot be found.
    """
    analysis_marker = "(ANALYSIS: "
    direction_marker = "(DIRECTION: "

    # Test the raw find() results: once a marker length has been added to
    # them they can never equal -1, so a missing marker would go unnoticed.
    analysis_marker_pos = response_text.find(analysis_marker)
    if analysis_marker_pos == -1:
        raise ValueError("Parsing error: Unable to locate analysis or direction markers.")
    analysis_start = analysis_marker_pos + len(analysis_marker)

    analysis_end = response_text.find(direction_marker, analysis_start)
    if analysis_end == -1:
        raise ValueError("Parsing error: Unable to locate analysis or direction markers.")

    analysis = response_text[analysis_start:analysis_end].strip()
    # In the requested format the ANALYSIS group is closed by ")," right before
    # the DIRECTION group; remove that separator so only the analysis text remains.
    analysis = analysis.rstrip(",").rstrip()
    if analysis.endswith(")"):
        analysis = analysis[:-1].rstrip()

    direction_start = analysis_end + len(direction_marker)
    direction_end = response_text.find(")", direction_start)
    if direction_end == -1:
        raise ValueError("Parsing error: Unable to locate end of direction marker.")

    direction = response_text[direction_start:direction_end].strip()
    return [analysis, direction]


def analyze_news_effect(currency, headline_data, client=client_open_ai, gpt_model="gpt-4o"):
    """
    Analyzes the effect of a news headline on a specified currency and returns a brief analysis and direction.

    Parameters:
        currency (str): The currency to analyze (e.g., "AUD").
        headline_data (list): A list containing [news headline, actual, forecast, previous].
            A NaN forecast is left out of the message sent to the model.
        client: The API client used to access the model (must expose
            ``chat.completions.create``).
        gpt_model (str): The model to use for generating responses.

    Returns:
        list: [analysis, direction] parsed from the model's response, or
        [NaN, NaN] if the request fails or the response cannot be parsed
        (the error is printed, not raised).
    """

    try:
        # Unpack the headline data
        headline, actual, forecast, previous = headline_data

        # Only mention the forecast when one exists
        if pd.isna(forecast):
            content = f"(News headline: {headline}, Actual: {actual}, Previous: {previous})"
        else:
            content = f"(News headline: {headline}, Actual: {actual}, Forecast: {forecast}, Previous: {previous})"

        # Ask the model for an analysis in a fixed, parseable format
        completion = client.chat.completions.create(
            model=gpt_model,
            messages=[
                {
                    "role": "system",
                    "content": f"Forget all previous instructions. You are a financial analyst. You will be given a news headline. Please analyze the news's effect on {currency} and provide a brief response. Then indicate whether this headline implies {currency} will STRENGTHEN, WEAKEN, or have an INSIGNIFICANT OR UNCERTAIN effect. Generate the output in this format: {{(ANALYSIS: short analysis discussing the channel), (DIRECTION: one of STRENGTHEN, WEAKEN, INSIGNIFICANT OR UNCERTAIN)}}"
                },
                {
                    "role": "user",
                    "content": content
                }
            ]
        )

        # Treat a missing completion, empty choices, or empty message content
        # as an empty string so the parser reports a clear parsing error.
        if completion and completion.choices:
            response_text = completion.choices[0].message.content or ""
        else:
            response_text = ""

        # Return parsed analysis and direction
        return _parse_analysis_response(response_text)

    except Exception as e:
        # Deliberately best-effort: report the problem and return NaNs so a
        # batch loop over many headlines can continue.
        print(e)
        return [math.nan, math.nan]
    
def read_economic_calendar(currency_name):
    """Load the cleaned economic calendar for one currency.

    Reads ``economic_calendar/csv_clean/{currency_name}_calendar.csv``, uses
    the 'identifier' column as the index and drops the 'Unnamed: 0' column.

    Args:
        currency_name (str): Currency code used in the file name (e.g. 'CHF').

    Returns:
        pandas.DataFrame: The calendar indexed by 'identifier'.
    """
    calendar_path = f'economic_calendar/csv_clean/{currency_name}_calendar.csv'
    raw_calendar = pd.read_csv(calendar_path)
    return raw_calendar.set_index('identifier').drop(columns=['Unnamed: 0'])

In [44]:
# Currency code: selects which calendar file is read here and is later passed
# to analyze_news_effect as the currency to analyze.
currency_name = 'CHF'
# Load that currency's cleaned economic calendar, indexed by 'identifier'.
headline_data = read_economic_calendar(currency_name)

In [45]:
# Preview the first ten calendar entries.
headline_data.head(10)

Unnamed: 0_level_0,Time,Currency,Headline,Actual,Forecast,Previous,DayOfWeek,YYYYMMDD
identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CHF8562,8:30:00,CHF,procure.ch PMI (Dec),43,43.0,42.1,Wednesday,20240103
CHF8564,20:30:00,CHF,CFTC CHF speculative net positions,-5.2K,,-3.4K,Friday,20240105
CHF8566,7:30:00,CHF,Retail Sales (YoY) (Nov),0.007,0.0,-0.003,Monday,20240108
CHF8567,7:30:00,CHF,CPI (YoY) (Dec),0.017,0.015,0.014,Monday,20240108
CHF8568,7:30:00,CHF,CPI (MoM) (Dec),0,-0.002,-0.002,Monday,20240108
CHF8570,6:45:00,CHF,Unemployment Rate n.s.a. (Dec),0.023,,0.021,Tuesday,20240109
CHF8571,6:45:00,CHF,Unemployment Rate s.a. (Dec),0.022,0.022,0.021,Tuesday,20240109
CHF8572,8:00:00,CHF,Foreign Reserves (USD) (Dec),653.7B,,642.4B,Tuesday,20240109
CHF8574,20:30:00,CHF,CFTC CHF speculative net positions,-4.4K,,-5.2K,Friday,20240112
CHF8585,7:30:00,CHF,PPI (MoM) (Dec),-0.006,-0.006,-0.009,Friday,20240119


In [40]:
# Fields handed to the model for each calendar entry, in the order
# analyze_news_effect unpacks them.
headline_columns = ['Headline', 'Actual', 'Forecast', 'Previous']

# Collect one [identifier, analysis, direction] record per entry and build the
# DataFrame once at the end instead of growing it row by row.
analysis_records = []
for item, calendar_row in headline_data[headline_columns].iloc[:10].iterrows():
    headline_item_li = calendar_row.tolist()
    # To compare providers, pass client=client_deepseek together with
    # gpt_model="deepseek-chat" or gpt_model="deepseek-reasoner" instead.
    api_output = analyze_news_effect(currency_name, headline_item_li, client=client_open_ai, gpt_model="gpt-4o")
    analysis_records.append([item, api_output[0], api_output[1]])

# A flat list of names gives ordinary columns; the previous nested list
# ([[...]]) produced MultiIndex columns.
analyzed_headline_df = pd.DataFrame(analysis_records, columns=['identifier', 'Analysis', 'Direction'])

In [41]:
# Display the collected analyses.
# NOTE(review): the saved output of this cell lists AUD identifiers although
# currency_name is set to 'CHF' earlier in the notebook (and the execution
# counts are out of order) — this looks like stale output; re-run to refresh.
analyzed_headline_df

Unnamed: 0,identifier,Analysis,Direction
0,AUD16797,The Judo Bank Australia Manufacturing PMI came...,WEAKEN
1,AUD16799,The improvement in the year-over-year commodit...,STRENGTHEN
2,AUD16802,The Australian Services PMI data came out weak...,WEAKEN
3,AUD16804,The decline in AUD speculative net positions f...,STRENGTHEN
4,AUD16806,The building approvals year-over-year for Nove...,STRENGTHEN
5,AUD16807,The actual Building Approvals data for Novembe...,STRENGTHEN
6,AUD16808,The decline in private house approvals from 0....,WEAKEN
7,AUD16809,The retail sales data for November shows a bet...,STRENGTHEN
8,AUD16811,The Consumer Price Index (CPI) is below the fo...,WEAKEN
9,AUD16813,The news headline indicates that exports incre...,STRENGTHEN
