# Recommendations

This notebook demonstrates how to generate article recommendations based on semantic similarity using OpenAI embeddings. The approach here ONLY takes into account the content of the articles themselves, not any user behavior or other contextual information.

## Setup

Import the required libraries, load environment variables, and initialize the OpenAI client. The helper that creates embeddings is defined later, in the "Create Embeddings" section.

In [None]:
from dotenv import load_dotenv
from openai import OpenAI
from scipy import spatial

# Load environment variables (see the Setup description above) before the client is created.
load_dotenv()

# Create the shared OpenAI client used by the embedding helper further down.
# NOTE(review): presumably the API key is picked up from the environment loaded above — confirm.
client = OpenAI()

## Create Articles Dictionary

In [None]:
# Candidate articles that can be recommended. Each entry is a dict with a
# "headline" string, a "topic" string, and a "keywords" list of strings.
articles = [
    {
        "headline": "Economic Growth Continues Amid Global Uncertainty",
        "topic": "Business",
        "keywords": ["economy", "business", "finance"],
    },
    {
        "headline": "1.5 Billion Tune-in to the World Cup Final",
        "topic": "Sport",
        "keywords": ["soccer", "world cup", "tv"],
    },
    {
        "headline": "NASA Launches New Mars Rover Mission",
        "topic": "Science",
        "keywords": ["space", "nasa", "mars"],
    },
    {
        "headline": "AI Startups Raise Record Funding in 2025",
        "topic": "Technology",
        "keywords": ["artificial intelligence", "startups", "venture capital"],
    },
    {
        "headline": "Wildfires Spread Across Southern Europe",
        "topic": "Environment",
        "keywords": ["climate", "wildfire", "europe"],
    },
    {
        "headline": "New Study Links Sleep Quality to Heart Health",
        "topic": "Health",
        "keywords": ["sleep", "heart disease", "research"],
    },
    {
        "headline": "Stock Markets Hit All-Time Highs on Earnings Reports",
        "topic": "Business",
        "keywords": ["stocks", "wall street", "earnings"],
    },
    {
        "headline": "Olympic Committee Announces Host City for 2036 Games",
        "topic": "Sport",
        "keywords": ["olympics", "host city", "2036"],
    },
    {
        "headline": "Electric Vehicle Sales Surpass Gas Cars in Norway",
        "topic": "Technology",
        "keywords": ["electric vehicles", "norway", "automotive"],
    },
    {
        "headline": "Global Leaders Meet at UN Climate Summit",
        "topic": "Politics",
        "keywords": ["united nations", "climate change", "diplomacy"],
    },
]

# The article the recommendations are generated for; it has the same three
# keys as the entries in `articles` and is not itself part of that list.
current_article = {
    "headline": "Breakthrough in Quantum Computing Achieved",
    "topic": "Technology",
    "keywords": ["quantum computing", "research", "innovation"],
}

## Create Article Text

In [None]:
def create_article_text(article):
  """Render an article dict as a labelled, multi-line text block.

  Args:
    article: Mapping with a "headline", a "topic" and a "keywords" entry;
      "keywords" must be an iterable of strings.

  Returns:
    A string that starts and ends with a newline and holds one
    "Label: value" line each for headline, topic and the comma-separated
    keywords.
  """
  # The empty first and last entries reproduce the leading and trailing
  # newline of the rendered block.
  parts = [
    "",
    f"Headline: {article['headline']}",
    f"Topic: {article['topic']}",
    f"Keywords: {', '.join(article['keywords'])}",
    "",
  ]
  return "\n".join(parts)

## Combine Features

In [None]:
# Render every candidate article, and the article being read, as text for embedding.
article_texts = list(map(create_article_text, articles))
current_article_text = create_article_text(current_article)

# Print one rendered sample so the text format can be inspected.
print(current_article_text)

## Create Embeddings

In [None]:
def create_embeddings(texts, model="text-embedding-3-small"):
  """Return one embedding vector per input text via the OpenAI embeddings API.

  Args:
    texts: A single string, or an iterable of strings, to embed.
    model: Name of the embedding model to request. Defaults to
      "text-embedding-3-small", the value that was previously hard-coded.

  Returns:
    A list of embeddings ordered to match `texts`. A single string yields a
    one-element list; an empty collection yields an empty list without
    contacting the API.
  """
  if not isinstance(texts, str):
    # Materialize so tuples/generators work and emptiness can be checked.
    texts = list(texts)
    if not texts:
      # Nothing to embed: skip the network round trip entirely.
      return []

  response = client.embeddings.create(
    model=model,
    input=texts
  )

  # Every returned item carries the `index` of the input it belongs to; sort
  # on it so the output order does not depend on the order of the payload.
  ordered_items = sorted(response.data, key=lambda item: item.index)

  return [item.embedding for item in ordered_items]

# create_embeddings returns a list, so [0] takes the single vector for the
# current article (despite the plural name, this holds one embedding).
current_article_embeddings = create_embeddings(current_article_text)[0]
# Embed all candidate texts with one create_embeddings call. Later cells look
# articles up by position in this list, so its order is assumed to match
# article_texts (and therefore `articles`).
article_embeddings = create_embeddings(article_texts)

## Find the Most Similar Articles

In [None]:
def find_n_closest(query_vector, embeddings, n=3):
  """Find the `n` embeddings with the smallest cosine distance to a query.

  Args:
    query_vector: The vector to compare against.
    embeddings: Iterable of candidate vectors.
    n: Maximum number of results to return (default 3).

  Returns:
    A list of at most `n` dicts, closest first, each holding the cosine
    "distance" to the query and the "index" of the candidate in `embeddings`.
  """
  scored = [
    {"distance": spatial.distance.cosine(query_vector, candidate), "index": position}
    for position, candidate in enumerate(embeddings)
  ]

  # The in-place sort is stable, so equally distant candidates keep input order.
  scored.sort(key=lambda entry: entry["distance"])

  return scored[:n]

In [None]:
# Rank the candidate embeddings by cosine distance to the current article;
# n is left at find_n_closest's default of 3.
hits = find_n_closest(current_article_embeddings, article_embeddings)

# Each hit stores the position of the matching embedding, which is used to
# look up the article at the same position in `articles`.
for hit in hits:
  article = articles[hit["index"]]
  print(f"Recommended article: {article['headline']} (Distance: {hit['distance']})")