In [None]:
# all imports

from openai import OpenAI
from IPython.display import Markdown, display, update_display

In [None]:
# Create an OpenAI-compatible client pointed at the local Ollama endpoint.
# The api_key value is a dummy placeholder string, not a real credential.

OLLAMA_BASE_URL = "http://localhost:11434/v1"
client = OpenAI(
    api_key="ollama",
    base_url=OLLAMA_BASE_URL,
)

In [None]:
# optional smoke test (left commented out): uncomment to check that the client gets a response from the model

# messages = [{"role": "user", "content": "Hi there!"}]

# model = "llama3.1"

# response = client.chat.completions.create(
#     model=model,
#     messages=messages
# )

# print(response.choices[0].message.content)

In [None]:
# function to stream response from the model, update the display in real-time, and return the final response as a string

def stream_llm_response(model: str, messages: list) -> str:
    """Stream a chat completion and render it live as Markdown.

    Sends ``messages`` to ``model`` through the module-level ``client`` with
    ``stream=True``. As chunks arrive, the text accumulated so far is rendered
    as Markdown in a single notebook display area, which is created on the
    first usable chunk and updated in place afterwards.

    Args:
        model: Name of the model to query (e.g. ``"llama3.1"``).
        messages: Chat messages as a list of ``{"role": ..., "content": ...}``
            dicts, passed through unchanged to the client.

    Returns:
        The complete response text, i.e. the concatenation of every streamed
        content delta. An empty string if the stream carried no content.
    """
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        stream=True
    )

    full_response = ""
    display_id = None

    for chunk in response:
        # OpenAI-compatible streams may contain chunks with an empty
        # ``choices`` list (for example a trailing usage-only chunk).
        # Indexing [0] on those would raise IndexError, so skip them.
        if not chunk.choices:
            continue

        # ``delta.content`` is None on chunks that carry no text
        # (e.g. role-only or finish chunks); treat that as empty.
        content = chunk.choices[0].delta.content or ""
        full_response += content

        if display_id is None:
            # First usable chunk: create the display area and keep its id so
            # later chunks update the same output instead of appending new ones.
            display_id = display(Markdown(full_response), display_id=True).display_id
        else:
            update_display(Markdown(full_response), display_id=display_id)

    return full_response


In [None]:
# optional smoke test (left commented out): uncomment to try the streaming function with a simple prompt

# messages = [{"role": "user", "content": "Tell a funny joke for a Machine Learning engineer."}]
# model = "llama3.1"
# response = stream_llm_response(model=model, messages=messages)
# print("\n\n--------------------------------------------------------------------------------\n\n")
# print(response)

In [None]:
# define the system prompt for the extraction task

# system_prompt = """
# You are an AI assistant that extracts **accurate, structured insights** from markdown articles.

# ### Task
# From the given markdown article, produce:

# ## Key Takeaways
# - Core conclusions, claims, or findings
# - Important facts, numbers, decisions, or timelines

# ## Summary
# - A concise but complete overview
# - Capture the main topic, purpose, and key points

# ### Rules
# - Preserve meaning, context, and important qualifiers
# - Do not add opinions or external information
# - Do not include meta commentary
# - Use clear markdown headings and bullet points
# - Be concise but information-dense
# """

# Active system prompt: instructs the model to return only a markdown
# summary and key takeaways for the supplied article.
system_prompt = (
    "\n"
    "You are an expert at extracting accurate, structured summaries and key takeaways from markdown articles.\n"
    "You are given a web scraped markdown article. \n"
    "Your task is to extract the key takeaways and a concise summary from the article.\n"
    "The output should be in markdown format.\n"
    "Reply only with the markdown content, do not include any explanations or commentary.\n"
)

In [None]:
# Build the user prompt: read the scraped article from
# scraped-content/main_content.md and embed its text between the instructions.

with open("scraped-content/main_content.md", mode="r", encoding="utf-8") as f:
    content = f.read()

# Adjacent literals are concatenated into one string; only the article line
# needs to be an f-string.
user_prompt = (
    "\n"
    "Here is a web scraped markdown article. \n"
    "Extract the key takeaways and a concise summary according to the system prompt instructions.\n"
    "\n"
    "Article:\n"
    f"{content}\n"
    "\n"
    "Return the output in markdown format. \n"
    "Do not include any explanations or commentary, reply only with the markdown content.\n"
    "Start generating the response immediately.\n"
)

In [None]:
# Print the fully rendered user prompt (instructions plus article text) for inspection.
print(user_prompt)

In [None]:
# Run the extraction: pair the system and user prompts into a chat
# conversation and stream the model's answer into the notebook.

model = "llama3.1"

messages = [
    dict(role="system", content=system_prompt),
    dict(role="user", content=user_prompt),
]

response = stream_llm_response(messages=messages, model=model)