# End of week 1 exercise

To demonstrate your familiarity with the OpenAI API, and also Ollama, build a tool that takes a technical question  
and responds with an explanation. This is a tool that you will be able to use yourself during the course!

In [None]:
# imports

import re, requests, ollama
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [None]:
# constants

# Model name passed to the OpenAI chat-completions call in get_gpt_response.
MODEL_GPT = 'gpt-4o-mini'
# Model name passed to ollama.chat in get_llama_response.
MODEL_LLAMA = 'llama3.2'

In [None]:
# set up environment

# HTTP headers sent with every page fetch made by the Website class below.
# The User-Agent is a desktop Chrome string — presumably so that sites which
# reject the default python-requests agent still serve the page (confirm if it matters).
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A fetched web page reduced to its title and visible text.

    Attributes:
        url:   the URL that was requested.
        title: text of the page's <title>, or "No title found" when the tag
               is missing or has no plain-string content.
        text:  text of <body> with script/style/img/input elements removed,
               one fragment per line; "" when the page has no <body>.
    """

    def __init__(self, url, timeout=10):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: address of the page to download.
            timeout: seconds to wait for the server before requests gives up;
                without it a stalled server would block the notebook indefinitely.

        Raises:
            requests.RequestException: if the page cannot be fetched
                (connection failure, timeout, ...).
        """
        self.url = url
        # `headers` is the module-level dict defined just above this class.
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # soup.title.string is None when <title> is empty or contains nested tags,
        # so check the string as well as the tag.
        title_tag = soup.title
        has_title = title_tag is not None and title_tag.string
        self.title = title_tag.string if has_title else "No title found"

        # Non-HTML responses and fragments may have no <body>; calling
        # soup.body(...) on None would raise TypeError.
        body = soup.body
        if body is None:
            self.text = ""
            return

        # Drop elements that carry no readable prose before extracting text.
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)

# Shared OpenAI client used by get_gpt_response. It is constructed with no
# arguments, so no credentials are hardcoded in the notebook — presumably the
# SDK reads the API key from the environment (OPENAI_API_KEY); confirm.
openai = OpenAI()


In [None]:
# here is the question; type over this to ask something new

# question = """
# Please explain what this code does and why:
# yield from {book.get("author") for book in books if book.get("author")}
# """

# question = """
# Please explain what this code does and why:
# yield from {book.get("author") for book in books if book.get("author")}
# Popular dev site https://projecteuler.net/
# """

# Active question. Any http(s) URLs it contains are scraped by the URL-processing
# cell below and added to the prompt. Each trailing backslash joins the line with
# the next one, so the sentences form a single paragraph in the string.
question = """
How good at Software Development is Elijah Rwothoromo? \
He has a Wordpress site https://rwothoromo.wordpress.com/. \
He also has a LinkedIn profile https://www.linkedin.com/in/rwothoromoelaijah/. \
What can we learn from him?
"""


In [None]:
# Process URLs in the question to improve the prompt

# Extract every http(s) URL from the question. The regex stops at whitespace
# and ")", which still leaves sentence punctuation attached when a URL ends a
# sentence (e.g. "https://example.com/." would be requested with the final "."),
# so trailing punctuation is stripped. dict.fromkeys removes duplicates while
# keeping first-seen order, so each page is fetched only once.
urls = list(dict.fromkeys(
    raw_url.rstrip('.,;:!?\'">]')
    for raw_url in re.findall(r'https?://[^\s)]+', question)
))

if urls:

    # Fetch the content for each URL using the Website class. A failure on one
    # URL is reported and recorded in the prompt; it does not stop the others.
    scraped_content = []
    for url in urls:
        print(f"Scraping: {url}")
        try:
            site = Website(url)
        except Exception as e:
            print(f"Could not scrape {url}: {e}")
            scraped_content.append(f"Could not retrieve content from {url}.\n")
        else:
            # "---" delimits each page's text inside the prompt
            scraped_content.append(f"Content from {url}:\n---\n{site.text}\n---\n")

    # Combine all the scraped text into one string
    all_scraped_text = "\n".join(scraped_content)

    # Wrap the scraped text and the original question into a single user prompt
    updated_question = f"""
    Based on the following information, please answer the user's original question.
    
    --- TEXT FROM WEBSITES ---
    {all_scraped_text}
    --- END TEXT FROM WEBSITES ---
    
    --- ORIGINAL QUESTION ---
    {question}
    """
else:
    # No URLs to enrich the prompt with: send the question unchanged
    updated_question = question

# print(updated_question)

# System prompt: tells the model to answer from the question plus the scraped
# website text, in markdown, and to skip navigation boilerplate.
# Adjacent string literals are concatenated; each piece ends with an explicit
# space so sentences do not run together (a backslash continuation inside a
# single string previously produced "markdown format.Provide").
system_prompt = (
    "You are an expert assistant. "
    "Analyze the user's question and the provided text from relevant websites "
    "to synthesize a comprehensive answer in markdown format. "
    "Provide a short summary, ignoring text that might be navigation-related."
)

# Conversation sent to both models: the system prompt plus the (possibly
# enriched) user question.
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": updated_question},
]


In [None]:
# Get gpt-4o-mini to answer, with streaming

def _unwrap_markdown_fence(text):
    """Return `text` without an enclosing ```markdown ... ``` wrapper, if it has one.

    Only a wrapper around the whole answer is removed. Code fences inside the
    answer and the word "markdown" in running text are left untouched.
    Works on partial (still streaming) text: the opening fence is dropped as
    soon as it is complete, the closing fence once it arrives.
    """
    opener = re.match(r'\s*```(?:markdown|md)[ \t]*\r?\n', text, flags=re.IGNORECASE)
    if opener is None:
        return text

    body = text[opener.end():]
    trimmed = body.rstrip()
    lines = trimmed.splitlines()
    fence_count = sum(1 for line in lines if line.lstrip().startswith("```"))
    # Inner code blocks contribute fence lines in pairs, so a bare trailing
    # fence with an odd total is the wrapper's own closing fence.
    if lines and lines[-1].strip() == "```" and fence_count % 2 == 1:
        return trimmed[:trimmed.rfind("```")].rstrip()
    return body


def get_gpt_response(question):
    """Stream the GPT answer into the notebook output as rendered markdown.

    Args:
        question: kept for interface compatibility; it is NOT used to build the
            request. The prompt sent is the module-level `messages` list, which
            already contains the question plus any scraped website text.

    Returns:
        None. The answer is shown by updating a display handle in place.
    """
    stream = openai.chat.completions.create(
        model=MODEL_GPT,
        messages=messages,
        stream=True
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Some stream chunks carry no choices; indexing [0] on them would
        # raise IndexError.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content or ''
        if not piece:
            continue  # nothing new to render
        response += piece
        # Strip only a wrapping ```markdown fence. Removing every "```" and every
        # "markdown" substring would destroy code blocks and mangle prose.
        update_display(
            Markdown(_unwrap_markdown_fence(response)),
            display_id=display_handle.display_id,
        )

# Stream GPT's answer into this cell's output. Note that the prompt actually
# sent is the module-level `messages` list built in the previous cell.
get_gpt_response(question)

In [None]:
# Get Llama 3.2 to answer

def get_llama_response(question):
    """Ask the local Llama model for an answer and return its text.

    Args:
        question: accepted for symmetry with get_gpt_response but not used to
            build the request; the module-level `messages` list is what is sent.

    Returns:
        The 'content' field of the 'message' entry in the ollama.chat reply.
    """
    # Non-streaming call: the complete reply comes back in one response.
    chat_args = {
        "model": MODEL_LLAMA,
        "messages": messages,
        "stream": False,
    }
    reply = ollama.chat(**chat_args)
    answer = reply['message']
    return answer['content']

# Render the complete Llama answer as markdown in this cell's output.
display(Markdown(get_llama_response(question)))