# End of week 1 exercise

To demonstrate your familiarity with the OpenAI API, and also Ollama, build a tool that takes a technical question,  
and responds with an explanation. This is a tool that you will be able to use yourself during the course!

In [None]:
# imports

import concurrent.futures
import json
import os
import random
import re
import threading
import time
from typing import List
from urllib.parse import urlparse, urljoin

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display, Image
from openai import OpenAI
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [None]:
# constants

# Model identifiers used by the cells below.
MODEL_GPT = 'gpt-4o-mini'
MODEL_LLAMA = 'llama3.2'
# Default model for the link-selection and proposal requests.
MODEL = MODEL_GPT

# Shared OpenAI client used by every request in this notebook.
openai = OpenAI()

In [None]:
# set up environment

# A modified class to fetch and parse fully rendered pages
class NewWebsite:
    """Render a page in headless Chrome and expose its text, title and links.

    Constructing an instance immediately loads ``url`` and populates:

    * ``text``  - visible body text with scripts, styles, images and inputs removed
    * ``title`` - the page ``<title>`` or ``"No title found"``
    * ``links`` - absolute URLs of every ``<a href>`` (mailto/tel/javascript skipped)

    If anything goes wrong while loading or parsing, the failure is printed and
    the attributes are set to ``"Error loading content"``, ``"Error"`` and ``[]``.

    Args:
        url: Address of the page to load.
        driver: Optional WebDriver to use instead of the lazily created shared one.
    """

    shared_driver = None  # Class variable to share browser instance

    # One browser window can only hold one page at a time, so every
    # navigate-and-read sequence must be atomic. This file scrapes from a
    # thread pool, which would otherwise let one thread read another thread's
    # page_source.
    _driver_lock = threading.RLock()

    def __init__(self, url, driver=None):
        self.url = url
        self.driver = driver or NewWebsite._get_shared_driver()
        self.text, self.title, self.links = self._scrape_content()

    @classmethod
    def _get_shared_driver(cls):
        """Return the shared headless Chrome driver, creating it on first use."""
        # Locked so two threads cannot both see ``None`` and start two browsers.
        with cls._driver_lock:
            if cls.shared_driver is None:
                # Set up headless Chrome options
                options = Options()
                options.add_argument("--headless=new")
                options.add_argument("--disable-gpu")
                options.add_argument("--no-sandbox")
                options.add_argument("--disable-dev-shm-usage")
                options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36")

                service = Service(ChromeDriverManager().install())
                cls.shared_driver = webdriver.Chrome(service=service, options=options)
            return cls.shared_driver

    def _scrape_content(self):
        """Load ``self.url`` and return ``(text, title, links)``.

        Returns the error placeholders instead of raising when the page cannot
        be loaded or parsed.
        """
        try:
            # Hold the lock only while the browser is in use; parsing the
            # captured HTML below can safely overlap with other threads.
            with NewWebsite._driver_lock:
                self.driver.get(self.url)
                # Wait (up to 15 s) until at least one anchor element exists
                WebDriverWait(self.driver, 15).until(EC.presence_of_element_located((By.TAG_NAME, "a")))
                # Allow JS-rendered content to settle
                time.sleep(2)
                # Get the page source after rendering
                page_source = self.driver.page_source

            soup = BeautifulSoup(page_source, "html.parser")

            for tag in soup(["script", "style", "img", "input"]):
                tag.decompose()

            title = soup.title.string.strip() if soup.title and soup.title.string else "No title found"
            body = soup.body
            text = body.get_text(separator="\n", strip=True) if body else "No content found."

            # Extract and clean links
            links = []
            for link_tag in soup.find_all("a", href=True):
                href = link_tag["href"].strip()
                if href and not href.startswith(("mailto:", "tel:", "javascript:")):
                    links.append(urljoin(self.url, href))

            return text, title, links

        except Exception as e:
            # Best-effort scraping: report the cause instead of hiding it, then
            # fall back to the placeholders callers already expect.
            print(f"Error loading {self.url}: {e}")
            return "Error loading content", "Error", []

    def get_contents(self):
        """Return the title and text formatted for inclusion in a prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

    # Close the driver
    @classmethod
    def close_driver(cls):
        """Quit the shared browser, if one is running, and forget it."""
        # Taking the lock waits for any in-flight scrape to finish first.
        with cls._driver_lock:
            if cls.shared_driver:
                cls.shared_driver.quit()
                cls.shared_driver = None

# System prompt for the link-selection request. The example below is what the
# model imitates, so it must itself be valid JSON.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

def get_links_user_prompt(website):
    """Build the user message asking the model to pick brochure-worthy links.

    Args:
        website: Object exposing ``url`` (str) and ``links`` (iterable of str).

    Returns:
        The instruction text followed by one link per line.
    """
    sections = [
        f"Here is the list of links on the website of {website.url} - ",
        "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. ",
        "Do not include Terms of Service, Privacy, email links.\n",
        "Links (some might be relative links):\n",
        "\n".join(website.links),
    ]
    return "".join(sections)

def get_links(url):
    """Scrape ``url`` and ask the model which of its links suit a brochure.

    Args:
        url: Address of the page whose links should be filtered.

    Returns:
        The model's JSON reply parsed into Python objects.
    """
    site = NewWebsite(url)
    chat_messages = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(site)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=chat_messages,
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)

def scrape_link(link):
    """Render one selected link and return it as a labelled text section.

    Args:
        link: Mapping with a ``"url"`` to load and a ``"type"`` used as heading.

    Returns:
        The heading followed by the page contents, or by an error note when
        loading the page raised.
    """
    try:
        section_body = NewWebsite(link["url"]).get_contents()
        section = f"\n\n{link['type']}\n{section_body}"
    except Exception as err:
        section = f"\n\n{link['type']}\nError loading page: {err}"
    return section

# Threaded scraper for linked pages
def get_all_details_rendered_concurrently(url):
    """Collect the landing page plus every model-selected linked page as text.

    Args:
        url: Landing page of the site to summarise.

    Returns:
        One string: the landing page contents followed by a section per
        selected link, in the order the model listed them.

    The shared browser is always closed before returning, including when
    scraping or the model call raises.
    """
    try:
        result = "Landing page:\n"
        result += NewWebsite(url).get_contents()

        # LLM-filtered link generator
        # NOTE: get_links() builds its own NewWebsite(url), so the landing
        # page is rendered a second time here.
        links = get_links(url)
        print("Found links:", links)

        # Tolerate a reply without a usable "links" list instead of raising.
        selected_links = links.get("links") or []

        with concurrent.futures.ThreadPoolExecutor() as executor:
            # map() yields results in submission order, so the combined text is
            # the same from run to run (it is truncated later, so order matters).
            result += "".join(executor.map(scrape_link, selected_links))

        return result
    finally:
        # Close shared browser even on failure so no Chrome process is leaked
        NewWebsite.close_driver()


In [None]:
# here is the question; type over this to ask something new

# System prompt for the proposal request; adjacent literals are joined into
# one single-line string.
system_prompt = (
    "You are an LLM Engineer that analyzes the contents of several relevant pages from a company website "
    "rewrites internal tools and systems and rebuilds them end-to-end, starting from scratch. Starting with the online application at cardiff.co/apply, "
    "Tell me why you're best suited to be the lead of this project and work with our 12 year resident developer to implement a "
    "state of the art solution in record time. Include backend architecture, model orchestration, how you handle latency, cost and user experience, "
    "and details of how you would achieve this goal based on company culture and industries served if you have the information, "
    "and walk me through the details like you're explaining it to a sharp product owner. Respond in markdown."
)


def get_solution_user_prompt(company_name, url):
    """Build the user message for the proposal request.

    Args:
        company_name: Name inserted into the opening line of the prompt.
        url: Landing page whose rendered contents are appended.

    Returns:
        The prompt text, cut to at most 5,000 characters.
    """
    header = f"You are looking at a company called: {company_name}\n"
    instructions = "Here are the contents of its landing page and other relevant pages; use this information to build a solution to rewrite the company's application in markdown.\n"
    full_prompt = header + instructions + get_all_details_rendered_concurrently(url)
    # Truncate if more than 5,000 characters
    return full_prompt[:5_000]

def create_solution(company_name, url):
    """Ask the model for a rebuild proposal, render it, and return it.

    Args:
        company_name: Company name passed into the user prompt.
        url: Landing page of the company website.

    Returns:
        The model's reply text (also displayed as Markdown).
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_solution_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)

    proposal_markdown = completion.choices[0].message.content
    display(Markdown(proposal_markdown))
    return proposal_markdown

#create_solution("Cardiff", "https://cardiff.co")


In [None]:
# Stream code generation from gpt-4o based on the proposal, then save the code blocks and a UI mockup

# Alternative system prompt for the code-generation step; adjacent literals
# are joined into one single-line string.
new_system_prompt = (
    "You are a Senior Engineer that analyzes the planned solution given to you for a company website "
    "and you rewrite code for rebuilding internal tools and systems end-to-end based on the proposed solutions. "
    "Start with the online application at cardiff.co/apply, use canvas and write code for the proposed solution "
    "in the appropriate language that best suits the task for backend architecture, model orchestration, how you handle latency, cost and user experience wherever possible."
)

# Directory that receives the generated files; created up front if missing.
output_dir = "cardiff_rebuild_output"
os.makedirs(output_dir, exist_ok=True)

def save_code_blocks(markdown_text, base_filename="cardiff_code", output_dir="cardiff_rebuild_output"):
    """Write every fenced code block in ``markdown_text`` to its own file.

    Files are named ``{base_filename}_part{N}.{ext}``, where ``N`` counts blocks
    from 1 and ``ext`` is the block's language tag (``txt`` when absent).

    Args:
        markdown_text: Markdown that may contain triple-backtick fenced blocks.
        base_filename: Prefix for every generated file name.
        output_dir: Directory to write into; created if it does not exist.

    Returns:
        List of the file paths written, in document order.
    """
    os.makedirs(output_dir, exist_ok=True)

    code_blocks = re.findall(r"```(.*?)\n(.*?)```", markdown_text, re.DOTALL)
    saved_files = []

    for idx, (language, code) in enumerate(code_blocks, 1):
        # The text after the opening fence is model output and may hold more
        # than a language name (e.g. "js app/main.js"). Keep only the first
        # word and drop characters that could add path separators or dots, so
        # the result is always a plain extension inside output_dir.
        words = language.split()
        ext = re.sub(r"[^A-Za-z0-9_+#-]", "", words[0]) if words else ""
        if not ext:
            ext = "txt"

        filename = f"{base_filename}_part{idx}.{ext}"
        filepath = os.path.join(output_dir, filename)
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(code)
        saved_files.append(filepath)

    return saved_files

def develop_from_proposal(proposal_text, company_name):
    """Turn a proposal into streamed code, saved code files and a UI mockup.

    Streams a gpt-4o completion into a live Markdown display, writes each
    fenced code block of the reply to disk via ``save_code_blocks``, generates
    a DALL-E 3 wireframe image, downloads it into ``output_dir`` and displays it.

    Args:
        proposal_text: Proposal sent to the model as the user message.
        company_name: Used in the image prompt and the mockup file name.

    Returns:
        None.

    Raises:
        requests.RequestException: If the mockup image cannot be downloaded.
    """
    # Stream code generation from GPT-4o
    # (the continuation line's leading spaces are part of the prompt text)
    system = "You are a senior software engineer. Use the following proposal to generate production-ready code to \
    implement the backend, frontend, and any orchestration described. Write clean, documented code in markdown format."

    stream = openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": proposal_text}
        ],
        stream=True
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Skip chunks that carry no choices so indexing cannot raise IndexError.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content or ""
        response += content
        update_display(Markdown(response), display_id=display_handle.display_id)

    saved_files = save_code_blocks(response)

    # Generate a UI design mockup image
    image_prompt = f"A modern, mobile-friendly UI wireframe for a business loan application system for {company_name}. Clean layout, input fields for business name, revenue, loan amount, industry, and contact info. Includes a step-by-step progress bar, submit button, and secure branding."

    img_response = openai.images.generate(
        model="dall-e-3",
        prompt=image_prompt,
        n=1,
        size="1024x1024"
    )

    image_url = img_response.data[0].url
    img_path = os.path.join(output_dir, f"{company_name.lower()}_ui_mockup.png")

    # Download before opening the file, with a timeout and a status check, so
    # a failed request cannot hang forever or leave an empty/garbage PNG.
    download = requests.get(image_url, timeout=60)
    download.raise_for_status()
    with open(img_path, 'wb') as handler:
        handler.write(download.content)

    print("Code files saved to:", saved_files)
    print("UI mockup saved at:", img_path)

    display(Markdown("### Proposed UI Design"))
    display(Image(url=image_url))

# Run the whole pipeline for Cardiff: first generate (and display) the proposal,
# then feed that proposal into the code/mockup generation step.
proposal = create_solution("Cardiff", "https://cardiff.co")
develop_from_proposal(proposal, "Cardiff")


In [None]:
# Get Llama 3.2 to answer