In [6]:
# imports

import os
import requests
from bs4 import BeautifulSoup
from typing import List
from dotenv import load_dotenv
from openai import OpenAI
import google.generativeai
import anthropic
import gradio as gr # oh yeah!

## Setup

In [14]:
OPENAI_MODEL = "openai/gpt-4o-mini"
DEEPSEEK_MODEL = "deepseek/deepseek-chat-v3-0324"
GEMINI_MODEL = "gemini-2.0-flash"
OPENROUTER_URL = "https://openrouter.ai/api/v1"

In [10]:
load_dotenv(override=True)

openrouter_api_key = os.getenv('OPENROUTER_API_KEY')
google_api_key = os.getenv('GOOGLE_API_KEY')


if openrouter_api_key:
    print(f"Openrouter API Key exists and begins {openrouter_api_key[:8]}")
else:
    print("Openrouter API Key not set")
if google_api_key:
    print(f"Google API Key exists and begins {google_api_key[:8]}")
else:
    print("Google API Key not set")

Openrouter API Key exists and begins sk-or-v1
Google API Key exists and begins AIzaSyDO


In [49]:
gemini = google.generativeai.configure(api_key=google_api_key)

client = OpenAI(
    api_key=openrouter_api_key, 
    base_url=OPENROUTER_URL
)

In [18]:
system_prompt = "You are a helpful assistant"

In [22]:
def message_gpt(usr_prompt):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": usr_prompt}
      ]
    completion = client.chat.completions.create(
        model=OPENAI_MODEL,
        messages=messages,
    )
    return completion.choices[0].message.content

In [23]:
message_gpt("What is today's date?")

"Today's date is October 27, 2023."

## User Interface

In [33]:
def shout(text):
    print("Shout has been called wiht the input {text}".format(text=text))
    return text.upper()

In [34]:
shout("Hello")

Shout has been called wiht the input Hello


'HELLO'

In [None]:
view = gr.Interface(fn=shout, inputs="textbox", outputs="textbox")
view.launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




Shout has been called wiht the input Denn ich lade meinen Ballerman und baller dann auf alle man mit der Gun im arm denn er ist back es ist Kollegah der Boss
Created dataset file at: .gradio\flagged\dataset1.csv


In [None]:
gr.Interface(fn=shout, inputs="textbox", outputs="textbox", flagging_mode="never").launch()
# gr.Interface(fn=shout, inputs="textbox", outputs="textbox", flagging_mode="never").launch(share=True)

* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.




Shout has been called wiht the input Denn
Shout has been called wiht the input Denn


In [38]:
force_dark_mode = """
function refresh() {
    const url = new URL(window.location);
    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""
gr.Interface(fn=shout, inputs="textbox", outputs="textbox", flagging_mode="never", js=force_dark_mode).launch()

* Running on local URL:  http://127.0.0.1:7863
* To create a public link, set `share=True` in `launch()`.




In [39]:
view = gr.Interface(
    fn=message_gpt,
    inputs=[gr.Textbox(label="Your message:", lines=6)],
    outputs=[gr.Textbox(label="Response:", lines=8)],
    flagging_mode="never"
)
view.launch()

* Running on local URL:  http://127.0.0.1:7864
* To create a public link, set `share=True` in `launch()`.




## User Interface with stream

In [73]:
system_prompt = "You are a helpful assistant that responds in markdown"

In [43]:
def stream_gpt(prompt):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt}
      ]
    
    stream = client.chat.completions.create(
        model=OPENAI_MODEL,
        messages=messages,
        stream=True
    )
    result = ""
    for chunk in stream:
        result += chunk.choices[0].delta.content or ""
        yield result

In [None]:
gemini = google.generativeai.GenerativeModel(
    model_name='gemini-2.0-flash',
    system_instruction=system_prompt,
    
)

In [66]:
import time

def stream_fake_gemini(prompt):
    response = gemini.generate_content(prompt)
    content = response.text

    # Fake-Streaming: Gib Wort für Wort zurück
    words = content.split()
    result = ""
    for word in words:
        result += word + " "
        yield result
        time.sleep(0.05)  # künstliche Verzögerung

In [71]:
def stream_model(prompt, model):
    if model=="GPT":
        result = stream_gpt(prompt)
    elif model=="Gemini":
        result = stream_fake_gemini(prompt)
    else:
        raise ValueError("Unknown model")
    yield from result

In [74]:
view = gr.Interface(
    fn=stream_model,
    inputs=[gr.Textbox(label="Your message:"), gr.Dropdown(["GPT", "Gemini"], label="Select model", value="GPT")],
    outputs=[gr.Markdown(label="Response:")],
    flagging_mode="never",
    js=force_dark_mode
)
view.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




## Company brochure with Olama and GPT mini

In [144]:
import requests
import json
from bs4 import BeautifulSoup

In [77]:
OLLAMA_API = "http://localhost:11434/api/chat"
HEADERS = {"Content-Type": "application/json"}
MODEL = "llama3.2"

In [None]:
system_prompt = "You are an assistant that analyzes the contents of a company website landing page \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown."

In [137]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"

link_system_prompt += "You should respond in JSON as in this example:"

link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""

In [88]:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links
    """

    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [98]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.\n"
    
    user_prompt += "Links (some might be relative links):\n"
    
    user_prompt += "\n".join(website.links)
    
    return user_prompt

In [99]:
def get_links_ollama(url):
    website = Website(url)
    
    payload = {
        "model": MODEL, 
        "messages": [
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)},
        ],
        "format": "json",
        "stream": False
    }

    response = requests.post(OLLAMA_API, json=payload)
    response.raise_for_status()
    result = response.json()["message"]["content"]
    return json.loads(result)

In [100]:
def get_all_details(url):
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links_ollama(url)
    print("Found links:", links)
    for link in links["links"]:
        result += f"\n\n{link.get('type', 'unknown type')}\n"
    if "url" in link:
        result += Website(link["url"]).get_contents()
    else:
        result += "[Keine URL vorhanden]\n"
    return result

In [101]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:10_000] # Truncate if more than 5,000 characters
    return user_prompt

In [142]:
def stream_brochure(company_name, url):
    payload = {
        "model": MODEL, 
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
        ],
        "stream": True
    }
    
    response = requests.post(OLLAMA_API, json=payload, stream=True)
    response.raise_for_status()

    result = ""

    for line in response.iter_lines():
        if not line:
            continue
        try:
            data = json.loads(line.decode("utf-8"))
            result += data.get("message", {}).get("content", "")
            yield result
        except json.JSONDecodeError:
            continue

In [130]:
openai_via_openrouter = OpenAI(
    api_key=openrouter_api_key, 
    base_url=OPENROUTER_URL
)

def stream_gpt(company_name, url):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
      ]
    stream = openai_via_openrouter.chat.completions.create(
        model=OPENAI_MODEL,
        messages=messages,
        stream=True
    )
    result = ""
    for chunk in stream:
        result += chunk.choices[0].delta.content or ""
        yield result

In [None]:
def stream_brochure_model(company_name, url, model):
    if model=="Ollama":
        result = stream_brochure(company_name, url)
    elif model=="GPT":
        result = stream_gpt(company_name, url)
    else:
        raise ValueError("Unknown model")
    yield from result

In [143]:
view = gr.Interface(
    fn=stream_brochure_model,
    inputs=[
        gr.Textbox(label="Company name:"),
        gr.Textbox(label="Landing page URL including http:// or https://"),
        gr.Dropdown(["Ollama","GPT"], label = "Selecht your preferred model")],
    outputs=[gr.Markdown(label="Brochure:")],
    flagging_mode="never",
    js=force_dark_mode
)
view.launch()

* Running on local URL:  http://127.0.0.1:7885
* To create a public link, set `share=True` in `launch()`.




Found links: {'links': [{'type': 'About page', 'url': 'https://www.chess.com/about'}, {'type': 'Company page', 'url': 'https://www.chess.com'}, {'type': 'Students page', 'url': 'https://www.chess.com/students'}, {'type': 'Jobs/Jobs page', 'url': 'https://www.chess.com/jobs'}, {'type': 'Club/Developer Community page', 'url': 'https://www.chess.com/club/chess-com-developer-community'}, {'type': 'Fair Play page', 'url': 'https://www.chess.com/fair-play'}]}
