In [2]:
import os
import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown, display

In [4]:
# A class to represent a Webpage
# If you're not familiar with Classes, check out the "Intermediate Python" notebook

# Some websites only answer properly when the request carries browser-like headers.
# The User-Agent value is written as adjacent fragments that Python joins into one string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """
    A downloaded web page reduced to its url, title and visible text.

    Attributes (all set by __init__):
        url:   the address that was requested
        title: contents of the <title> tag, or "No title found"
        text:  newline-separated text of the page with script/style/img/input removed
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: address of the page to download.
            timeout: seconds passed to requests.get as its timeout, so a server
                that never answers cannot hang the notebook cell indefinitely.

        Raises:
            Whatever requests.get raises for connection problems or timeouts.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # <title> may be absent, or present with no single string inside it
        # (.string is then None); fall back to the placeholder in both cases.
        title_tag = soup.title
        if title_tag is not None and title_tag.string:
            self.title = title_tag.string
        else:
            self.title = "No title found"

        # html.parser does not invent a <body>; for documents without one,
        # work on the whole parsed tree instead of crashing on None.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)

In [6]:
# Let's try one out. Change the website and add print statements to follow along.

# Fetch the page once, then show its title followed by the extracted text
ed = Website("https://edwarddonner.com")
print(ed.title, ed.text, sep="\n")

Home - Edward Donner
Home
Outsmart
An arena that pits LLMs against each other in a battle of diplomacy and deviousness
About
Posts
Well, hi there.
I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (
very
amateur) and losing myself in
Hacker News
, nodding my head sagely to things I only half understand.
I’m the co-founder and CTO of
Nebula.io
. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m previously the founder and CEO of AI startup untapt,
acquired in 2021
.
We work with groundbreaking, proprietary LLMs verticalized for talent, we’ve
patented
our matching model, and our award-winning platform has happy customers and tons of press coverage.
Connect
with me for

In [8]:
# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.'

# Adjacent string fragments are joined by the parser into a single prompt string
system_prompt = (
    "당신은 웹사이트의 내용을 분석하고 "
    "짧은 요약을 제공하는 조수이며, 탐색과 관련된 텍스트는 무시합니다. "
    "마크다운으로 응답합니다."
)

In [10]:
# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    """
    Build the user prompt that asks for a Markdown summary of a website.

    Args:
        website: object exposing `title` and `text` attributes (e.g. a Website).

    Returns:
        str: a line containing the title, the summarisation instructions,
        a blank line, and then the website text.
    """
    user_prompt = f"당신은 제목의 웹사이트를 보고 있습니다 {website.title}"
    # Adjacent literals are used instead of backslash continuations inside one
    # literal: with a continuation, the indentation of the following source line
    # becomes part of the string and put runs of spaces into the prompt.
    user_prompt += (
        "\n이 웹사이트의 내용은 다음과 같습니다. "
        "이 웹사이트의 간략한 요약을 마크다운으로 제공하세요. "
        "뉴스나 공지 사항이 포함되어 있는 경우, 이것도 요약하세요.\n\n"
    )
    user_prompt += website.text
    return user_prompt

In [12]:
# Show the complete user prompt built for the `ed` page
print(user_prompt_for(ed))

당신은 제목의 웹사이트를 보고 있습니다 Home - Edward Donner
이 웹사이트의 내용은 다음과 같습니다.     이 웹사이트의 간략한 요약을 마크다운으로 제공하세요.     뉴스나 공지 사항이 포함되어 있는 경우, 이것도 요약하세요.

Home
Outsmart
An arena that pits LLMs against each other in a battle of diplomacy and deviousness
About
Posts
Well, hi there.
I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (
very
amateur) and losing myself in
Hacker News
, nodding my head sagely to things I only half understand.
I’m the co-founder and CTO of
Nebula.io
. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m previously the founder and CEO of AI startup untapt,
acquired in 2021
.
We work with groundbreaking, proprietary LLMs verticalized for talent, we’ve
patented
o

In [16]:
# See how this function creates exactly the format above

def messages_for(website):
    """
    Assemble the two-entry chat message list for a website:
    first the system message, then the user message built from the site.
    """
    system_message = dict(role="system", content=system_prompt)
    user_message = dict(role="user", content=user_prompt_for(website))
    return [system_message, user_message]

In [18]:
# Try this out, and then try for a few more websites
# (as the last expression in the cell, the returned message list is shown as the cell output)

messages_for(ed)

[{'role': 'system',
  'content': '당신은 웹사이트의 내용을 분석하고 짧은 요약을 제공하는 조수이며, 탐색과 관련된 텍스트는 무시합니다. 마크다운으로 응답합니다.'},
 {'role': 'user',
  'content': '당신은 제목의 웹사이트를 보고 있습니다 Home - Edward Donner\n이 웹사이트의 내용은 다음과 같습니다.     이 웹사이트의 간략한 요약을 마크다운으로 제공하세요.     뉴스나 공지 사항이 포함되어 있는 경우, 이것도 요약하세요.\n\nHome\nOutsmart\nAn arena that pits LLMs against each other in a battle of diplomacy and deviousness\nAbout\nPosts\nWell, hi there.\nI’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (\nvery\namateur) and losing myself in\nHacker News\n, nodding my head sagely to things I only half understand.\nI’m the co-founder and CTO of\nNebula.io\n. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m pre

In [24]:
# The OpenAI API version of summarize() - left commented out here, because the cells below call a local Ollama model instead.

# def summarize(url):
#     website = Website(url)
#     response = openai.chat.completions.create(
#         model = "gpt-4o-mini",
#         messages = messages_for(website)
#     )
#     return response.choices[0].message.content

In [44]:
# Constants

# URL that the chat payload is POSTed to with requests
OLLAMA_API = "http://localhost:11434/api/chat"
# HTTP headers sent along with that POST
HEADERS = {"Content-Type": "application/json"}
# Model name placed in the payload and passed to ollama.chat
MODEL = "llama3.2"

In [46]:
# Build the system + user message list for the page held in `ed`
messages = messages_for(ed)

In [48]:
# JSON body for the chat request: which model to use, the conversation, and streaming turned off
payload = dict(model=MODEL, messages=messages, stream=False)

In [34]:
# Let's just make sure the model is loaded

!ollama pull llama3.2

[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest ⠏ [?25h[?25l[2K[1Gpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest 
pulling dde5aa3fc5ff... 100% ▕████████████████▏ 2.0 GB                         
pulling 966de95ca8a6... 100% ▕████████████████▏ 1.4 KB                         
pulling fcc5a6bec9da... 100% ▕████████████████▏ 7.7 KB                         
pulling a70ff7e570d9... 100% ▕████████████████▏ 6.0 KB                         
pulling 56bb8bd477a5... 100% ▕████████████████▏   96 B                         
pulling 34bb5ab01051... 100% ▕████████████████▏  561 B                         
verifying sha256 digest 
wri

In [38]:
import ollama

# Send the chat payload to the Ollama endpoint as a plain HTTP POST
response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)
# Fail here with the HTTP status if the server reports an error, rather than
# with an opaque KeyError on 'message' when the error body is indexed below.
response.raise_for_status()
print(response.json()['message']['content'])

# Edward Donner의 웹사이트 요약

## 웹사이트 소개

이 웹사이트는 에드워드 دون너 (Edward Donner)가 운영하는 website입니다. website은 LLMs (Large Language Models)의 개발과应用에關한 정보를 제공합니다.

### 웹사이트 내용

* 에드워드 دون너와 그의 experience
* 네불라.io (Nebula.io)라는 AIstartup과 관련된 information
* LLMs의 application 및 patneted matching model
* AI-related resource 및 article

## 뉴스나 공지 사항

* "December 21, 2024" - SuperDataScientists에 welcoming message
* "Mastering AI and LLM Engineering – Resources" (2024-10-16) - 리소스 공유 notice
* "From Software Engineer to AI Data Scientist – resources" (2024-08-06) - 리소스 공유 notice


In [50]:
import ollama

# Same chat request, issued through the ollama package instead of a hand-built HTTP POST
response = ollama.chat(model=MODEL, messages=messages)
# The reply text is read from the 'message' -> 'content' entry of the result
print(response['message']['content'])

### website 요약

* 웹사이트 제목은 "Edward Donner"이며, author의 개인적인 trang입니다.
* author Ed는 LLMs와 관련된 주제에 관심이 있으며 DJing과 electronic music production을 즐기고 있습니다.
* author는 Nebula.io의 CTO이자 AI startup untapt의 이전 CEO인 것으로 나타납니다. 
* website에는 LLM Arena라는 이름의 게임과 AI-related.resource들에 대한 정보가 포함되어 있습니다.
