In [4]:
# Imports
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [4]:
# Load environment variables from .env (overriding values already set in the
# process) and validate the OpenAI key before anything tries to use it.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')
if api_key and api_key == api_key.strip():
    # Happy path: a key is present and carries no surrounding whitespace.
    print('API key is found!')
elif not api_key:
    raise Exception('API key is not found!')
else:
    raise Exception('API key is found but it has leading or trailing spaces which should be removed')

API key is found!


In [5]:
# Hello chatgpt! Build the client and a one-message sample conversation.
# The request itself is commented out below, so this cell does not call the API.
openai = OpenAI()
model = 'gpt-4o-mini'
messages = [
    {'role': 'user', 'content': 'Hello llm!'},
]
# Uncomment to send the greeting; per the original note, the request is
# expected to fail if the API key is not set correctly.
# response = openai.chat.completions.create(model=model, messages=messages)
# print(response.choices[0].message.content)

## Web Scraping for Content

In [5]:
# Web scraping example: browser-like request headers passed to requests.get
# by the Website class below.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    ),
}

class Website:
    """
    A web page fetched over HTTP and reduced to its title and text.

    Attributes:
        url: The address that was requested.
        content: Always None here; kept so existing attribute access keeps working.
        title: Text of the page's <title> tag, or 'No title found'.
        text: Text extracted from the parsed page, fragments joined by newlines.
    """

    def __init__(self, url, timeout=10):
        """
        Download `url` and parse the response body with BeautifulSoup.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds passed to requests.get as its timeout, so a
                silent server cannot block the notebook indefinitely.

        Raises:
            requests.exceptions.HTTPError: If the server replies with a
                4xx/5xx status (raised by response.raise_for_status()).
            requests.exceptions.RequestException: For connection problems
                and timeouts raised by requests.get.
        """
        self.url = url
        self.content = None
        # The module-level `headers` dict supplies the User-Agent.
        response = requests.get(url, headers=headers, timeout=timeout)
        # Fail loudly instead of summarising an error page as if it were content.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # `.string` is None when <title> is empty or contains nested tags;
        # treat that (and a whitespace-only title) the same as a missing title.
        title = soup.title.string if soup.title else None
        self.title = title if title and title.strip() else 'No title found'
        self.text = soup.get_text(separator='\n', strip=True)

In [None]:
# Scrape one page and preview it: the title plus the first 100 characters of text.
url = 'https://wikipedia.org'
website = Website(url)

print(
    f'Title of the webpage: {website.title}',
    f'Content of the webpage: {website.text[:100]}...',
    sep='\n',
)

Title of the webpage: METTLER TOLEDO Balances & Scales for Industry, Lab, Retail - METTLER TOLEDO
Content of the webpage: METTLER TOLEDO Balances & Scales for Industry, Lab, Retail - METTLER TOLEDO
Products & Solutions
The...


## Prompts

### Types of prompt
- System prompt - Describes the type of task and the tone the model should use.
- User prompt - The conversation message the model should reply to.

In [10]:
# Instructions given to the model in the 'system' role, assembled from
# adjacent string literals into one single-line prompt.
system_prompt = (
    'You are an AI assistant that summarizes webpages. '
    'Provide a concise summary of the content. '
    'Ignore any HTML tags and navigation related text and focus on the main text. '
    'Respond in markdown format.'
)

def get_user_prompt(website):
    """
    Build the user-role prompt asking for a summary of one scraped page.

    Args:
        website: Object exposing `url`, `title` and `text` attributes
            (for example a Website instance).

    Returns:
        A string naming the page's URL and title, followed by a newline
        and the page's full text.
    """
    # Read the URL from the page object itself rather than from a
    # notebook-level `url` variable, so the prompt always describes the
    # page that was actually passed in.
    return (
        f'Summarize the content of the webpage {website.url} with the title "{website.title}". '
        f'The content of the website is as follows:\n{website.text}'
    )

# Show both prompts for inspection. The user prompt embeds the page's full
# text, so the second line of output can be very long.
print(f'System prompt: {system_prompt}')
print(f'User prompt: {get_user_prompt(website)}')



System prompt: You are an AI assistant that summarizes webpages. Provide a concise summary of the content. Ignore any HTML tags and navigation related text and focus on the main text. Respond in markdown format.
User prompt: Summarize the content of the webpage https://mt.com with the title "METTLER TOLEDO Balances & Scales for Industry, Lab, Retail - METTLER TOLEDO". The content of the website is as follows:
METTLER TOLEDO Balances & Scales for Industry, Lab, Retail - METTLER TOLEDO
Products & Solutions
There was an error retrieving our menu. Please reload the page.
Industries
There was an error retrieving our menu. Please reload the page.
Services & Support
There was an error retrieving our menu. Please reload the page.
Events & Expertise
There was an error retrieving our menu. Please reload the page.
About Us
There was an error retrieving our menu. Please reload the page.
Contact Us
Welcome, {mt:userTitle/} {mt:lastName/}
Login / Registration
My mt.com
Logout
Select Country
Need ass

## Messages

In [None]:
# Simple message: a system entry that sets the persona, followed by a single
# user question.
messages = [
    {
        'role': 'system',
        'content': 'You are an AI assistant that is very funny but also an expert in mathematics.',
    },
    {
        'role': 'user',
        'content': 'What is the square root of 16?',
    },
]

In [11]:
def get_messages(website: Website) -> list:
    """
    Build the two-entry chat payload for summarising `website`.

    The first entry carries the module-level `system_prompt` in the
    'system' role; the second carries get_user_prompt(website) in the
    'user' role.
    """
    system_message = {'role': 'system', 'content': system_prompt}
    user_message = {'role': 'user', 'content': get_user_prompt(website)}
    return [system_message, user_message]

print(get_messages(website))

[{'role': 'system', 'content': 'You are an AI assistant that summarizes webpages. Provide a concise summary of the content. Ignore any HTML tags and navigation related text and focus on the main text. Respond in markdown format.'}, {'role': 'user', 'content': 'Summarize the content of the webpage https://mt.com with the title "METTLER TOLEDO Balances & Scales for Industry, Lab, Retail - METTLER TOLEDO". The content of the website is as follows:\nMETTLER TOLEDO Balances & Scales for Industry, Lab, Retail - METTLER TOLEDO\nProducts & Solutions\nThere was an error retrieving our menu. Please reload the page.\nIndustries\nThere was an error retrieving our menu. Please reload the page.\nServices & Support\nThere was an error retrieving our menu. Please reload the page.\nEvents & Expertise\nThere was an error retrieving our menu. Please reload the page.\nAbout Us\nThere was an error retrieving our menu. Please reload the page.\nContact Us\nWelcome, {mt:userTitle/} {mt:lastName/}\nLogin / Reg

## Summarize Website

In [None]:
def summarize_website_with_openai(website, model='gpt-4o-mini'):
    """
    Summarize a scraped page with an OpenAI chat model.

    Args:
        website: Page object accepted by get_messages().
        model: Name of the chat model to request. Defaults to
            'gpt-4o-mini', the value that used to be hard-coded here.

    Returns:
        An IPython Markdown object wrapping the content of the first
        choice in the response.
    """
    messages = get_messages(website)
    # Uses the notebook-level `openai` client.
    response = openai.chat.completions.create(model=model, messages=messages)
    return Markdown(response.choices[0].message.content)

summarize_website_with_openai(website)

In [12]:
import ollama

def summarize_website_with_ollama(website, model='llama3.2'):
    """
    Summarize a scraped page with a model served through ollama.

    Args:
        website: Page object accepted by get_messages().
        model: Name of the ollama model to use. Defaults to 'llama3.2',
            the value that used to be hard-coded here.

    Returns:
        An IPython Markdown object wrapping response['message']['content'].
    """
    messages = get_messages(website)
    # stream=False requests the whole reply as a single response object.
    response = ollama.chat(model=model, messages=messages, stream=False)
    return Markdown(response['message']['content'])

summarize_website_with_ollama(website)

**METTLER TOLEDO Overview**
METTLER TOLEDO is a global company that provides balances and scales for various industries, including laboratory, retail, and industrial applications.

**Products and Solutions**
The company offers a wide range of products, including:

* Laboratory Balances: analytical, precision, and micro balances
* Industrial Scales and Load Cell Systems
* Product Inspection Equipment
* Rainin Liquid Handling Solutions
* Process Analytics

**Services and Support**
METTLER TOLEDO provides various services, such as:

* Repair Services and Spare Parts
* Calibration, Verification, and Certification
* Preventive Maintenance
* Installation, Qualification, Setup, and Configuration
* Service Packages

**Expertise and Training**
The company offers training and expertise in areas such as:

* Laboratory Balances
* Industrial Scales
* Product Inspection Equipment
* Process Analytics
* Automated Lab Reactors and In-Situ Analysis

**About Us and Careers**
METTLER TOLEDO is committed to building a better, safer, and healthier tomorrow. The company has a strong presence globally and offers exciting career opportunities.

**Contact Us**
To get in touch with METTLER TOLEDO's experts, you can start a live chat or schedule an online consultation. You can also find more information on their website and contact them through various channels.