In [1]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [2]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key looks good so far


In [10]:
# A class to represent a Webpage

# Some websites need you to use proper headers when fetching them:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class scrapeWebsite:
    """
    A utility class to represent a Website that we have scraped, now with links
    """

    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    #get webpage Title and Page content
    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [11]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a childrens broucher about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"

link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""

In [12]:
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a childrens broucher about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}



In [16]:
#Get Links User prompt

def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a childrens brochure about the company, respond with the full https URL in JSON format. \
    Do not include Terms of Service, Contact, Privacy pages or links and email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [20]:
pages = scrapeWebsite("https://www.arla.com/")
pages.links
print(get_links_user_prompt(pages))

Here is the list of links on the website of https://www.arla.com/ - please decide which of these are relevant web links for a childrens brochure about the company, respond with the full https URL in JSON format.     Do not include Terms of Service, Contact, Privacy pages or links and email links.
Links (some might be relative links):
/
/
/all-our-brands/
/sustainability/
/sustainability/the-food/
/sustainability/the-packaging/
/sustainability/the-transport/
/sustainability/the-dairies/
/sustainability/the-farms/
/sustainability/the-cows/
/sustainability/the-land/
/sustainability/arlas-climate-ambition/
/sustainability/lets-talk-about-dairy/
/company/investor/annual-reports/
/company/arla-farmers/farm-ahead/
/company/
/sustainability/
/sustainability/the-farms/arlas-sustainability-incentive-model-qa/
https://ec.europa.eu/clima/policies/international/negotiations/paris_en
https://www.arla.com/company/news-and-press/2022/pressrelease/arla-doubles-co2e-target-for-operations/
/sustainabilit

In [21]:
#Get Links from webpage

def get_links(url):
    website = scrapeWebsite(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
      ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    return json.loads(result)

In [22]:
get_links("https://www.yeovalley.co.uk/")

{'links': [{'type': 'about page',
   'url': 'https://www.yeovalley.co.uk/about-us'},
  {'type': 'careers page', 'url': 'https://careers.yeovalley.co.uk/'},
  {'type': 'come and visit us page',
   'url': 'https://www.yeovalley.co.uk/come-and-visit-us'},
  {'type': 'food range page',
   'url': 'https://www.yeovalley.co.uk/our-food?range=little-yeos'},
  {'type': 'blog about weaning',
   'url': 'https://www.yeovalley.co.uk/blog/preparing-for-weaning-and-beyond'},
  {'type': 'recipes page', 'url': 'https://www.yeovalley.co.uk/recipes'}]}

## Generate Broucher from page text

In [25]:
#gets each associated link, uses the scrape function to open lins and get page content
def get_all_details(url):
    result = "Landing page:\n"
    result += scrapeWebsite(url).get_contents()
    links = get_links(url)
    print(f"Found links: {links}")
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        result += scrapeWebsite(link["url"]).get_contents()
    return result

In [26]:
get_all_details("https://www.yeovalley.co.uk/")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.yeovalley.co.uk/about-us'}, {'type': 'careers page', 'url': 'https://careers.yeovalley.co.uk/'}, {'type': 'visit us page', 'url': 'https://www.yeovalley.co.uk/come-and-visit-us'}, {'type': 'recipes page', 'url': 'https://www.yeovalley.co.uk/recipes'}, {'type': 'blog page', 'url': 'https://www.yeovalley.co.uk/blog'}, {'type': 'our food page', 'url': 'https://www.yeovalley.co.uk/our-food'}]}


'Landing page:\nWebpage Title:\nYeo Valley Organic\nWebpage Contents:\nSkip to main content\nIn the valley\nOur food\nPlay and win\nIn the valley\nOur food\nPlay and win\nBank\nSpend\nWin\nBank\nSpend\nWin\nSign up / in\nWe get nature, you get delicious\nStart collecting\nYeokens Today\nbank\nspend\nwin\nAlmost every Yeo Valley Organic product you buy comes with a special code that you can redeem online for Yeokens. Start collecting today!\nSign Up / In Now\nYeokens Yeokens Yeokens Yeokens Yeokens Yeokens Yeokens Yeokens Yeokens Yeokens Yeokens Yeokens\nLatest from Yeo...\nFlippin\' Delicious\nOur free-range organic grass-fed British steak beef burgers speak for themselves - tender, succulent, and bursting with flavour.\nPreparing for weaning and beyond\nPreparing for weaning can feel overwhelming, but watching your babies’ face as they take their first tastes and the joy that comes with this new and exciting experience is a lot of fun.\nDoes Yeo Valley use feed additives?\nYeo Valley 

In [32]:
# Main System prompt to create broucher
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short childrens brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

system_prompt_French = "You are an French language assistant that analyzes the contents of several relevant pages from a company website \
and creates a short childrens brochure about the company for prospective customers, investors and recruits in French. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

In [35]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short childrens brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:3_000] # Truncate if more than 3,000 characters
    return user_prompt

def get_french_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short childrens brochure of the company in french and in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:3_000] # Truncate if more than 3,000 characters
    return user_prompt

In [29]:
get_brochure_user_prompt("Yeovalley", "https://www.yeovalley.co.uk/")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.yeovalley.co.uk/about-us'}, {'type': 'come and visit us page', 'url': 'https://www.yeovalley.co.uk/come-and-visit-us'}, {'type': 'careers page', 'url': 'https://careers.yeovalley.co.uk/'}, {'type': 'recipes page', 'url': 'https://www.yeovalley.co.uk/recipes'}, {'type': 'blog page', 'url': 'https://www.yeovalley.co.uk/blog'}]}


"You are looking at a company called: Yeovalley\nHere are the contents of its landing page and other relevant pages; use this information to build a short childrens brochure of the company in markdown.\nLanding page:\nWebpage Title:\nYeo Valley Organic\nWebpage Contents:\nSkip to main content\nIn the valley\nOur food\nPlay and win\nIn the valley\nOur food\nPlay and win\nBank\nSpend\nWin\nBank\nSpend\nWin\nSign up / in\nWe get nature, you get delicious\nStart collecting\nYeokens Today\nbank\nspend\nwin\nAlmost every Yeo Valley Organic product you buy comes with a special code that you can redeem online for Yeokens. Start collecting today!\nSign Up / In Now\nYeokens Yeokens Yeokens Yeokens Yeokens Yeokens Yeokens Yeokens Yeokens Yeokens Yeokens Yeokens\nLatest from Yeo...\nFlippin' Delicious\nOur free-range organic grass-fed British steak beef burgers speak for themselves - tender, succulent, and bursting with flavour.\nPreparing for weaning and beyond\nPreparing for weaning can feel ove

In [30]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [31]:
create_brochure("Yeovalley", "https://www.yeovalley.co.uk/")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.yeovalley.co.uk/about-us'}, {'type': 'come and visit us page', 'url': 'https://www.yeovalley.co.uk/come-and-visit-us'}, {'type': 'careers page', 'url': 'https://careers.yeovalley.co.uk/'}, {'type': 'blog page', 'url': 'https://www.yeovalley.co.uk/blog'}, {'type': 'recipes page', 'url': 'https://www.yeovalley.co.uk/recipes'}, {'type': 'our food page', 'url': 'https://www.yeovalley.co.uk/our-food'}]}


# Welcome to Yeo Valley Organic!

### Howdy, Little Friends!

At Yeo Valley Organic, we are all about tasty food that's good for you and good for nature too! From delicious yogurts to scrumptious beef burgers, everything we make comes from happy, free-range animals and the best organic ingredients. Let’s hop in and discover more about our awesome company!

---

## 🌼 What We Do!

**Yummy Food!**  
We create lots of tasty products like yogurt, milk, cheese, and even frozen treats! All our goodies are made with organic ingredients, so you can enjoy them with a smile, knowing they are good for the planet!

**Our Special Codes!**  
Did you know that you can collect **Yeokens**? Every time you buy a Yeo Valley product, there's a secret code on it. You can enter these codes online to collect Yeokens and get cool prizes! 

**Fun Games & Prizes!**  
We love to play! Join us for exciting games where you can win fun surprises. It’s easy peasy and so much fun!

---

## 🌳 Company Culture

At Yeo Valley, we believe in working together like a big happy family! Our team is filled with caring and friendly people, like Jade, who just joined our kitchen team! We support each other and love to share knowledge about healthy and delicious food. Plus, we all work hard to take care of our beautiful farms and animals.

---

## 🌈 Our Customers

We have super special customers, just like you! Families who want yummy snacks that are good for the earth choose Yeo Valley. We hope you join our community to taste the adventure!

---

## 🌟 Careers at Yeo Valley

Have you ever thought about working at a fun place like Yeo Valley? We are always looking for bright sparkly stars to join our team! If you love working with food and care about the planet, Yeo Valley could be the perfect place for you when you grow up!

---

### 🎉 Join the Yeo Valley Family!

Sign up today and start collecting your Yeokens! You can play fun games, enjoy tasty organic snacks, and maybe even win prizes! Getting to know nature and delicious food is a fantastic adventure!

Remember, every bite you take is a step towards helping nature thrive! 🌿

---

**Yeo Valley Organic - We get nature, you get delicious!**  🍦🥛🎈

In [40]:
def create_brochure_french(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt_French},
            {"role": "user", "content": get_french_brochure_user_prompt(company_name, url)}
          ],    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [41]:
create_brochure_french("Yeovalley", "https://www.yeovalley.co.uk/")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.yeovalley.co.uk/about-us'}, {'type': 'careers page', 'url': 'https://careers.yeovalley.co.uk/'}, {'type': 'come and visit us page', 'url': 'https://www.yeovalley.co.uk/come-and-visit-us'}, {'type': 'blog page', 'url': 'https://www.yeovalley.co.uk/blog'}, {'type': 'recipes page', 'url': 'https://www.yeovalley.co.uk/recipes'}]}


# Brochure pour les Enfants : Yeo Valley Organic

## Bienvenue à Yeo Valley !

### Qu'est-ce que Yeo Valley ?
Yeo Valley est une ferme biologique magnifique située dans une vallée verdoyante. Nous produisons les meilleurs produits laitiers biologiques, comme le yaourt, le lait et même des burgers de boeuf savoureux ! Tout est fait avec amour et respect pour la nature.

![Image de la ferme Yeo Valley](URL-de-l-image)  

### Notre Culture et Équipe
Chez Yeo Valley, nous croyons en une ambiance familiale et chaleureuse. Notre équipe est composée de personnes passionnées par la nourriture et l'environnement. Rencontrez Jade, notre nouvelle cuisinière qui lutte pour créer de délicieux plats à partir de nos ingrédients bio.

### Nos Clients
Nos produits sont appréciés par des milliers de familles qui aiment manger sainement. Que vous soyez un petit Yeos ou un grand Yeos, nous avons des produits pour tout le monde ! Nos yaourts sont faits avec des fruits délicieux et du lait provenant de vaches heureuses qui se promènent sur nos prairies.

### Qu'est-ce que les Yeokens ?
Saviez-vous que lorsque vous achetez nos produits, vous pouvez collecter des points appelés Yeokens ? Ces points vous permettent de gagner des prix ! Vous pouvez les utiliser pour participer à des jeux, des tirages au sort, et même obtenir des réductions sur d'autres produits. Amusez-vous à les collecter !

### Jouez et Gagnez !
Nous avons des jeux amusants sur notre site web. Jouez avec nous et vous pourriez gagner des incroyables récompenses, y compris des montres Garmin ! 

### Pourquoi choisir Yeo Valley ?
- **Produits Sains** : Tous nos produits sont biologiques et fabriqués sans additifs.
- **Amour pour la Nature** : Nous prenons soin de notre terre et de nos animaux.
- **Des occasions de s’amuser !** : Avec Yeo Valley, vous pouvez jouer, gagner et apprendre à propos de la nutrition.

### Rejoignez-Nous !
Avez-vous déjà pensé à une carrière dans le monde de la cuisine ou de l'agriculture biologique ? Chez Yeo Valley, nous sommes toujours à la recherche de passionnés comme vous pour rejoindre notre équipe dynamique !

---

### Contactez-Nous
Pour en savoir plus sur nos produits ou pour vous impliquer, rendez-vous sur notre site internet : [Yeo Valley Organic](URL-du-site)

---

Amusez-vous et mangez sainement avec Yeo Valley !