# House Hunter
This notebook uses the code from `browser.py` to clarify the steps and debug the code developed in 2021. Functions learned and developed in 2023, during the Monash University Data Analytics Bootcamp, are used where they are simpler, more convenient, or better suited.

In [1]:
# Import URL helpers, Splinter and BeautifulSoup
from urllib.parse import quote_plus

from bs4 import BeautifulSoup
from selenium.webdriver.common.keys import Keys
from splinter import Browser

## Open https://www.bbcgoodfood.com/ in browser

In [4]:
# Open automated browser
browser = Browser('chrome')

# Visit website
base_url = "https://www.bbcgoodfood.com"

search_term = 'healthy'
# URL-encode the term so searches containing spaces or punctuation still form a valid query string
search = f"/search?q={quote_plus(search_term)}"

browser.visit(base_url + search)

# Create a Beautiful Soup object
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Ready message
print("Browser open. Session started.")

# Search results are the <article> cards carrying this class string
articles = soup.find_all('article', class_='card text-align-left card--horizontal card--inline card--with-borders')

# Keep the first link of every card; cards without an anchor or without an href are skipped
# instead of raising, so one malformed card does not lose the whole result list
links = []

for element in articles:
    anchor = element.find('a')
    if anchor is not None and anchor.get('href'):
        links.append(anchor['href'])

print(f"{len(links)} recipes found.")

SessionNotCreatedException: Message: session not created: This version of ChromeDriver only supports Chrome version 117
Current browser version is 119.0.6045.124 with binary path C:\Program Files\Google\Chrome\Application\chrome.exe
Stacktrace:
	GetHandleVerifier [0x00007FF6E40E7892+54818]
	(No symbol) [0x00007FF6E4056AC2]
	(No symbol) [0x00007FF6E3F0DA3B]
	(No symbol) [0x00007FF6E3F40972]
	(No symbol) [0x00007FF6E3F3FDCE]
	(No symbol) [0x00007FF6E3F3AA4B]
	(No symbol) [0x00007FF6E3F3802E]
	(No symbol) [0x00007FF6E3F767FB]
	(No symbol) [0x00007FF6E3F6E883]
	(No symbol) [0x00007FF6E3F43691]
	(No symbol) [0x00007FF6E3F448D4]
	GetHandleVerifier [0x00007FF6E444B992+3610402]
	GetHandleVerifier [0x00007FF6E44A1860+3962352]
	GetHandleVerifier [0x00007FF6E4499D4F+3930847]
	GetHandleVerifier [0x00007FF6E4183646+693206]
	(No symbol) [0x00007FF6E4061628]
	(No symbol) [0x00007FF6E405D934]
	(No symbol) [0x00007FF6E405DA62]
	(No symbol) [0x00007FF6E404E113]
	BaseThreadInitThunk [0x00007FFAA5247344+20]
	RtlUserThreadStart [0x00007FFAA6C226B1+33]


In [2]:
def _text_or_none(tag):
    # Text of a parsed tag, or None when the tag was not found on the page
    return tag.get_text() if tag is not None else None


def get_recipes(links, exlude_base):
    """Visit every recipe page in ``links`` and return one dictionary per recipe.

    The function drives the notebook-level ``browser`` object, so the cell that
    creates it must have been run first. When ``exlude_base`` is falsy it also
    reads the notebook-level ``base_url``.

    Parameters
    ----------
    links : list of str
        Recipe URLs (or paths to append to ``base_url``).
    exlude_base : bool
        Truthy when the links are used as they are; falsy when ``base_url``
        must be prepended to each link. (The spelling of the parameter name is
        kept so existing calls keep working.)

    Returns
    -------
    list of dict
        One dictionary per link with the keys ``name``, ``link``,
        ``ingredients_num``, ``ingredients``, ``steps``, ``stars``, ``votes``
        and ``img``. A field whose element is not present on the page is
        ``None`` (or an empty list for ``ingredients`` / ``steps``) rather than
        raising, so a single incomplete page does not discard the recipes
        already collected.
    """
    # Create list to save all recipes
    recipe_dicts = []

    for count, link in enumerate(links, start=1):

        # Progress counter
        print(f"Recipe {count}/{len(links)}")

        # Create recipe url
        recipe_url = link if exlude_base else base_url + link

        # Visit page
        browser.visit(recipe_url)

        # Create a Beautiful Soup object
        soup = BeautifulSoup(browser.html, 'html.parser')

        # Ready message
        print(f"Now on page: {recipe_url}")

        # Get recipe name
        name = _text_or_none(soup.find('h1', class_='heading-1'))

        print(f"Recipe: {name}")

        # Get ingredients
        ingredients = [
            ing.get_text()
            for ing in soup.find_all('li', class_='pb-xxs pt-xxs list-item list-item--separator')
        ]

        # Get header image: first matching container, then its <img> source
        img_div = soup.find('div', class_='image chromatic-ignore post-header-image image--fluid image--scaled-up')
        img_tag = img_div.find('img') if img_div is not None else None
        img_src = img_tag.get('src') if img_tag is not None else None

        # Get steps
        steps_section = soup.find('section', class_='recipe__method-steps mb-lg col-12 col-lg-6')
        if steps_section is not None:
            steps = [step.get_text() for step in steps_section.find_all('li')]
        else:
            steps = []

        # Get rating: the first <span> holds the stars, the second the votes
        rating = soup.find('div', class_='rating__values')
        rating_spans = rating.find_all('span') if rating is not None else []
        stars = rating_spans[0].get_text() if len(rating_spans) > 0 else None
        votes = rating_spans[1].get_text() if len(rating_spans) > 1 else None

        print(f"{stars} {votes}")

        # Save recipe in a dictionary and add it to the list
        recipe_dicts.append({'name': name,
                             'link': recipe_url,
                             'ingredients_num': len(ingredients),
                             'ingredients': ingredients,
                             'steps': steps,
                             'stars': stars,
                             'votes': votes,
                             'img': img_src})

    return recipe_dicts


In [3]:
# pandas turns the scraped recipe dictionaries into a table
import pandas as pd

# Manually selected recipes, one list per batch
# (the numeric suffix appears to be a YYMMDD date -- confirm)
links_231017 = [
    'https://www.bbcgoodfood.com/recipes/turkey-pasta-bake',
    'https://www.bbcgoodfood.com/recipes/chicken-tinga-style-enchiladas',
    'https://www.bbcgoodfood.com/recipes/chole-with-cumin-rice-raita',
    'https://www.bbcgoodfood.com/recipes/spicy-sausage-bean-one-pot',
    'https://www.bbcgoodfood.com/recipes/red-lentil-squash-dhal',
    'https://www.bbcgoodfood.com/recipes/scandi-meatballs',
    'https://www.bbcgoodfood.com/recipes/pilchard-puttanesca',
    'https://www.bbcgoodfood.com/recipes/chorizo-rosemary-pearl-barley-risotto',
    'https://www.bbcgoodfood.com/recipes/smoky-sweet-potato-bean-cakes-citrus-salad',
    'https://www.bbcgoodfood.com/recipes/courgette-sausage-rigatoni-bakes',
    'https://www.bbcgoodfood.com/recipes/clementine-pork-steaks',
    'https://www.bbcgoodfood.com/recipes/sticky-chinese-chicken-traybake',
    'https://www.bbcgoodfood.com/recipes/jumbo-sausage-roll-salsa-beans',
    'https://www.bbcgoodfood.com/recipes/smoky-chickpeas-toast',
    'https://www.bbcgoodfood.com/recipes/smoked-mackerel-chowder-hedgehog-garlic-bread-0',
    'https://www.bbcgoodfood.com/recipes/miso-mushroom-tofu-noodle-soup',
    'https://www.bbcgoodfood.com/recipes/corned-beef-hash',
    'https://www.bbcgoodfood.com/recipes/cali-kale-sausage-bake',
]

links_231025 = [
    'https://www.bbcgoodfood.com/recipes/chicken-tinga-style-enchiladas',
    'https://www.bbcgoodfood.com/recipes/chicken-new-potato-traybake',
    'https://www.bbcgoodfood.com/recipes/classic-meatloaf-tomato-sauce',
    'https://www.bbcgoodfood.com/recipes/butternut-squash-cherry-tomato-crumble',
    'https://www.bbcgoodfood.com/recipes/simple-mushroom-curry',
    'https://www.bbcgoodfood.com/recipes/bean-enchiladas',
    'https://www.bbcgoodfood.com/recipes/creamy-baked-gnocchi-with-squash-spinach',
    'https://www.bbcgoodfood.com/recipes/curried-sate-noodles',
    'https://www.bbcgoodfood.com/recipes/satay-sweet-potato-curry',
    'https://www.bbcgoodfood.com/recipes/bean-halloumi-stew',
]

links_231113 = [
    'https://www.bbcgoodfood.com/recipes/spicy-sausage-bean-one-pot',
    'https://www.bbcgoodfood.com/recipes/piri-piri-chicken-pittas',
    'https://www.bbcgoodfood.com/recipes/sesame-pork-meatballs-chilli-noodle-broth',
    'https://www.bbcgoodfood.com/recipes/beef-curry',
    'https://www.bbcgoodfood.com/recipes/aubergine-halloumi-harissa-skillet-bake',
    'https://www.bbcgoodfood.com/recipes/leek-mushroom-gruyere-quiche',
]

# Scrape the latest batch; the links are full URLs, so the base URL is not prepended
recipe_dicts = get_recipes(links_231113, True)

# One row per recipe, one column per dictionary key
recipes_df = pd.DataFrame(recipe_dicts)

# Optionally keep a copy on disk as CSV, without the index column
recipes_df.to_csv('recipes_2023-11-13.csv', index=False)

# Show the resulting table
recipes_df

Recipe 1/6


NameError: name 'browser' is not defined

In [11]:
# Optional: write the current recipes table to a CSV file, leaving out the index column.
# NOTE(review): the file name carries the 2023-10-17 date -- confirm that recipes_df
# currently holds that batch before running this cell.
recipes_df.to_csv(path_or_buf='recipes_2023-10-17.csv', index=False)

In [136]:
# Write the current recipes table to recipes.json
recipes_df.to_json(path_or_buf='recipes.json')

## End session

In [3]:
# End the automated browsing session started with Browser('chrome').
# Run this last: get_recipes() needs the browser object to still be open.
browser.quit()