## Imports

In [None]:
import pandas as pd
import numpy as np
import csv

# Data Sourcing Guide

## Import CSV

In [None]:
import csv

# Number of data rows to preview; printing the reader object itself only
# shows its repr (e.g. "<csv.DictReader object at 0x...>"), not the data.
PREVIEW_ROWS = 5

# newline='' lets the csv module handle line endings itself, as the csv
# module documentation recommends for files passed to a reader.
with open('data/ramen-ratings.csv', mode='r', newline='') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    # Column names are taken from the first line of the file.
    print(csv_reader.fieldnames)
    # Each row is a dict keyed by column name; stop after a short preview
    # so the cell output stays readable.
    for row_number, row in enumerate(csv_reader):
        if row_number >= PREVIEW_ROWS:
            break
        print(row)

## API

In [None]:
import requests

# Query the location-search endpoint and print the first match as
# "<title>: <woeid> (<latt_long>)".
url = "https://www.metaweather.com/api/location/search/?query=london"

# A timeout keeps the cell from hanging forever on an unresponsive server,
# and raise_for_status() surfaces HTTP errors instead of trying to decode
# an error page as JSON.
search_reply = requests.get(url, timeout=10)
search_reply.raise_for_status()
response = search_reply.json()

# The payload is indexed as a list of matches; guard against an empty one
# before taking the first element.
if response:
    city = response[0]
    print(f"{city['title']}: {city['woeid']} ({city['latt_long']})")
else:
    print("No matching location found")

In [None]:
# Illustrative only: shows how to hand query-string parameters to requests
# through the `params` argument. The endpoint and key are placeholders, so
# this cell is not expected to run successfully.

# Placeholder credentials/target; for a real service, load the key from the
# environment rather than hard-coding it in the notebook.
params = dict(
    _apikey="xxx",
    url="https://someurl.com/search?sortby=Price_LH&per_page=96&size=1%2C12&page=35",
)
url = "https://someurl.com/search?query="

# requests encodes `params` and appends them to the URL's query string.
response = requests.get(url, params=params)

## Scraping

### Beautiful Soup

In [None]:
from bs4 import BeautifulSoup


# Download the search-results page for "carrot" and parse it into `soup`.
url = "https://recipes.lewagon.com/?search[query]=carrot"

# Bound the wait with a timeout and fail loudly on an HTTP error, so an
# error page is never silently parsed as if it were the recipe listing.
page = requests.get(url, timeout=10)
page.raise_for_status()

# `response` holds the raw HTML text of the page.
response = page.text
soup = BeautifulSoup(response, "html.parser")

# Optional quick check: uncomment to print the text of every recipe name.
#for recipe in soup.find_all('p', class_= 'recipe-name'):
#    print(recipe.text)

#### Scraping - Parsing

In [None]:
def _tag_text(parent, tag_name, css_class):
    """Return the stripped text of the first matching descendant tag.

    Args:
        parent: Tag to search within.
        tag_name: Name of the tag to look for (e.g. "span").
        css_class: Value passed to BeautifulSoup's ``class_`` filter.

    Returns:
        The tag's text as a plain ``str`` with surrounding whitespace
        removed, or ``None`` when no matching tag exists.
    """
    tag = parent.find(tag_name, class_=css_class)
    if tag is None:
        # Missing element: record None rather than raising AttributeError.
        return None
    # get_text() returns a plain str and also works when the tag has nested
    # children, where `.string` would be None.
    return tag.get_text(strip=True)


# Build one dict per recipe card found in the parsed page.
recipes = []
for recipe in soup.find_all("div", class_="recipe my-3"):
    recipes.append({
        'name': _tag_text(
            recipe, "p",
            "text-dark text-truncate w-100 font-weight-bold mb-0 recipe-name"),
        'difficulty': _tag_text(recipe, "span", "recipe-difficulty"),
        'prep_time': _tag_text(recipe, "span", "recipe-cooktime"),
    })

#### Scraping - Navigate Data Structure

In [None]:
# Quick tour of common BeautifulSoup navigation calls, applied to `soup`.
# Each statement is a bare expression; in a notebook only the last one in the
# cell is displayed, so run them one at a time to see each result.
# NOTE(review): the sample outputs in the comments look like they come from a
# "Dormouse's story" example document rather than the page parsed earlier in
# this notebook - confirm; results on the recipes page will differ.

# The first <title> tag in the document.
soup.title
# <title>The Dormouse's story</title>

# The tag's name.
soup.title.name
# u'title'

# The text contained in the tag.
soup.title.string
# u'The Dormouse's story'

# The name of the tag that encloses <title>.
soup.title.parent.name
# u'head'

# The first <p> tag in the document.
soup.p
# <p class="title"><b>The Dormouse's story</b></p>

# Attribute access on a tag uses dict-style indexing.
# NOTE(review): bs4 treats `class` as multi-valued, so this is presumably a
# list such as ['title'] rather than a plain string - confirm.
soup.p['class']
# u'title'

# The first <a> tag in the document.
soup.a
# <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>

# Every <a> tag in the document.
soup.find_all('a')
# [<a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>,
#  <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>,
#  <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>]

# Look up a single tag by its id attribute.
soup.find(id="link3")
# <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>

### Advanced Scraping - Selenium

In [None]:
from selenium import webdriver
import chromedriver_binary
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec

def launchBrowser(url="https://recipes.lewagon.com/recipes/advanced"):
    """Start a Chrome session and load ``url`` in it.

    Args:
        url: Page to open once the browser has started. Defaults to the
            recipes site's ``/recipes/advanced`` page.

    Returns:
        The ``webdriver.Chrome`` instance, already navigated to ``url``.

    Raises:
        Exception: Whatever ``driver.get`` raises is re-raised after the
            browser has been shut down.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
    except Exception:
        # Navigation failed: close the browser so no orphaned Chrome
        # process is left behind, then let the caller see the error.
        driver.quit()
        raise
    return driver


driver = launchBrowser()

In [None]:
# Type a query into the search box and submit its form.
# find_element(By.ID, ...) is used because the find_element_by_* helpers were
# deprecated in Selenium 4 and have since been removed.
search_input = driver.find_element(By.ID, 'search_query')
search_input.send_keys('chocolate')
search_input.submit()

In [None]:
# Wait up to 15 seconds for the results container to become visible.
wait = WebDriverWait(driver, 15)
wait.until(ec.visibility_of_element_located(
    (By.XPATH, "//div[@id='recipes']")))

# find_elements(By.XPATH, ...) replaces the find_elements_by_xpath helper,
# which was deprecated in Selenium 4 and has since been removed.
cards = driver.find_elements(By.XPATH, "//div[@class='recipe my-3']")
print(f"Found {len(cards)} results on the page")

# Collect every recipe URL before navigating, because the loop further down
# leaves the results page.
recipe_urls = []
for card in cards:
    url = card.get_attribute('data-href')
    # Skip cards without a data-href so driver.get() is never handed None.
    if url:
        recipe_urls.append(url)

# Visit each recipe page and scrape its details into a list of dicts.
recipes = []
for url in recipe_urls:
    print(f"Navigating to {url}")
    driver.get(url)
    # Block until the recipe container is visible before reading the page.
    wait.until(
        ec.visibility_of_element_located(
            (By.XPATH,
             "//div[@class='p-3 border bg-white rounded-lg recipe-container']"
             )))

    # Parse the rendered HTML with BeautifulSoup for easier extraction.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    # .text (rather than .string) matches the sibling fields and still works
    # if the heading contains nested markup.
    name = soup.find('h2').text.strip()
    cooktime = soup.find('span', class_='recipe-cooktime').text.strip()
    difficulty = soup.find('span', class_='recipe-difficulty').text.strip()
    # The price is read from the element's data-price attribute, not its text.
    price = soup.find('small',
                      class_='recipe-price').attrs.get('data-price').strip()
    description = soup.find('p', class_='recipe-description').text.strip()
    recipes.append({
        'name': name,
        'cooktime': cooktime,
        'difficulty': difficulty,
        'price': price,
        'description': description
    })