In [17]:
import requests
from bs4 import BeautifulSoup
import re
import json


In [4]:
# Example recipe page URL on kurashiru.com.
# NOTE(review): no cell visible here reads `url`; presumably it is meant as the
# argument to get_page_info -- confirm or remove.
url = "https://www.kurashiru.com/recipes/6c656bdf-d8c9-427a-b669-f6e1d9f81fcd"

In [8]:
def get_html(url, params=None, headers=None, timeout=10):
    """Fetch ``url`` with HTTP GET and return the body parsed by BeautifulSoup.

    Args:
        url: Address to request.
        params: Optional query-string parameters forwarded to ``requests.get``.
        headers: Optional request headers forwarded to ``requests.get``.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot block the notebook indefinitely.

    Returns:
        The parsed document, or ``None`` when the request fails (connection
        problem, timeout, invalid URL, or an HTTP error status).
    """
    try:
        resp = requests.get(url, params=params, headers=headers, timeout=timeout)
        # An error page carries none of the data callers look for, so a
        # 4xx/5xx status is reported as a failed fetch instead of being parsed.
        resp.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers test for None rather than catching errors.
        return None
    # Force UTF-8 decoding regardless of the charset the response advertises.
    resp.encoding = 'utf8'
    return BeautifulSoup(resp.text, "html.parser")
        

In [158]:
def get_page_info(url):
    """Extract recipe details from the JSON state embedded in a recipe page.

    The page is fetched with ``get_html``. Its data is read from the
    ``<script>`` tag that assigns a JSON object to
    ``window.__delyKurashiruEnvironment.ssrContext``.

    Args:
        url: Address of the recipe page, passed to ``get_html``.

    Returns:
        A dict with the keys ``recipe_id``, ``recipe_name``, ``video_url``,
        ``img_small_url``, ``img_normal_url``, ``img_large_url``,
        ``cooking_time``, ``introduction``, ``expense``, ``serving``,
        ``rating-count``, ``calorie``, ``ingredient_list`` (dicts with
        ``ingredient_id``/``ingredient_title``/``ingredient_name``/
        ``ingredient_amount``) and ``instruction_list`` (dicts with
        ``index``/``body``).

        ``None`` when the page could not be fetched, the script tag is
        missing, or the embedded JSON is invalid or lacks an expected field.
    """
    soup = get_html(url)
    if soup is None:
        # get_html signals a failed fetch with None.
        return None

    assignment_re = re.compile(r'window\.__delyKurashiruEnvironment\.ssrContext\s*=\s*')
    script = soup.find('script', string=assignment_re)
    if script is None:
        return None
    script_text = str(script)
    assignment = assignment_re.search(script_text)
    if assignment is None:
        return None

    try:
        # Decode exactly one JSON value starting right after the "=". A regex
        # such as ``{.*?};`` would stop at the first "};" occurring inside the
        # JSON text (e.g. in a description) and cannot span line breaks.
        data, _ = json.JSONDecoder().raw_decode(script_text, assignment.end())

        video = data["state"]["fetchVideo"]["data"]["data"]
        attributes = video["attributes"]

        result = {
            "recipe_id": video["id"],
            "recipe_name": attributes["title"],
            "video_url": attributes["mp4-url"],
            "img_small_url": attributes["thumbnail-square-small-url"],
            "img_normal_url": attributes["thumbnail-square-normal-url"],
            "img_large_url": attributes["thumbnail-square-large-url"],
            "cooking_time": attributes["cooking-time"],
            "introduction": attributes["introduction"],
            "expense": attributes["expense"],
            "serving": attributes["servings-size"],
            "rating-count": attributes["rating-count"],
            "calorie": attributes["calorie"],
        }

        # Entries whose "type" is something else are skipped.
        result["ingredient_list"] = [
            {
                "ingredient_id": ingredient["id"],
                "ingredient_title": ingredient["name"],
                "ingredient_name": ingredient["actual-name"],
                "ingredient_amount": ingredient["quantity-amount"],
            }
            for ingredient in attributes["ingredients"]
            if ingredient["type"] == "ingredients"
        ]
        result["instruction_list"] = [
            {
                "index": instruction["sort-order"],
                "body": instruction["body"],
            }
            for instruction in attributes["instructions"]
            if instruction["type"] == "instructions"
        ]
    except (LookupError, TypeError, ValueError):
        # Invalid JSON (JSONDecodeError is a ValueError) or a payload that
        # does not have the expected structure: report "no data".
        return None
    return result


In [179]:
def get_search_result_recipe_ids(keywords, timeout=10):
    """Search kurashiru.com and return the ids listed in the result page.

    The ids are read from the JSON object that the search page assigns to
    ``window.__delyKurashiruEnvironment.ssrContext`` inside a ``<script>`` tag.

    Args:
        keywords: Iterable of keyword strings, joined with spaces to form the
            query. A single string is treated as one keyword.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with the ``id`` of every search result entry, or ``None`` when
        the script tag is missing or the embedded JSON is invalid or lacks the
        expected structure.

    Raises:
        requests.RequestException: If the request fails or the server answers
            with an HTTP error status.
    """
    if isinstance(keywords, str):
        # ' '.join on a bare string would insert a space between every character.
        keywords = [keywords]

    base_url = 'https://www.kurashiru.com/search'
    params = {'query': ' '.join(keywords)}
    response = requests.get(base_url, params=params, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    assignment_re = re.compile(r'window\.__delyKurashiruEnvironment\.ssrContext\s*=\s*')
    script = soup.find('script', string=assignment_re)
    if script is None:
        return None
    script_text = str(script)
    assignment = assignment_re.search(script_text)
    if assignment is None:
        return None

    try:
        # Decode exactly one JSON value starting right after the "=". A regex
        # such as ``{.*?};`` would stop at the first "};" occurring inside the
        # JSON text and cannot span line breaks.
        data, _ = json.JSONDecoder().raw_decode(script_text, assignment.end())
        recipes = data['state']['fetchMergedContentsSearchV2']['data']['data']
        return [recipe['id'] for recipe in recipes]
    except (LookupError, TypeError, ValueError):
        # Invalid JSON (JSONDecodeError is a ValueError) or a payload that
        # does not have the expected structure: report "no data".
        return None

In [180]:
# Sample search with two keywords (onion, cabbage).
keywords = ["たまねぎ", "キャベツ"]
# The search helper defined in this notebook is get_search_result_recipe_ids;
# get_search_result_urls is not defined anywhere visible, so calling it raises
# NameError on a fresh kernel.
get_search_result_recipe_ids(keywords)

['1258d283-820f-496c-9f86-41a445f6b93a',
 '97a84856-0b17-4933-99ec-26d0a3ecc12a',
 '835cabd1-d8cb-49e4-82e2-86422971a534',
 '9c1de890-1bf7-42cd-b4c2-04494b7966f8',
 'bd675224-83ed-43ff-802a-c067e7bbec1e',
 '6c656bdf-d8c9-427a-b669-f6e1d9f81fcd',
 'e246f553-24cf-4fbb-999f-21e92b45fbda',
 'af888da8-eebf-4eef-9fe3-d328856bba9f',
 'dafdf573-bf71-4504-9642-08c3915f4dd2',
 '17e6fc21-aa54-448e-b636-64e3d8eeb8f4',
 'b17c2d4c-2372-4859-b2a0-1675c6e8a375',
 '08dcadab-ae46-4e24-a1b9-ccae48fcf1c3',
 '19124a50-5b58-4fb1-b66e-5b422e468bd2',
 '215dc346-ec32-41ce-80dc-1fff99930c73',
 '7df4b906-0f60-4114-a7db-83bf0ca9ec8b',
 'eade0aff-404b-4fb4-b08b-7a8229ec26f2',
 '2bd298c7-7e17-460d-82df-ff060646de16',
 '4ffdddbc-ce5f-43ff-a657-15de17869c44',
 'd2743115-940a-4a7f-9666-e743958b3309',
 '8849809e-e2c5-4cba-8960-5243fd96b0c2',
 '6c7fa4a1-5235-4061-8bf5-0872e2f47068',
 '3851c1ef-b510-4f42-a538-0f33d6b1a439',
 'eb4b590a-dde5-42f7-9828-7a20307fe7f4',
 '386ba630-d66c-46ac-9aaf-c2a198412e3e',
 '67a25403-bc2f-