# FireCrawl example

- [firecrawl](https://firecrawl.dev) 테스트
- DOM 구조를 몰라도 의미적으로 필요한 정보를 가져올 수 있다.

## Install Deps

In [1]:
!pip -q install firecrawl-py -U

## Setup

In [28]:
import os
from typing import List
from getpass import getpass

from pydantic import BaseModel, Field
from firecrawl import FirecrawlApp

In [3]:
# Prefer the key from the environment; fall back to a hidden interactive
# prompt only when the variable is unset or empty.
FIRE_CRAWL_API_KEY = os.environ.get("FIRE_CRAWL_API_KEY") or getpass(
    "FIRE_CRAWL_API_KEY: "
)

FIRE_CRAWL_API_KEY:  ········


In [29]:
# Create the Firecrawl client, authenticated with FIRE_CRAWL_API_KEY.
app = FirecrawlApp(api_key=FIRE_CRAWL_API_KEY)

## Usage

In [56]:
# Page to scrape (the query string selects season 14); the bare `url`
# expression on the last line echoes the value as the cell output.
url = "https://lol.inven.co.kr/dataninfo/champion/manualTool.php?confirm=2&season=14"
url

'https://lol.inven.co.kr/dataninfo/champion/manualTool.php?confirm=2&season=14'

In [57]:
class Article(BaseModel):
    """
    Champion tactic article schema.
    Contains details about how to play and win with a specific LOL champion.
    """

    # NOTE: the class docstring and every `description` below are emitted into
    # the generated JSON schema, so their text is kept verbatim.
    # None of the fields declares a default, so all of them are required.
    title: str = Field(description="The title of the article.")
    url: str = Field(description="The URL link to the article.")
    season: int = Field(description="The LOL season number for the article.")
    published_at: str = Field(
        description="The published date of the article in RFC 3339 format.",
    )

class ExtractSchema(BaseModel):
    """
    Schema to extract articles for the tactics from a page.
    """

    # Required field (no default). Its description, like the docstring above,
    # ends up in the generated JSON schema and is therefore kept verbatim.
    articles: List[Article] = Field(
        description="A list of LOL champion tactic article objects extracted from the page.",
    )

In [58]:
# Display the JSON schema generated from the ExtractSchema model.
ExtractSchema.model_json_schema()

{'$defs': {'Article': {'description': 'Champion tactic article schema.\nContains details about how to play and win with a specific LOL champion.',
   'properties': {'title': {'description': 'The title of the article.',
     'title': 'Title',
     'type': 'string'},
    'url': {'description': 'The URL link to the article.',
     'title': 'Url',
     'type': 'string'},
    'season': {'description': 'The LOL season number for the article.',
     'title': 'Season',
     'type': 'integer'},
    'published_at': {'description': 'The published date of the article in RFC 3339 format.',
     'title': 'Published At',
     'type': 'string'}},
   'required': ['title', 'url', 'season', 'published_at'],
   'title': 'Article',
   'type': 'object'}},
 'description': 'Schema to extract articles for the tactics from a page.',
 'properties': {'articles': {'description': 'A list of LOL champion tactic article objects extracted from the page.',
   'items': {'$ref': '#/$defs/Article'},
   'title': 'Articles',
   'type': 'array'}},
 'required': ['articles'],
 'title': 'ExtractSchema',
 'type': 'object'}

In [59]:
%%time

# Extraction settings: a natural-language prompt plus the JSON schema
# generated from ExtractSchema.
extract_options = {
    'prompt': 'Extract the LOL champion tactic articles at the <table> tag in the page. Each table row is an article.',
    'schema': ExtractSchema.model_json_schema(),
}

# Request only the 'extract' format for the page, then show which top-level
# keys the response contains.
data = app.scrape_url(url, {'formats': ['extract'], 'extract': extract_options})
print(data.keys())

dict_keys(['metadata', 'extract'])
CPU times: user 15.7 ms, sys: 6.29 ms, total: 22 ms
Wall time: 11.7 s


In [60]:
# Pull the list of extracted articles out of the response, print how many
# there are, and echo the list itself as the cell output.
tactics = data['extract']['articles']
print(len(tactics))
tactics

4


[{'title': '[GM]AP 샤코 서폿 설명 길게 안함',
  'url': 'manualToolView.php?idx=146545',
  'season': 14,
  'published_at': '09-22'},
 {'title': '★(마스터) 시즌 완벽 적응 개사기 ..',
  'url': 'manualToolView.php?idx=148044',
  'season': 14,
  'published_at': '07-26'},
 {'title': '[GM1]프로 1군원딜들 피셜 근본 원딜..',
  'url': 'manualToolView.php?idx=148047',
  'season': 14,
  'published_at': '03-16'},
 {'title': 'M)시즌 5부터 딩거 한 유저의 공략',
  'url': 'manualToolView.php?idx=148020',
  'season': 14,
  'published_at': '02-05'}]