In [None]:
!pip install firecrawl-py \
            langchain \
            python-dotenv \
            google-auth \
            google-api-python-client \
            google-auth-httplib2 \
            google-auth-oauthlib \
            gspread \
            pyairtable

In [None]:
import os
from dotenv import load_dotenv
from firecrawl import FirecrawlApp

# Load variables from a local .env file; values there take precedence over
# anything already present in the process environment (override=True).
load_dotenv(override=True)

# Site to scrape; later cells reuse this value as the record label.
website = 'wired.com'

# Firecrawl client, keyed from the FIRECRAWL_API environment variable.
firecrawl = FirecrawlApp(api_key=os.environ.get('FIRECRAWL_API'))

# Scrape the single URL, requesting markdown output, and keep the markdown
# text with leading/trailing whitespace removed.
scrape_result = firecrawl.scrape_url(
    website,
    formats=['markdown'],
)
web_data = scrape_result.markdown.strip()

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Chat model with temperature 0; the key is read from the OPENAI_API
# environment variable.
llm = ChatOpenAI(
    openai_api_key=os.environ.get('OPENAI_API'),
    temperature=0,
)

# Single-variable prompt: the scraped markdown is substituted for {web_data}.
prompt = PromptTemplate(
    input_variables=["web_data"],
    template="Given the website data \"{web_data}\" Make sense of the data and provide information about the website.",
)

# Bind the prompt to the model so it can be executed in one call.
chain = LLMChain(llm=llm, prompt=prompt)

# Run the chain on the scraped content; `result` is reused by later cells.
result = chain.run(web_data=web_data)

# Show the model's answer under a header naming the website.
prefix = f"Website: {website}\n\n"
print(prefix + result)

In [30]:
from googleapiclient.discovery import build
from google.oauth2.service_account import Credentials
import gspread

# --- Google Sheets configuration -------------------------------------------
SPREADSHEET_ID = '1VarAr4qbBgM-0ixXa_-GrlDMy3y6bTz1EDipnAqps1E'
SERVICE_ACCOUNT_FILE = 'creds.json'  # service-account key file, relative to the working directory
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']

# Build service-account credentials limited to SCOPES and authorize gspread.
creds = Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
client = gspread.authorize(creds)

# Open the spreadsheet by key and select the 'Sheet1' worksheet.
workbook = client.open_by_key(SPREADSHEET_ID)
sheet = workbook.worksheet('Sheet1')

# Append one row: [website, LLM summary]. Kept as the final expression so the
# API response is displayed as the cell output.
sheet.append_row([website, result])


{'spreadsheetId': '1VarAr4qbBgM-0ixXa_-GrlDMy3y6bTz1EDipnAqps1E',
 'updates': {'spreadsheetId': '1VarAr4qbBgM-0ixXa_-GrlDMy3y6bTz1EDipnAqps1E',
  'updatedRange': 'Sheet1!A1:B1',
  'updatedRows': 1,
  'updatedColumns': 2,
  'updatedCells': 2}}

In [None]:
# Read the worksheet's row count. `append_row` requires a `values` argument
# and is for writing data, so it cannot be used to obtain a count.
# NOTE(review): `row_count` appears to be the sheet's grid size (allocated
# rows), not the number of populated rows — confirm this is the intended metric.
count = sheet.row_count
print(count)

1000


In [None]:
import os
from pyairtable import Api


# Airtable client, keyed from the AIRTABLE_API_KEY environment variable.
api = Api(os.environ.get('AIRTABLE_API_KEY'))

# Target table, addressed by base id and table id.
table = api.table(
    'app91Xn5ozfJumoDi',
    'tblQOUjnjIU5MWXPM',
)

# Field values for the new record: the scraped site and the LLM summary.
data = {'Website': website, 'Info': result}

# Create the record and keep the API response for inspection.
records = table.create(data)


In [23]:
# Display the value returned by `table.create(data)`.
# NOTE(review): the saved output below is a list of three records, while
# `records` is assigned from a single `table.create` call — the output looks
# stale (possibly from an earlier listing call); re-run to confirm.
print(records)

[{'id': 'rec6hgR8d7M6izeMY', 'createdTime': '2025-05-09T10:36:01.000Z', 'fields': {}}, {'id': 'recBdtMbPnJIEGKqW', 'createdTime': '2025-05-09T10:36:01.000Z', 'fields': {'Name': 'First', 'Info': 'Info'}}, {'id': 'recbMlMzn9XJ0pyrb', 'createdTime': '2025-05-09T10:36:01.000Z', 'fields': {}}]
