## 📦 Step 1: Install Required Libraries
Install the required Python packages: Firecrawl, LangChain, OpenAI, python-dotenv, and the Google Sheets and Airtable client libraries.


In [13]:
!pip install firecrawl-py \
            langchain \
            python-dotenv \
            google-auth \
            google-api-python-client \
            google-auth-httplib2 \
            google-auth-oauthlib \
            gspread \
            pyairtable


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


## 🌐 Step 2: Get Website URL
Prompt the user to enter the URL of the website they want to analyze.


In [14]:
# Ask for the target URL. Surrounding whitespace (easy to pick up when
# pasting) is removed so it does not end up in the scrape request or in the
# rows written to Google Sheets / Airtable later on.
website = input('Enter a website url: ').strip()

# Stop here on empty input rather than failing later inside the scrape call.
if not website:
    raise ValueError('A website URL is required.')

## 🕷️ Step 3: Scrape Website Using Firecrawl
Use the Firecrawl API to scrape the content of the given website in markdown format.


In [15]:
import os
from dotenv import load_dotenv
from firecrawl import FirecrawlApp

# Load variables from a .env file; override=True lets values from the file
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# The Firecrawl key is read from the FIRECRAWL_API environment variable.
firecrawl = FirecrawlApp(api_key=os.getenv('FIRECRAWL_API'))

# Request only the markdown rendering of the page.
scrape_result = firecrawl.scrape_url(website, formats=['markdown'])

# The response may carry no markdown (e.g. an empty or blocked page). Treat a
# missing value as empty text instead of crashing on `None.strip()`, then fail
# with an explicit message so the later steps never run on empty content.
markdown = (scrape_result.markdown or '').strip()
if not markdown:
    raise ValueError(f'Firecrawl returned no markdown content for {website!r}.')

## 🧠 Step 4: Use OpenAI and LangChain to Analyze Website Content
Send the scraped markdown to OpenAI through LangChain to generate a meaningful summary or insight about the website.


In [16]:
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate

# Chat model client; the key is read from the OPENAI_API environment variable.
# temperature=0 asks the model for its least random output.
llm = ChatOpenAI(openai_api_key=os.getenv('OPENAI_API'), temperature=0)

# Single-variable prompt: the scraped markdown is substituted for {web_data}.
prompt = PromptTemplate(
    input_variables=["web_data"],
    template="Given the website data \"{web_data}\" Make sense of the data and provide information about the website.")

# Pipe the formatted prompt into the model.
chain = prompt | llm

# `result.content` (used by the storage steps below) holds the model's reply.
result = chain.invoke({"web_data": markdown})

## 📊 Step 5: Save Result to Google Sheets
Authenticate with Google Sheets and append the website and its summary to a predefined spreadsheet.


In [17]:
from google.oauth2.service_account import Credentials
import gspread

# --- Configuration -----------------------------------------------------------
SERVICE_ACCOUNT_FILE = 'creds.json'
SPREADSHEET_ID = '1VarAr4qbBgM-0ixXa_-GrlDMy3y6bTz1EDipnAqps1E'
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']

# --- Authentication ----------------------------------------------------------
# Build credentials from the service-account file, limited to SCOPES, and hand
# them to gspread.
creds = Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
client = gspread.authorize(creds)

# --- Write -------------------------------------------------------------------
workbook = client.open_by_key(SPREADSHEET_ID)
sheet = workbook.worksheet('Sheet1')

# One row per analysed site: the URL followed by the model's reply.
summary_row = [website, result.content]

# Kept as the last expression so the API response is displayed as cell output.
sheet.append_row(summary_row)

{'spreadsheetId': '1VarAr4qbBgM-0ixXa_-GrlDMy3y6bTz1EDipnAqps1E',
 'updates': {'spreadsheetId': '1VarAr4qbBgM-0ixXa_-GrlDMy3y6bTz1EDipnAqps1E',
  'updatedRange': 'Sheet1!A1:B1',
  'updatedRows': 1,
  'updatedColumns': 2,
  'updatedCells': 2}}

## 📋 Step 6: Save Result to Airtable
Authenticate with Airtable and insert a new record containing the website and its analysis result.


In [18]:
from pyairtable import Api


# Identifiers passed to `api.table(...)` below: the base first, then the table.
AIRTABLE_BASE_ID = 'app91Xn5ozfJumoDi'
AIRTABLE_TABLE = 'tblQOUjnjIU5MWXPM'

# The Airtable key is read from the AIRTABLE_API_KEY environment variable.
api = Api(os.getenv('AIRTABLE_API_KEY'))
table = api.table(AIRTABLE_BASE_ID, AIRTABLE_TABLE)

# Field values for the new record: the analysed URL and the model's reply.
data = {'Website': website, 'Info': result.content}

# Create the record; the call's return value is kept in `records`.
records = table.create(data)