## 📦 Step 1: Install Required Libraries
Install all the required Python packages, including the Firecrawl, LangChain, OpenAI, Google Sheets, and Airtable libraries.


In [None]:
!pip install firecrawl-py \
            langchain \
            python-dotenv \
            google-auth \
            google-api-python-client \
            google-auth-httplib2 \
            google-auth-oauthlib \
            gspread \
            pyairtable

## 🌐 Step 2: Get Website URL
Prompt the user to enter the URL of the website they want to analyze.


In [None]:
# Ask for the site to analyze. Surrounding whitespace (e.g. from a pasted URL)
# is removed so the same clean value is scraped and written to the exports.
website = input('Enter a website url: ').strip()

# Fail fast: an empty answer would otherwise be sent to the scraper and
# recorded in Google Sheets / Airtable as a blank website.
if not website:
    raise ValueError('A website URL is required.')

## 🕷️ Step 3: Scrape Website Using Firecrawl
Use the Firecrawl API to scrape the content of the given website in markdown format.


In [None]:
import os
from dotenv import load_dotenv
from firecrawl import FirecrawlApp

# Load variables from a local .env file; override=True lets values from .env
# replace variables that are already set in the process environment.
load_dotenv(override=True)

firecrawl = FirecrawlApp(api_key=os.getenv('FIRECRAWL_API'))

# Scrape the URL entered in Step 2 (`website`), requesting markdown output only.
# The URL is intentionally NOT reassigned here, so the user's input is honoured.
scrape_result = firecrawl.scrape_url(website, formats=['markdown'])

# Guard against a missing or blank markdown payload so the failure is explicit
# instead of an AttributeError on `.strip()` or an empty prompt in Step 4.
scraped_markdown = scrape_result.markdown
if not scraped_markdown or not scraped_markdown.strip():
    raise ValueError(f'Firecrawl returned no markdown content for {website!r}.')

web_data = scraped_markdown.strip()

## 🧠 Step 4: Use OpenAI and LangChain to Analyze Website Content
Send the scraped markdown to OpenAI using LangChain to generate a meaningful summary or insight about the website.


In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Prompt text sent to the model; the {web_data} placeholder is filled with the
# scraped markdown when the chain runs.
ANALYSIS_TEMPLATE = (
    'Given the website data "{web_data}" '
    'Make sense of the data and provide information about the website.'
)

# Chat model client; the API key is read from the OPENAI_API environment variable.
llm = ChatOpenAI(temperature=0, openai_api_key=os.getenv('OPENAI_API'))

prompt = PromptTemplate(template=ANALYSIS_TEMPLATE, input_variables=["web_data"])

# LLMChain ties the prompt and the model together into a single callable step.
chain = LLMChain(prompt=prompt, llm=llm)

# Run the chain on the scraped content; `result` is reused by the export steps.
result = chain.run(web_data=web_data)

## 📊 Step 5: Save Result to Google Sheets
Authenticate with Google Sheets and append the website and its summary to a predefined spreadsheet.


In [None]:
from googleapiclient.discovery import build
from google.oauth2.service_account import Credentials
import gspread

# Export settings: target spreadsheet, service-account key file and API scope.
SPREADSHEET_ID = '1VarAr4qbBgM-0ixXa_-GrlDMy3y6bTz1EDipnAqps1E'
SERVICE_ACCOUNT_FILE = 'creds.json'
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']

# Build service-account credentials from the key file and authorize gspread.
creds = Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
client = gspread.authorize(creds)

# Open the spreadsheet by its ID and select the 'Sheet1' worksheet.
workbook = client.open_by_key(SPREADSHEET_ID)
sheet = workbook.worksheet('Sheet1')

# Append one row: the analyzed website followed by the generated analysis.
row = [website, result]
sheet.append_row(row)


## 📋 Step 6: Save Result to Airtable
Authenticate with Airtable and insert a new record containing the website and its analysis result.


In [None]:
import os
from pyairtable import Api


# Read the Airtable token up front and fail with a clear message when it is
# missing, instead of passing None to the client and getting an opaque
# authentication error on the first request.
airtable_api_key = os.getenv('AIRTABLE_API_KEY')
if not airtable_api_key:
    raise RuntimeError(
        'AIRTABLE_API_KEY is not set; add it to the environment or the .env file.'
    )

api = Api(airtable_api_key)
# Target base ID and table ID for the export.
table = api.table('app91Xn5ozfJumoDi', 'tblQOUjnjIU5MWXPM')

# Field names must match the column names of the Airtable table.
data = {
    'Website': website,
    'Info': result,
}

# Insert a single new record holding the website and its analysis.
records = table.create(data)
