In [None]:
import pandas as pd
import json
import os
import nest_asyncio
from typing import List, Optional, Literal
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode
from pydantic import BaseModel, Field
from google import genai
from google.genai import types
from dotenv import load_dotenv

### Filter List

In [46]:
# Load the raw venue export from the working directory into a DataFrame.
df = pd.read_json('venues.json')

In [47]:
# Preview the first rows; `id`, `location`, `contact` and `metadata` hold nested dicts.
df.head()

Unnamed: 0,id,name,location,contact,type,tagline,price_range_id,rating,num_ratings,url_slug,enable_discovery,enable_for_amex,deep_link,metadata,web_link
0,"{'resy': 84342, 'foursquare': None, 'google': ...",Like Music VIP Cancún,{'address_1': 'Av Xcaret Supermanzana 35 Manza...,"{'phone_number': None, 'url': None}",Cocktail Bar,,2,,0.0,like-music-vip-cancun,1,1,resy://resy.com/VenueDetails?venue_id=%7B%27re...,{'description': ' Like Music VIP Cancún is a C...,"https://resy.com/?venue_id={'resy': 84342, 'fo..."
1,"{'resy': 81110, 'foursquare': None, 'google': ...",Restaurante Da Enzo Playa del Carmen,"{'address_1': 'Calle 42 Entre Av. 10 y, 5 Av. ...","{'phone_number': None, 'url': None}",Italian,,2,,0.0,restaurante-da-enzo-playa-del-carmen,1,1,resy://resy.com/VenueDetails?venue_id=%7B%27re...,{'description': ' Restaurante Da Enzo Playa de...,"https://resy.com/?venue_id={'resy': 81110, 'fo..."
2,"{'resy': 78530, 'foursquare': None, 'google': ...",Restaurante El Plebe Bichi Teotihuacan,"{'address_1': 'Calle Emilio Carranza 222, 5585...","{'phone_number': None, 'url': None}",Seafood,,2,,0.0,restaurante-el-plebe-bichi-teotihuacan,1,1,resy://resy.com/VenueDetails?venue_id=%7B%27re...,{'description': ' Restaurante El Plebe Bichi T...,"https://resy.com/?venue_id={'resy': 78530, 'fo..."
3,"{'resy': 78730, 'foursquare': None, 'google': ...",Restaurante La Mentirosa Los Mochis,"{'address_1': 'Blvd Centenario 805, Centro, 81...","{'phone_number': None, 'url': None}",International,,2,,0.0,restaurante-la-mentirosa-los-mochis,1,1,resy://resy.com/VenueDetails?venue_id=%7B%27re...,{'description': ' Restaurante La Mentirosa Los...,"https://resy.com/?venue_id={'resy': 78730, 'fo..."
4,"{'resy': 75788, 'foursquare': None, 'google': ...",Restaurante Salmone's Morelia Suc. Siervo,"{'address_1': 'Av Siervo de La Nacion s/n, Agu...","{'phone_number': None, 'url': None}",Seafood,,2,,0.0,restaurante-salmones-morelia-suc-siervo,1,1,resy://resy.com/VenueDetails?venue_id=%7B%27re...,{'description': ' Restaurante Salmone's Moreli...,"https://resy.com/?venue_id={'resy': 75788, 'fo..."


In [48]:
# Expand the nested `location` dicts into one flat column per key.
df_flat = pd.json_normalize(df['location'])

In [49]:
# Attach the flattened location columns (prefixed `loc_`) next to the original columns.
# NOTE(review): this rebinds `df`, so re-running the cell appends a second set of `loc_` columns.
df = pd.concat([df, df_flat.add_prefix('loc_')], axis=1)

In [50]:
# Keep only venues whose location slug is New York, with a fresh 0..n-1 index.
df_nyc = df[df["loc_url_slug"] == 'new-york-ny'].reset_index(drop=True)

In [51]:
# Flatten the nested `contact` and `id` dicts of the NYC subset into their own frames.
df_nyc_flat_url = pd.json_normalize(df_nyc['contact'])
df_nyc_flat_id = pd.json_normalize(df_nyc['id'])

In [52]:
# Swap each nested column for its flattened counterpart (joined on the row index).
# NOTE(review): rebinding `df_nyc` makes this cell non-idempotent: a second run without
# re-running the filter cell fails because 'contact' / 'id' have already been dropped.
df_nyc = df_nyc.drop(columns=['contact']).join(df_nyc_flat_url)
df_nyc = df_nyc.drop(columns=['id']).join(df_nyc_flat_id)

In [53]:
# The nested `location` column is redundant now that the `loc_` columns exist.
df_nyc = df_nyc.drop(columns=['location'])

In [54]:
# List the resulting column names after flattening.
df_nyc.columns

Index(['name', 'type', 'tagline', 'price_range_id', 'rating', 'num_ratings',
       'url_slug', 'enable_discovery', 'enable_for_amex', 'deep_link',
       'metadata', 'web_link', 'loc_address_1', 'loc_address_2',
       'loc_locality', 'loc_region', 'loc_postal_code', 'loc_cross_street_1',
       'loc_cross_street_2', 'loc_longitude', 'loc_latitude',
       'loc_neighborhood', 'loc_time_zone', 'loc_url_slug', 'loc_id',
       'phone_number', 'url', 'resy', 'foursquare', 'google'],
      dtype='object')

In [55]:
# Select the identifier, descriptive and contact columns used by the enrichment step below.
enrichdf = df_nyc[["resy", "foursquare", "google", "name", "type", "price_range_id", "rating", "num_ratings", "web_link", "loc_id", "loc_neighborhood", "phone_number", "url"]]

In [67]:
# Check dtypes and null counts; per the output below, `url` is missing for some venues.
enrichdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2467 entries, 0 to 2466
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   resy              2467 non-null   int64  
 1   foursquare        1391 non-null   object 
 2   google            2466 non-null   object 
 3   name              2467 non-null   object 
 4   type              2467 non-null   object 
 5   price_range_id    2467 non-null   int64  
 6   rating            2439 non-null   float64
 7   num_ratings       2439 non-null   float64
 8   web_link          2467 non-null   object 
 9   loc_id            2467 non-null   object 
 10  loc_neighborhood  2467 non-null   object 
 11  phone_number      2293 non-null   object 
 12  url               2420 non-null   object 
dtypes: float64(2), int64(2), object(9)
memory usage: 250.7+ KB


### Crawl4AI

In [56]:
# Load environment variables from a local .env file (the API key is read from the environment below).
load_dotenv()

True

In [57]:
# Patch asyncio so the crawler's coroutines can run inside the notebook's already-running event loop.
nest_asyncio.apply()

In [58]:
# Shared Gemini client used by both LLM helpers; the key comes from the GENAI_API_KEY env var.
# NOTE(review): os.getenv returns None when the variable is unset — confirm how the client reacts.
client = genai.Client(api_key=os.getenv("GENAI_API_KEY")) 

#### Define Data Models

In [59]:
# One link from a crawled page plus the category the classifier assigned to it.
# Documented with `#` comments on purpose: pydantic copies a model's docstring into the
# JSON schema, and this model's schema is sent to the LLM as the response schema.
class ClassifiedLink(BaseModel):
    # Link target echoed back by the model.
    url: str = Field(description="The href URL from the input.")
    # Anchor text echoed back by the model.
    text: str = Field(description="The anchor text from the input. May be gibberish.")
    # Closed set of labels; any other value fails validation.
    category: Literal['ordering', 'gift_card', 'instagram', 'private_events', 'other'] = Field(
        description="The category: 'ordering', 'gift_card', 'instagram', 'private_events', or 'other'."
    )

# Top-level response object for the link classifier: a wrapper around the classified links.
# (`#` comment rather than docstring so the JSON schema sent to the LLM is unchanged.)
class LinkCollection(BaseModel):
    classified_links: List[ClassifiedLink]

# Structured answer requested from the tech-stack analysis model. Every field has a default,
# so `RestaurantTechProfile()` is a valid "nothing found" profile.
# (`#` comments rather than a docstring so the JSON schema sent to the LLM is unchanged.)
class RestaurantTechProfile(BaseModel):
    pos_system: Optional[str] = Field(None, description="Inferred POS (Toast, Square, etc)")
    tech_stack: List[str] = Field(default_factory=list, description="Other systems (Bentobox, OpenTable, etc)")
    ordering_provider: Optional[str] = Field(None, description="Who powers the online ordering?")
    instagram_handle: Optional[str] = Field(None, description="The extracted handle (e.g. 'thesmithnyc')")
    # Plain `str`: process_restaurant later overwrites this with values outside the description,
    # e.g. "Confirmed (Homepage)" or "Not Found on Events Page".
    tripleseat_status: str = Field("Not Found", description="Confirmed, Suspected, or Not Found")


#### Categorization LLM Helper (Gemini Flash)

In [60]:
def classify_links_flash(
    links: List[dict],
    max_links: int = 100,
    max_text_len: int = 200,
) -> LinkCollection:
    """
    Categorize page links with the Gemini Flash model instead of keyword guessing.

    Args:
        links: Dicts describing links, expected to carry "text" and "href" keys.
            ``None`` or an empty list is accepted.
        max_links: Only the first ``max_links`` entries are sent to the model.
        max_text_len: Anchor text is whitespace-normalized and cut to this length
            before being sent.

    Returns:
        A ``LinkCollection``. It is empty when there is nothing to classify or
        when the request or response validation fails (the error is printed).
    """
    # Collapse whitespace runs and cap anchor-text length so the model has less to echo back.
    # NOTE(review): motivated by a logged truncated-JSON reply full of tab runs; confirm it helps.
    candidates = [
        {**link, "text": " ".join(str(link.get("text") or "").split())[:max_text_len]}
        for link in (links or [])[:max_links]
    ]

    # Always return the declared type: callers read `.classified_links` on the result.
    if not candidates:
        return LinkCollection(classified_links=[])

    prompt = f"""
    You are a restaurant bot link classifier.
    Classify the following links based on their text and href into:
    - 'ordering' (Online ordering, takeout, delivery, 'order now')
    - 'gift_card' (Gift cards, merch, store)
    - 'instagram' (Social media links to Instagram)
    - 'private_events' (Private dining, event booking, party reservations)
    - 'other' (Menus, about, contact, locations, reservations)

    If the text is gibberish or empty, RELY MORE on the href URL to classify.

    Input Links:
    {json.dumps(candidates)}
    """

    try:
        response = client.models.generate_content(
            model="gemini-flash-latest", # CURRENTLY GEMINI 2.5 FLASH
            contents=prompt,
            config={
                "response_mime_type": "application/json",
                "response_json_schema": LinkCollection.model_json_schema(),
            }
        )

        # The reply is constrained to the LinkCollection schema; validate it into the model.
        return LinkCollection.model_validate_json(response.text)

    except Exception as e:
        # Best effort: one failed classification must not abort the whole crawl.
        print(f"Flash Classification Error: {e}")
        return LinkCollection(classified_links=[])

#### Analyze Tech Stack (Gemini 3)

In [61]:
def analyze_tech_stack_gemini3(
    classified_links: List[ClassifiedLink], 
    script_domains: List[str], 
    footer_text: str,
    deep_dive_signals: List[str]
) -> RestaurantTechProfile:
    """
    Ask gemini-3-pro-preview to infer a restaurant's tech profile from crawl signals.

    Args:
        classified_links: Links already labelled by the classifier; only the
            'ordering', 'gift_card' and 'instagram' ones are put in the prompt.
        script_domains: Hostnames of scripts loaded by the crawled pages.
        footer_text: Text taken from the homepage footer.
        deep_dive_signals: Strings describing redirects/links found on sub-pages.

    Returns:
        The validated ``RestaurantTechProfile``; a default (empty) profile if the
        request or validation raises, in which case the error is printed.
    """
    # Bucket the URLs by the three categories the prompt reports on.
    urls_by_category = {"ordering": [], "gift_card": [], "instagram": []}
    for link in classified_links:
        bucket = urls_by_category.get(link.category)
        if bucket is not None:
            bucket.append(link.url)

    ordering_urls = urls_by_category["ordering"]
    gift_urls = urls_by_category["gift_card"]
    socials = urls_by_category["instagram"]

    prompt = f"""
    Analyze these signals to determine the Restaurant's Tech Stack.
    
    1. Validated Ordering Links: {ordering_urls}
    2. Validated Gift Card Links: {gift_urls}

    3. **Deep Dive Signals (Ordering/Gift Pages):** 
    {json.dumps(deep_dive_signals, indent=2)}
    (IMPORTANT: These are links/redirects found AFTER clicking the ordering/gift buttons. 
     Look here for 3rd party POS domains like 'toasttab.com', 'spoton.com', 'clover.com'.)

    4. Loaded Scripts/Domains: {script_domains}
    5. Footer Text: {footer_text}
    6. Social Links: {socials}
    
    Task:
    - Identify the POS System (Point of Sale). 
      - PRIORITY: Look at "Deep Dive Signals". If a link redirects to or points to a known POS (Toast, Square, SpotOn, Upserve, etc), that is the POS.
      - SECONDARY: Look at Scripts.
    - Identify the Website Builder (e.g., Bentobox, Squarespace).
    - Extract the Instagram Handle.
    """

    try:
        # Structured-output request: JSON reply constrained to the profile schema.
        request_config = {
            "thinking_config": types.ThinkingConfig(thinking_level="low"),
            "response_mime_type": "application/json",
            "response_json_schema": RestaurantTechProfile.model_json_schema(),
        }
        reply = client.models.generate_content(
            model="gemini-3-pro-preview",
            contents=prompt,
            config=request_config,
        )
        return RestaurantTechProfile.model_validate_json(reply.text)
    except Exception as err:
        print(f"Gemini 3 Error: {err}")
        return RestaurantTechProfile()

#### Main Logic

In [63]:
def _script_domains(soup) -> set:
    """Return the hostnames referenced by `<script src=...>` tags in a parsed page."""
    domains = set()
    for tag in soup.find_all('script', src=True):
        host = urlparse(tag.get('src')).netloc
        if host:
            domains.add(host)
    return domains


async def process_restaurant(crawler, start_url, config):
    """
    Crawl one restaurant website and summarize the tech signals found on it.

    Pipeline:
      1. Crawl the homepage.
      2. Collect script hostnames, footer text and Tripleseat markers.
      3. Classify every homepage link with ``classify_links_flash``.
      4. Visit up to four ordering / gift-card links and record redirects,
         external links and script hostnames found there.
      5. Ask ``analyze_tech_stack_gemini3`` to interpret the signals.
      6. Visit the first private-events link and look for Tripleseat.

    Args:
        crawler: Object exposing an async ``arun(url=..., config=...)`` method.
        start_url: Homepage URL of the restaurant.
        config: Crawler run configuration, applied to every crawl in this call.

    Returns:
        ``None`` if the homepage crawl is unsuccessful, otherwise a dict with the
        keys ``url``, ``pos``, ``stack``, ``instagram``, ``tripleseat`` and
        ``ordering_url``.
    """
    print(f"--- Processing: {start_url} ---")

    # 1. Crawl Homepage
    result = await crawler.arun(url=start_url, config=config)
    if not result.success:
        return None

    # 2. Extract Basic Signals (Scripts & Footer)
    home_html = result.html or ""
    soup = BeautifulSoup(home_html, 'html.parser')
    scripts = _script_domains(soup)

    footer_tag = soup.find('footer')
    footer = footer_tag.get_text(" ", strip=True)[:300] if footer_tag else ""

    # Tripleseat on the homepage: either a reference to its domain or its embed form.
    ts_found = "tripleseat.com" in home_html or soup.find(id="tripleseat-form") is not None

    # 3. USE GEMINI FLASH: Classify Links
    # Internal and external links are merged; relative hrefs are resolved against the homepage.
    page_links = result.links or {}
    all_links = [
        {"text": link.get('text', ''), "href": urljoin(start_url, link['href'])}
        for link in page_links.get('internal', []) + page_links.get('external', [])
        if link.get('href')
    ]

    link_collection = classify_links_flash(all_links)
    # Tolerate a classifier result without `classified_links` (e.g. a plain empty list).
    classified_links = getattr(link_collection, "classified_links", None) or []

    # 3.5 Drill down into Ordering/Giftcard links to find:
    #   A) Redirects (e.g. /order -> toasttab.com)
    #   B) Links ON that page (e.g. /order -> Button href="toasttab.com")
    #   C) Scripts loaded by that page
    deep_dive_signals = []
    ordering_candidates = [l for l in classified_links if l.category == "ordering"]
    gift_candidates = [l for l in classified_links if l.category == "gift_card"]

    # Visit each distinct URL once; the cap keeps the number of extra crawls per site bounded.
    urls_to_drill = []
    seen_urls = set()
    for link_obj in ordering_candidates + gift_candidates:
        if link_obj.url not in seen_urls:
            seen_urls.add(link_obj.url)
            urls_to_drill.append(link_obj)
    urls_to_drill = urls_to_drill[:4]

    for link_obj in urls_to_drill:
        print(f"  > Drilling down into tech link: {link_obj.url}")
        try:
            # Same run config as the homepage so every crawl uses the same cache policy.
            sub_res = await crawler.arun(url=link_obj.url, config=config)
            if not sub_res.success:
                continue

            # Signal A: did we land on a different host than the one we clicked?
            # NOTE(review): prefers `redirected_url` when the result exposes it, falling
            # back to `url` — confirm which attribute holds the post-redirect URL.
            final_url = getattr(sub_res, "redirected_url", None) or sub_res.url
            if urlparse(final_url).netloc != urlparse(link_obj.url).netloc:
                deep_dive_signals.append(f"Redirect from {link_obj.text}: {final_url}")

            # Signal B: external links on the sub-page (internal page with a button to the POS).
            for ext_link in (sub_res.links or {}).get("external", [])[:15]:
                href = ext_link.get('href', '')
                if href:
                    deep_dive_signals.append(f"Link on '{link_obj.text}' page: {href}")

            # Signal C: scripts loaded by the sub-page.
            scripts |= _script_domains(BeautifulSoup(sub_res.html or "", 'html.parser'))
        except Exception as e:
            print(f"  ! Failed to drill down {link_obj.url}: {e}")

    # 4. USE GEMINI 3: Analyze Tech Stack on the categorized signals
    tech_profile = analyze_tech_stack_gemini3(
        classified_links,
        list(scripts)[:50],
        footer,
        deep_dive_signals
    )

    # A homepage hit overrides whatever the model guessed for Tripleseat.
    if ts_found:
        tech_profile.tripleseat_status = "Confirmed (Homepage)"

    # 5. Navigate to the first link Flash categorized as 'private_events'
    events_link = next((l for l in classified_links if l.category == "private_events"), None)
    if events_link:
        print(f"  > Flash identified Events page: {events_link.url}")
        try:
            evt_res = await crawler.arun(url=events_link.url, config=config)
            if evt_res.success:
                # Case-insensitive match; this also covers the "tripleseat.com" domain.
                if "tripleseat" in (evt_res.html or "").lower():
                    tech_profile.tripleseat_status = "Confirmed (Events Page)"
                elif tech_profile.tripleseat_status == "Not Found":
                    tech_profile.tripleseat_status = "Not Found on Events Page"
        except Exception as e:
            print(f"  ! Failed to process Events page {events_link.url}: {e}")

    return {
        "url": start_url,
        "pos": tech_profile.pos_system,
        "stack": tech_profile.tech_stack,
        "instagram": tech_profile.instagram_handle,
        "tripleseat": tech_profile.tripleseat_status,
        "ordering_url": next((l.url for l in classified_links if l.category == "ordering"), None)
    }
    

In [85]:
async def main(limit: int = 20, extra_urls: Optional[List[str]] = None) -> None:
    """
    Run the enrichment pipeline over a sample of venue websites and display the results.

    Args:
        limit: Number of leading rows of ``enrichdf["url"]`` to sample.
        extra_urls: URLs appended to the sample. Defaults to the single
            hand-picked site ``https://www.rezdora.nyc``.

    The sample is de-duplicated in first-seen order and missing URLs are dropped,
    so the processing order is the same on every run. Sites whose processing
    raises are reported and skipped.
    """
    if extra_urls is None:
        extra_urls = ['https://www.rezdora.nyc']

    # dropna() removes both None and NaN; drop_duplicates() keeps row order, unlike set().
    urls = enrichdf["url"].iloc[:limit].dropna().drop_duplicates().tolist()
    urls.extend(u for u in extra_urls if u not in urls)
    print(len(urls), "URLs to process.")

    results = []
    crawler_run_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
    # One crawler instance is shared by every site.
    async with AsyncWebCrawler(verbose=False) as crawler:
        for url in urls:
            try:
                data = await process_restaurant(crawler, url, config=crawler_run_config)
                if data:
                    results.append(data)
            except Exception as e:
                # Keep going: one broken site should not stop the batch.
                print(f"Error on {url}: {e}")

    # Local name chosen so the module-level `df` is not shadowed.
    summary = pd.DataFrame(results)
    display(summary)

In [86]:
# Run the batch; the notebook kernel allows `await` at the top level of a cell.
await main()

18 URLs to process.


--- Processing: https://www.25hours.bar/ ---


--- Processing: https://1803nyc.com ---


  > Drilling down into tech link: https://order.toasttab.com/online/1803-82-reade-street


  > Drilling down into tech link: https://www.toasttab.com/1803-82-reade-street/giftcards


  > Flash identified Events page: https://1803nyc.com/new-york-tribeca-1803-nyc-party


--- Processing: https://www.19cleveland.com/ ---


  > Drilling down into tech link: https://www.getsauce.com/order/19-cleveland/menu


  > Drilling down into tech link: https://www.19cleveland.getsauce.com/


  > Drilling down into tech link: https://squareup.com/gift/ME9Q35V8WP0TS/order?external_source=postoffice-egift-widget&external_token=oA0zrN2lMYi0glhp


  > Flash identified Events page: https://19cleveland.com/events-large-parties


--- Processing: http://www.44andx.com/ ---


  > Drilling down into tech link: https://www.44andx.com/takeaway.html


  > Drilling down into tech link: https://www.grubhub.com/restaurant/44--x-hells-kitchen-622-10th-ave-new-york/2875531


--- Processing: https://sushi456.com/ ---


  > Drilling down into tech link: http://order.sushi456.com/


--- Processing: http://www.11tigersnyc.com ---


  > Drilling down into tech link: https://www.11tigersnyc.com/cart


  > Drilling down into tech link: https://www.11tigersnyc.com/delivery


  > Flash identified Events page: https://www.11tigersnyc.com/catering


--- Processing: http://www.nycprimerib.com/ ---


  > Drilling down into tech link: https://www.nycprimerib.com/cart


  > Drilling down into tech link: https://www.nycprimerib.com/gift-cards


  > Flash identified Events page: https://www.nycprimerib.com/private-dining


--- Processing: http://www.fiveacresnyc.com ---


  > Drilling down into tech link: https://order.toasttab.com/online/5acres


  > Drilling down into tech link: https://www.toasttab.com/spurdock-30-rock-suite-8-rink-level/giftcards


  > Flash identified Events page: https://www.5acresnyc.com/private-events


--- Processing: https://www.3giovani.com/ ---


  > Drilling down into tech link: https://www.3giovani.com/catering


  > Drilling down into tech link: https://www.3giovani.com/cart


  > Drilling down into tech link: https://www.seamless.com/menu/3-giovani-formerly-tre-giovani-548-laguardia-pl-new-york/488789


--- Processing: https://www.12chairscafe.com/ ---


  > Drilling down into tech link: https://www.12chairscafe.com/catering


  > Drilling down into tech link: https://12chairscafe.getsauce.com/


  > Drilling down into tech link: https://www.12chairscafe.com/gift-cards


  > Flash identified Events page: https://www.12chairscafe.com/private-events


--- Processing: https://www.30lovenyc.com/ ---


  > Flash identified Events page: https://www.30lovenyc.com/private-events


--- Processing: http://www.4747lic.com ---


  > Drilling down into tech link: https://4747.hrpos.heartland.us/menu


--- Processing: https://www.andsonnyc.com ---


Flash Classification Error: 1 validation error for LinkCollection
  Invalid JSON: EOF while parsing a string at line 5 column 64831 [type=json_invalid, input_value='{\n  "classified_links":...\t\\t\\t\\t\\t\\t\\t\\t', input_type=str]
    For further information visit https://errors.pydantic.dev/2.12/v/json_invalid
--- Processing: https://15easttocquevillenyc.com/ ---


  > Drilling down into tech link: https://15easttocquevillenyc.com/catering.shtml


  > Drilling down into tech link: https://15easttocquevillenyc.com/giftcard


  > Flash identified Events page: https://15easttocquevillenyc.com/event


--- Processing: https://www.21greenpoint.com/ ---


  > Drilling down into tech link: https://www.21greenpoint.com/catering


  > Drilling down into tech link: https://www.21greenpoint.com/store/gift-cards


  > Flash identified Events page: https://www.21greenpoint.com/private-event


--- Processing: http://www.390social.com ---


  > Drilling down into tech link: https://www.390social.com/cart


  > Flash identified Events page: https://www.390social.com/events


--- Processing: http://12chairscafe.com/ ---


  > Drilling down into tech link: https://www.12chairscafe.com/catering


  > Drilling down into tech link: https://12chairscafe.getsauce.com/


  > Drilling down into tech link: https://www.12chairscafe.com/gift-cards


  > Flash identified Events page: https://www.12chairscafe.com/private-events


--- Processing: https://www.rezdora.nyc ---


  > Drilling down into tech link: https://www.rezdora.nyc/gift-cards


  > Flash identified Events page: https://www.rezdora.nyc/private-events


Unnamed: 0,url,pos,stack,instagram,tripleseat,ordering_url
0,https://www.25hours.bar/,,[Wix],25hoursnyc,Not Found,
1,https://1803nyc.com,Toast,[SpotHopper],1803nyc,Not Found on Events Page,https://order.toasttab.com/online/1803-82-read...
2,https://www.19cleveland.com/,Square,"[Sauce, Cloudflare, UserWay, Microsoft Clarity]",19cleveland,Not Found on Events Page,https://www.getsauce.com/order/19-cleveland/menu
3,http://www.44andx.com/,,"[Grubhub, Resy]",44xhellskitchen,Not Found,https://www.44andx.com/takeaway.html
4,https://sushi456.com/,,"[Resy, UserWay]",,Not Found,http://order.sushi456.com/
5,http://www.11tigersnyc.com,,"[Squarespace, Resy, DoorDash, Uber Eats]",11tigersnyc,Not Found on Events Page,https://www.11tigersnyc.com/cart
6,http://www.nycprimerib.com/,,"[Squarespace, Resy, Harri, UserWay, SecureTree]",nycprimerib,Confirmed (Events Page),https://www.nycprimerib.com/cart
7,http://www.fiveacresnyc.com,Toast,"[Bentobox, OpenTable, inkind]",5acres_nyc,Not Found on Events Page,https://order.toasttab.com/online/5acres
8,https://www.3giovani.com/,,"[Squarespace, Seamless]",,Not Found,https://www.3giovani.com/catering
9,https://www.12chairscafe.com/,Toast,"[BentoBox, Sauce, Resy, AudioEye]",12chairscafe,Not Found on Events Page,https://www.12chairscafe.com/catering


#### Tester

In [24]:
# Scratch run of process_restaurant on two hand-picked sites.
# NOTE(review): the recorded output of this cell has the columns
# result / soup / scripts / footer / ts_found, which is not the dict the current
# process_restaurant returns — it was executed against an earlier variant that returned
# the intermediate crawl artefacts. The execution counts (In [24] onward) are also lower
# than those of the cells above, so this section does not survive Restart & Run All.
# NOTE(review): rebinds the module-level names `urls`, `results` and `df`.
urls = [
        "https://www.andsonnyc.com",
        "https://www.rezdora.nyc/"
    ]
    
results = []
crawler_run_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
async with AsyncWebCrawler(verbose=False) as crawler:
    for url in urls:
        try:
            data = await process_restaurant(crawler, url, config=crawler_run_config)
            if data: results.append(data)
        except Exception as e:
            print(f"Error on {url}: {e}")

df = pd.DataFrame(results)
display(df)

--- Processing: https://www.andsonnyc.com ---


--- Processing: https://www.rezdora.nyc/ ---


Unnamed: 0,result,soup,scripts,footer,ts_found
0,(url='https://www.andsonnyc.com' html='<!DOCTY...,"[html, [[<meta content=""A7vZI3v+Gz7JfuRolKNM4A...","{www.googletagmanager.com, cdn.jsdelivr.net, u...",Greenwich Village 62 West 9th Street between 5...,False
1,(url='https://www.rezdora.nyc/' html='<!DOCTYP...,"[html, [[<meta content=""A7vZI3v+Gz7JfuRolKNM4A...","{app-assets.getbento.com, theme-assets.getbent...",Facebook Twitter Instagram Hours & Location Me...,False


In [25]:
# Rebuild the link list for the second test site (row 1) from its stored crawl result.
# NOTE(review): needs a "result" column in `df`, which only exists when the tester cell
# was run against the variant of process_restaurant that returned the raw crawl result.
all_links = [{"text": l['text'], "href": urljoin("https://www.rezdora.nyc/", l['href'])} 
                 for l in df["result"][1].links.get('internal', []) + df["result"][1].links.get('external', [])]

In [26]:
# Classify the test site's links with the Flash helper.
classify_test_results = classify_links_flash(all_links)

In [27]:
# Unwrap the list of ClassifiedLink objects from the collection.
test_links = classify_test_results.classified_links

In [33]:
# Fresh accumulators for the manual drill-down, then the same candidate selection that
# process_restaurant uses: ordering links first, then gift-card links, capped at four.
deep_dive_signals = []
scripts = set()
ordering_candidates = [l for l in test_links if l.category == "ordering"]
gift_candidates = [l for l in test_links if l.category == "gift_card"]
urls_to_drill = (ordering_candidates + gift_candidates)[:4]

In [37]:
# Manual copy of the drill-down step in process_restaurant. Unlike the function, it opens
# a new AsyncWebCrawler for every link and passes `crawler_run_config` to the sub-crawl.
for link_obj in urls_to_drill:
        print(f"  > Drilling down into tech link: {link_obj.url}")
        try:
            async with AsyncWebCrawler(verbose=False) as crawler:
                sub_res = await crawler.arun(url=link_obj.url, config=crawler_run_config)

                if sub_res.success:
                    # Signal A: Did we get redirected?
                    # Compare the final URL to the one we clicked.
                    # If we clicked /order and ended up on toasttab.com, that's a strong signal.
                    if urlparse(sub_res.url).netloc != urlparse(link_obj.url).netloc:
                        deep_dive_signals.append(f"Redirect from {link_obj.text}: {sub_res.url}")

                    # Signal B: Scan for External Links on this sub-page
                    # This handles the case where the page is internal but contains a button to the POS.
                    # We extract external links found on this sub-page.
                    external_links = sub_res.links.get("external", [])

                    # Only the first 15 external links are recorded per sub-page.
                    for ext_link in external_links[:15]:
                        href = ext_link.get('href', '')
                        if href:
                            deep_dive_signals.append(f"Link on '{link_obj.text}' page: {href}")

                    # Signal C: Capture scripts on this sub-page
                    sub_soup = BeautifulSoup(sub_res.html, 'html.parser')
                    for s in sub_soup.find_all('script', src=True):
                        domain = urlparse(s.get('src')).netloc
                        if domain: scripts.add(domain)
        except Exception as e:
            # Best effort: report the failure and move on to the next link.
            print(f"  ! Failed to drill down {link_obj.url}: {e}")

  > Drilling down into tech link: https://www.rezdora.nyc/gift-cards


In [41]:
# Run the tech-stack analysis on the manually gathered signals for the second test site.
# NOTE(review): `results[1]['footer']` only exists if the tester cell produced dicts with a
# "footer" key, i.e. with the earlier variant of process_restaurant.
tech_profile = analyze_tech_stack_gemini3(
        test_links, 
        list(scripts)[:50], 
        results[1]['footer'],
        deep_dive_signals # <--- Passing the deep dive content
    )

In [43]:
# Manual copy of the private-events step: visit the first 'private_events' link and
# update the Tripleseat status on `tech_profile` in place.
events_link = next((l for l in test_links if l.category == "private_events"), None)
if events_link:
    print(f"  > Flash identified Events page: {events_link.url}")
    try:
        async with AsyncWebCrawler(verbose=False) as crawler:
            # This crawl is made without `crawler_run_config`.
            evt_res = await crawler.arun(url=events_link.url)
            if evt_res.success:
                # The lower-cased check already covers the "tripleseat.com" substring.
                if "tripleseat.com" in evt_res.html or "tripleseat" in evt_res.html.lower():
                    tech_profile.tripleseat_status = "Confirmed (Events Page)"
                elif tech_profile.tripleseat_status == "Not Found":
                    tech_profile.tripleseat_status = "Not Found on Events Page"
    except Exception as e:
            # NOTE(review): the message says "drill down" although this is the events page.
            print(f"  ! Failed to drill down {events_link.url}: {e}")

  > Flash identified Events page: https://www.rezdora.nyc/private-events


In [44]:
# Show the final profile for the test site.
tech_profile

RestaurantTechProfile(pos_system='Toast', tech_stack=['BentoBox', 'Resy', 'AudioEye', 'Google Maps'], ordering_provider=None, instagram_handle='rezdoranyc', tripleseat_status='Confirmed (Events Page)')