# Pydantic models for SERPs (search engine result pages)

In [8]:
from enum import Enum
from pydantic import BaseModel

class SearchSourceEnum(str, Enum):
    """Search engines that a SERP result can be attributed to.

    The ``str`` mixin makes every member compare equal to its plain string
    value. Each value is the lowercase name of the engine it stands for.
    """

    google = 'google'
    ddg = 'duckduckgo'
    bing = 'bing'
    yandex = 'yandex'
    brave = 'brave'
    yahoo = 'yahoo'
    # Previously 'firecracker', the only value that did not name its engine.
    ecosia = 'ecosia'
    qwant = 'qwant'
    aol = 'aol'

class SerpInput(BaseModel):
    """Placeholder model for SERP input; it declares no fields yet."""

class SerpOutput(BaseModel):
    """Model of a SERP result; presumably one entry of a results page.

    Every field is declared without a default, and ``source`` must be a
    ``SearchSourceEnum`` member.
    """
    title: str
    description: str
    # NOTE(review): presumably the breadcrumb trail shown with the result,
    # flattened into one string — confirm the expected format.
    breadcrumbs: str
    # NOTE(review): presumably the URL text as displayed on the page, as
    # opposed to `url`, the actual link target — confirm.
    display_url: str
    url: str
    # Which search engine the result is attributed to.
    source: SearchSourceEnum

    


# Collecting search query datasets from [trends.google.com](https://trends.google.com) and various other sources

This is a simple `aiohttp` crawler for adding new entries to our search-query dataset.

In [16]:

# cookies = {
#     'NID': '525=bZa35AR_9LHsoGvCWVhAAmzTKb81Q1j3Ug5tbJc0Oh2ZQVrjNkG-jubxLdAxFtp0uzNySdmH9FCZ55rwZvxh1NWch9k6PsVb9fIfMreO6C8SmRg2xbIVphfq6QaHWTPZM-zmUQKqkmmGot46NbI0LwzZiPjKWZD1gCwpB3DdWvCoKxvewFwEBvO5rS76P5SmUzQ0n2BdFbH8yOPvuzl8_TLKF1MtU_6OZYUB6ryrE0zmSN29N3OOugZFB4KxH0Yk_b4McCVDogBfA0VFwU4W2luKuStNNnoGsJfaAFd8HjXMfEatRFb-VwCNtUqMeK5JeoGxmnyPorMG9e3FlDtjzTtdVGkXHvffCKKwX72SN9N5TMeQk5rQ15iDByjeotQaPWuaqpjJ6EWBJVPID_xlmtq63hnw2PaDcQXqy_kzpyyyq7mTbH3wofCE',
#     'SEARCH_SAMESITE': 'CgQIv54B',
#     'AEC': 'AVh_V2h6p3VYs1jmgqY_RX-XMSenyDQ4lsJdcHZ3Oyp1Q90yh5s3BbwTqg',
#     'OTZ': '8237762_80_84_104220_80_446880',
# }

# Request headers sent with every Google Trends call; they imitate a Firefox
# browser session on trends.google.com.
# Each header appears exactly once: the literal used to list the three
# Sec-Fetch-* entries twice, and the repeats have been removed (the resulting
# dict is unchanged).
google_trends_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:142.0) Gecko/20100101 Firefox/142.0',
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'en-US,en;q=0.5',
    # NOTE(review): advertising br/zstd relies on the HTTP client being able
    # to decode those encodings — confirm the optional decoders are installed.
    'Accept-Encoding': 'gzip, deflate, br, zstd',
    'Connection': 'keep-alive',
    'Referer': 'https://trends.google.com/trends/explore?geo=CA&q=OSINT&hl=en-US',
    # NOTE(review): hard-coded browser session cookie. Consider loading it
    # from the environment or a local config file instead of committing it.
    'Cookie': 'NID=525=bZa35AR_9LHsoGvCWVhAAmzTKb81Q1j3Ug5tbJc0Oh2ZQVrjNkG-jubxLdAxFtp0uzNySdmH9FCZ55rwZvxh1NWch9k6PsVb9fIfMreO6C8SmRg2xbIVphfq6QaHWTPZM-zmUQKqkmmGot46NbI0LwzZiPjKWZD1gCwpB3DdWvCoKxvewFwEBvO5rS76P5SmUzQ0n2BdFbH8yOPvuzl8_TLKF1MtU_6OZYUB6ryrE0zmSN29N3OOugZFB4KxH0Yk_b4McCVDogBfA0VFwU4W2luKuStNNnoGsJfaAFd8HjXMfEatRFb-VwCNtUqMeK5JeoGxmnyPorMG9e3FlDtjzTtdVGkXHvffCKKwX72SN9N5TMeQk5rQ15iDByjeotQaPWuaqpjJ6EWBJVPID_xlmtq63hnw2PaDcQXqy_kzpyyyq7mTbH3wofCE; SEARCH_SAMESITE=CgQIv54B; AEC=AVh_V2h6p3VYs1jmgqY_RX-XMSenyDQ4lsJdcHZ3Oyp1Q90yh5s3BbwTqg; OTZ=8237762_80_84_104220_80_446880',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'Host': 'trends.google.com',
    'TE': 'trailers',
}


In [None]:
import aiohttp
import json
import urllib.parse
import asyncio

# Value sent as the `token` query parameter of the related-searches request.
# NOTE(review): hard-coded; presumably issued for one specific widget
# request/session — confirm it is still valid before reusing it.
TRENDS_TOKEN = "APP6_UEAAAAAaLSGzuAxitMWsZXYwz3dmMHa7M_gVOAw"

# Guard prefix that precedes the JSON document in the response body.
_XSSI_PREFIX = ")]}',"


def _build_related_searches_url(country_code, query):
    """Return the related-searches widget URL for *query* in *country_code*."""
    payload = json.dumps({
        "restriction": {
            "geo": {"country": country_code},
            "time": "2024-08-30 2025-08-30",
            "originalTimeRangeForExploreUrl": "today 12-m",
            "complexKeywordsRestriction": {
                "keyword": [{"type": "BROAD", "value": query}],
            },
        },
        "keywordType": "QUERY",
        "metric": ["TOP", "RISING"],
        "trendinessSettings": {"compareTime": "2023-08-30 2024-08-29"},
        "requestOptions": {"property": "", "backend": "IZG", "category": 0},
        "language": "en",
        "userCountryCode": country_code,
        "userConfig": {"userType": "USER_TYPE_LEGIT_USER"},
    })
    req = urllib.parse.quote_plus(payload)
    # Google doesn't urlencode `:` or `,`, so undo Python's encoding of
    # those two characters to match the format Google expects.
    req = req.replace("%3A", ":").replace("%2C", ",")
    return (
        'https://trends.google.com/trends/api/widgetdata/relatedsearches'
        f'?hl=en-US&tz=360&req={req}&token={TRENDS_TOKEN}'
    )


def _strip_xssi_prefix(body):
    """Return *body* without the guard prefix, if it starts with one.

    Only a leading prefix is removed, so the same character sequence inside
    the JSON payload itself is left untouched.
    """
    body = body.lstrip()
    if body.startswith(_XSSI_PREFIX):
        body = body[len(_XSSI_PREFIX):]
    return body


async def main(COUNTRY_CODE, QUERY):
    """Fetch and pretty-print Google Trends related queries.

    Builds the related-searches request for ``QUERY`` restricted to
    ``COUNTRY_CODE``, sends it with ``google_trends_headers`` and prints the
    request URL, the response status, the content type and the parsed JSON
    response. Nothing is returned.

    Args:
        COUNTRY_CODE: Country code placed in the request's geo restriction
            and ``userCountryCode`` fields (e.g. ``"CA"``).
        QUERY: Search term whose related queries are requested.

    Raises:
        json.JSONDecodeError: If the response body is not JSON once the
            guard prefix has been stripped.
    """
    google_trends_url = _build_related_searches_url(COUNTRY_CODE, QUERY)
    print(f"Pulling Google Trends related queries\nCountry: {COUNTRY_CODE}\nQuery: {QUERY}")
    print(f"Google Trends Request URL: {google_trends_url}")
    print("---")
    async with aiohttp.ClientSession(headers=google_trends_headers) as session:
        async with session.get(google_trends_url) as response:
            print("Status:", response.status)
            print("Content-type:", response.headers['content-type'])

            json_response = json.loads(_strip_xssi_prefix(await response.text()))

            # Pretty print related queries response:
            print("Response: ", json.dumps(json_response, indent=4))

# Top-level `await` only works inside a notebook/IPython cell; a plain script
# would need `asyncio.run(main("CA", "OSINT"))` instead.
await main("CA", "OSINT")


Pulling Google Trends related queries
Country: CA
Query: OSINT
Google Trends Request URL: https://trends.google.com/trends/api/widgetdata/relatedsearches?hl=en-US&tz=360&req=%7B%22restriction%22:+%7B%22geo%22:+%7B%22country%22:+%22CA%22%7D,+%22time%22:+%222024-08-30+2025-08-30%22,+%22originalTimeRangeForExploreUrl%22:+%22today+12-m%22,+%22complexKeywordsRestriction%22:+%7B%22keyword%22:+%5B%7B%22type%22:+%22BROAD%22,+%22value%22:+%22OSINT%22%7D%5D%7D%7D,+%22keywordType%22:+%22QUERY%22,+%22metric%22:+%5B%22TOP%22,+%22RISING%22%5D,+%22trendinessSettings%22:+%7B%22compareTime%22:+%222023-08-30+2024-08-29%22%7D,+%22requestOptions%22:+%7B%22property%22:+%22%22,+%22backend%22:+%22IZG%22,+%22category%22:+0%7D,+%22language%22:+%22en%22,+%22userCountryCode%22:+%22CA%22,+%22userConfig%22:+%7B%22userType%22:+%22USER_TYPE_LEGIT_USER%22%7D%7D&token=APP6_UEAAAAAaLSGzuAxitMWsZXYwz3dmMHa7M_gVOAw
---
Status: 200
Content-type: application/json; charset=UTF-8
Response:  {
    "default": {
        "ranked