In [13]:
# CHECK IF YOU ARE STILL BLOCKED BY GOOGLE 12PM APR 3 (1-24 HOURS IF TEMPORARY, SEVERAL DAYS TO WEEKS IF AGGRESSIVE, RARE PERMANENT BLOCK)
# SEE IF YOU CAN USE THE SAME API KEY LATER ON, IF NOT GENERATE A NEW ONE WITH SCRAPINGDOG (200 API KEYS AVAILABLE IN THE NEXT MONTH)
# 1 API KEY CAN SCRAPE ~4300 RESULTS BEFORE GETTING BLOCKED

import os
import random
import time

import requests

# Configuration
# The key can be supplied through the SCRAPINGDOG_API_KEY environment variable,
# so a blocked key can be swapped without editing this cell. The literal is
# only the fallback used when the variable is unset or empty.
API_KEY = os.environ.get("SCRAPINGDOG_API_KEY") or "67e973fad9a23c3a9fd5e970"
BASE_URL = "https://api.scrapingdog.com/google"
TEST_QUERY = "weather today"  # Generic test query

def check_google_block():
    """Check if Google is blocking the API key with a minimal test request.

    Sends one single-result search for ``TEST_QUERY`` to ``BASE_URL`` after a
    short random delay, then prints a verdict derived from the HTTP status and
    from whether the JSON body contains ``organic_results``.

    Transport errors, an unparseable body and an unexpected payload shape are
    reported separately, so a malformed response is no longer mislabelled as a
    connection failure.

    Returns:
        None. The outcome is only printed.
    """
    params = {
        "api_key": API_KEY,
        "query": TEST_QUERY,
        "results": 1,  # Only 1 result to reduce footprint
        "country": "us",
        "advance_search": "false"
    }

    # Add slight random delay
    time.sleep(random.uniform(1, 3))

    # Only the network call itself can fail with a transport error.
    try:
        response = requests.get(BASE_URL, params=params, timeout=10)
    except requests.RequestException as e:
        print(f"🚨 Connection failed: {str(e)}")
        print("This could indicate a block or network issue")
        return

    if response.status_code == 403:
        print("❌ BLOCKED: Google detected scraping (HTTP 403)")
        return
    if response.status_code == 429:
        print("⏳ RATE LIMITED: Too many requests (HTTP 429)")
        return
    if response.status_code != 200:
        print(f"⚠️ Unexpected status: {response.status_code} | Response: {response.text[:200]}")
        return

    # requests raises a ValueError subclass when the body is not valid JSON.
    try:
        data = response.json()
    except ValueError:
        print(f"⚠️ HTTP 200 but body is not valid JSON | Response: {response.text[:200]}")
        return

    results = data.get('organic_results') if isinstance(data, dict) else None
    if not results:
        print("⚠️ API working but no results returned (possible soft block)")
        return

    # Tolerate a first entry that is not a dict or lacks title/link.
    first = results[0] if isinstance(results, list) and isinstance(results[0], dict) else {}
    print("✅ Success! You're NOT blocked. Sample result:")
    print(f"Title: {first.get('title', '(missing)')}")
    print(f"Link: {first.get('link', '(missing)')}")

if __name__ == "__main__":
    # Run the one-off probe only when this code is executed directly,
    # not when it is imported.
    start_message = "Running Google block check..."
    print(start_message)
    check_google_block()

Running Google block check...
❌ BLOCKED: Google detected scraping (HTTP 403)


In [9]:
# INTERNATIONAL GENERAL (DONE - FILTER IN POST)

import requests
import csv
import os
from datetime import datetime

# Configuration
# The key can be supplied through the SCRAPINGDOG_API_KEY environment variable,
# so a blocked key can be swapped without editing this cell. The literal is
# only the fallback used when the variable is unset or empty.
API_KEY = os.environ.get("SCRAPINGDOG_API_KEY") or "67e973fad9a23c3a9fd5e970"
BASE_URL = "https://api.scrapingdog.com/google"
OUTPUT_FILE = os.path.expanduser("~/Desktop/trial1.csv")
# Search terms; each one is requested page by page by fetch_and_save_results().
QUERIES = [
    "climate database",
    "socioeconomic statistics",
    "climate vulnerability assessment",
    "climate risk model",
    "climate fund",
    "climate adaptation framework",
    "climate policy",
    "climate technology",
    "climate innovation research",
    "climate resilience building",
    "climate adaptation and mitigation strategies",
    "climate strategy monitoring",
    "climate strategy evaluation",
    "climate resilience program",
    "climate adaptation plan",
]

def is_duplicate(result, existing_urls, existing_titles):
    """Check if result is a duplicate based on URL and title.

    Args:
        result: Mapping for one search hit; its 'link' and 'title' entries
            are read. Missing or None values are treated as empty.
        existing_urls: Container of already-seen URLs, lowercased and with
            query string and fragment removed.
        existing_titles: Container of already-seen titles, lowercased and
            stripped.

    Returns:
        True when the normalised URL or the normalised title has been seen
        before, otherwise False. An empty URL or title never counts as a match.
    """
    url = (result.get('link') or '').lower().strip()
    title = (result.get('title') or '').lower().strip()

    # Drop the whole query string and fragment (this also removes tracking
    # parameters) so variants of the same page compare equal.
    clean_url = url.split('?')[0].split('#')[0]

    # An empty key carries no identity: without this guard, one result that
    # lacks a link or title would make every later such result a "duplicate".
    url_seen = bool(clean_url) and clean_url in existing_urls
    title_seen = bool(title) and title in existing_titles
    return url_seen or title_seen

def fetch_and_save_results():
    """Fetch Google results, remove duplicates, and save to CSV.

    For every entry in ``QUERIES`` this requests pages 0-9 from ``BASE_URL``,
    keeps each organic result that ``is_duplicate`` does not reject, and
    finally writes all kept rows to ``OUTPUT_FILE``.

    A page whose request fails or whose body is not valid JSON is reported and
    skipped; the remaining pages and queries still run. A single malformed
    result no longer discards the rest of its page.

    Returns:
        None. Progress is printed and the rows are written to ``OUTPUT_FILE``.
    """
    all_results = []
    seen_urls = set()
    seen_titles = set()

    for query in QUERIES:
        print(f"\nSearching for: '{query}'")

        for page in range(10):  # Pages 0-9
            params = {
                "api_key": API_KEY,
                "query": query,
                "results": 100,
                "country": "us",
                "page": page,
                "advance_search": "false"
            }

            # Keep the try body to the calls that can actually fail.
            try:
                response = requests.get(BASE_URL, params=params, timeout=15)
                response.raise_for_status()
                data = response.json()
            except (requests.RequestException, ValueError) as e:
                print(f"Error on page {page + 1}: {str(e)}")
                continue

            organic_results = data.get('organic_results') if isinstance(data, dict) else None
            if not isinstance(organic_results, list):
                organic_results = []

            new_results = 0
            for result in organic_results:
                if not isinstance(result, dict):
                    continue

                # Normalise once; None or missing fields become ''.
                link = str(result.get('link') or '').strip()
                title = str(result.get('title') or '').strip()
                snippet = str(result.get('snippet') or '').replace('\n', ' ').strip()

                if is_duplicate({'link': link, 'title': title}, seen_urls, seen_titles):
                    continue

                # Lowercasing is for the dedup key only; the saved link keeps
                # its original case because URL paths can be case-sensitive.
                clean_url = link.lower().split('?')[0].split('#')[0]
                title_key = title.lower()

                # Never record an empty key, or every later result lacking
                # that field would be dropped as a duplicate.
                if clean_url:
                    seen_urls.add(clean_url)
                if title_key:
                    seen_titles.add(title_key)

                all_results.append({
                    'query': query,
                    'rank': result.get('rank'),
                    'title': title,
                    'link': link,
                    'snippet': snippet,
                    'page': page + 1
                })
                new_results += 1

            print(f"Page {page + 1}: {len(organic_results)} results | {new_results} new")

    # Save to CSV
    if all_results:
        fieldnames = ['query', 'rank', 'title', 'link', 'snippet', 'page']

        with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_results)

        print(f"\nSuccess! Saved {len(all_results)} unique results to {OUTPUT_FILE}")
    else:
        print("No results to save")

if __name__ == "__main__":
    # Announce how many queries will be run, then start the scrape.
    query_count = len(QUERIES)
    print(f"Starting Google searches for {query_count} queries")
    fetch_and_save_results()

Starting Google searches for 15 queries

Searching for: 'climate database'
Page 1: 10 results | 10 new
Page 2: 99 results | 88 new
Page 3: 99 results | 5 new
Page 4: 98 results | 1 new
Page 5: 88 results | 0 new
Page 6: 75 results | 0 new
Page 7: 65 results | 0 new
Page 8: 53 results | 0 new
Page 9: 45 results | 0 new
Page 10: 10 results | 0 new

Searching for: 'socioeconomic statistics'
Page 1: 10 results | 10 new
Page 2: 10 results | 8 new
Page 3: 99 results | 93 new
Page 4: 100 results | 2 new
Page 5: 99 results | 1 new
Page 6: 87 results | 0 new
Page 7: 79 results | 0 new
Page 8: 69 results | 0 new
Page 9: 59 results | 0 new
Page 10: 49 results | 0 new

Searching for: 'climate vulnerability assessment'
Page 1: 99 results | 93 new
Page 2: 99 results | 5 new
Page 3: 99 results | 8 new
Page 4: 99 results | 14 new
Page 5: 99 results | 8 new
Page 6: 99 results | 11 new
Page 7: 99 results | 8 new
Page 8: 99 results | 13 new
Page 9: 99 results | 5 new
Page 10: 99 results | 1 new

Searchin

In [None]:
# COUNTRY-SPECIFIC (RE-TRY LATER - ADDED TIME DELAY AND HEADERS, CONSIDER ROTATING PROXIES (VPN?) AND HEADERS)

import requests
import csv
import os
from datetime import datetime
import time
import random

# Configuration
# The key can be supplied through the SCRAPINGDOG_API_KEY environment variable,
# so a blocked key can be swapped without editing this cell. The literal is
# only the fallback used when the variable is unset or empty.
API_KEY = os.environ.get("SCRAPINGDOG_API_KEY") or "67e973fad9a23c3a9fd5e970"
BASE_URL = "https://api.scrapingdog.com/google"
OUTPUT_FILE = os.path.expanduser("~/Desktop/countrytrial.csv")
# Search terms; each one is requested page by page by fetch_and_save_results().
QUERIES = [
    "administrative service",
    "natural disaster",
    "disaster relief",
    "aid delivery",
    "government protection",
    "capacity building program",
    "resilience training",
    "local adaptation efforts",
    "municipal policy",
    "weather warning",
    "rural improvement",
    "regional law",
    "provincial office",
    "official statistics",
    "climate report",
]

# Browser-like headers passed to requests.get() together with the API call.
#
# The Google session cookie that used to be pasted here has been removed:
#   * the literal was split across two physical lines, which is a syntax error;
#   * it contained live account session tokens, and these headers are sent to
#     BASE_URL (a third-party API) rather than to Google directly.
# NOTE(review): it is unclear whether the API forwards caller headers to Google
# at all - confirm before relying on a cookie here.
# If a cookie is really needed, export it as GOOGLE_COOKIE instead of
# committing it; the tokens that were pasted here should be treated as
# compromised and the session signed out.
headers = {
    "accept": "*/*",
    "accept-language": "en-US,en;q=0.9,no;q=0.8",
    "cache-control": "no-cache",
    "downlink": "10",
    "pragma": "no-cache",
    "priority": "u=1, i",
    "referer": "https://www.google.com/",
    "rtt": "50",
    "sec-ch-prefers-color-scheme": "dark",
    "sec-ch-ua": "\"Chromium\";v=\"134\", \"Not:A-Brand\";v=\"24\", \"Google Chrome\";v=\"134\"",
    "sec-ch-ua-arch": "\"arm\"",
    "sec-ch-ua-bitness": "\"64\"",
    "sec-ch-ua-form-factors": "\"Desktop\"",
    "sec-ch-ua-full-version": "\"134.0.6998.166\"",
    "sec-ch-ua-full-version-list": "\"Chromium\";v=\"134.0.6998.166\", \"Not:A-Brand\";v=\"24.0.0.0\", \"Google Chrome\";v=\"134.0.6998.166\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-model": "",
    "sec-ch-ua-platform": "\"macOS\"",
    "sec-ch-ua-platform-version": "\"14.5.0\"",
    "sec-ch-ua-wow64": "?0",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
    "x-client-data": "CIa2yQEIpbbJAQipncoBCJ7hygEIlaHLAQiKo8sBCIagzQEIuMjNAQj+pc4BCLzVzgEIruTOARjh4s4B"
}

# Optional cookie, taken from the environment so it never lives in the source.
_cookie = os.environ.get("GOOGLE_COOKIE")
if _cookie:
    headers["cookie"] = _cookie.strip()

def is_duplicate(result, existing_urls, existing_titles):
    """Check if result is a duplicate based on URL and title.

    Args:
        result: Mapping for one search hit; its 'link' and 'title' entries
            are read. Missing or None values are treated as empty.
        existing_urls: Container of already-seen URLs, lowercased and with
            query string and fragment removed.
        existing_titles: Container of already-seen titles, lowercased and
            stripped.

    Returns:
        True when the normalised URL or the normalised title has been seen
        before, otherwise False. An empty URL or title never counts as a match.
    """
    url = (result.get('link') or '').lower().strip()
    title = (result.get('title') or '').lower().strip()

    # Drop the whole query string and fragment (this also removes tracking
    # parameters) so variants of the same page compare equal.
    clean_url = url.split('?')[0].split('#')[0]

    # An empty key carries no identity: without this guard, one result that
    # lacks a link or title would make every later such result a "duplicate".
    url_seen = bool(clean_url) and clean_url in existing_urls
    title_seen = bool(title) and title in existing_titles
    return url_seen or title_seen

def fetch_and_save_results():
    """Fetch Google results, remove duplicates, and save to CSV.

    For every entry in ``QUERIES`` this requests pages 0-9 from ``BASE_URL``
    (country "ph", sending the module-level ``headers``), sleeping a random
    2-5 seconds before each request. Each organic result that ``is_duplicate``
    does not reject is kept, and all kept rows are finally written to
    ``OUTPUT_FILE``.

    A page whose request fails or whose body is not valid JSON is reported and
    skipped; the remaining pages and queries still run. A single malformed
    result no longer discards the rest of its page.

    Returns:
        None. Progress is printed and the rows are written to ``OUTPUT_FILE``.
    """
    all_results = []
    seen_urls = set()
    seen_titles = set()

    for query in QUERIES:
        print(f"\nSearching for: '{query}'")

        for page in range(10):  # Pages 0-9

            # Randomised pause between requests.
            delay = random.uniform(2, 5)
            time.sleep(delay)

            params = {
                "api_key": API_KEY,
                "query": query,
                "results": 100,
                "country": "ph",
                "page": page,
                "advance_search": "false"
            }

            # Keep the try body to the calls that can actually fail.
            try:
                response = requests.get(BASE_URL, params=params, headers=headers, timeout=15)
                response.raise_for_status()
                data = response.json()
            except (requests.RequestException, ValueError) as e:
                print(f"Error on page {page + 1}: {str(e)}")
                continue

            organic_results = data.get('organic_results') if isinstance(data, dict) else None
            if not isinstance(organic_results, list):
                organic_results = []

            new_results = 0
            for result in organic_results:
                if not isinstance(result, dict):
                    continue

                # Normalise once; None or missing fields become ''.
                link = str(result.get('link') or '').strip()
                title = str(result.get('title') or '').strip()
                snippet = str(result.get('snippet') or '').replace('\n', ' ').strip()

                if is_duplicate({'link': link, 'title': title}, seen_urls, seen_titles):
                    continue

                # Lowercasing is for the dedup key only; the saved link keeps
                # its original case because URL paths can be case-sensitive.
                clean_url = link.lower().split('?')[0].split('#')[0]
                title_key = title.lower()

                # Never record an empty key, or every later result lacking
                # that field would be dropped as a duplicate.
                if clean_url:
                    seen_urls.add(clean_url)
                if title_key:
                    seen_titles.add(title_key)

                all_results.append({
                    'query': query,
                    'rank': result.get('rank'),
                    'title': title,
                    'link': link,
                    'snippet': snippet,
                    'page': page + 1
                })
                new_results += 1

            print(f"Page {page + 1}: {len(organic_results)} results | {new_results} new")

    # Save to CSV
    if all_results:
        fieldnames = ['query', 'rank', 'title', 'link', 'snippet', 'page']

        with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_results)

        print(f"\nSuccess! Saved {len(all_results)} unique results to {OUTPUT_FILE}")
    else:
        print("No results to save")

if __name__ == "__main__":
    # Announce how many queries will be run, then start the scrape.
    query_count = len(QUERIES)
    print(f"Starting Google searches for {query_count} queries")
    fetch_and_save_results()