In [1]:
import pandas as pd

# CSV file holding the keyword list that the rest of the notebook batches up.
KEYWORDS_CSV = "unique_keywords.csv"
keywords = pd.read_csv(KEYWORDS_CSV)
# Optional shuffle, left disabled; the fixed random_state would keep the
# shuffled order repeatable between runs.
# keywords = keywords.sample(frac=1, random_state=42).reset_index(drop=True)

# Last expression of the cell: displays (rows, columns) of the loaded frame.
keywords.shape

(195600, 1)

In [2]:
# Split keywords into batches of 1,000 (see batch_size)
keyword_batches = []
batch_size = 1000
# Ceiling division: a trailing partial batch still counts as one batch.
num_batches = -(-len(keywords) // batch_size)

keyword_column = keywords["Keyword"]
for start_idx in range(0, num_batches * batch_size, batch_size):
    chunk = keyword_column.iloc[start_idx:start_idx + batch_size].tolist()
    # Every batch is stored as a one-element list wrapping a payload dict.
    keyword_batches.append([{"keywords": chunk, "sort_by": "relevance"}])

print(f"Split into {len(keyword_batches)} batches")
# for i, batch in enumerate(keyword_batches):
#     print(f"Batch {i+1} size: {len(batch[0]['keywords'])}")


Split into 196 batches


In [9]:
import dotenv
import os
from dataforseo_client import configuration as dfs_config, api_client as dfs_api_provider
from dataforseo_client.api.keywords_data_api import KeywordsDataApi
from dataforseo_client.rest import ApiException
from dataforseo_client.models.keywords_data_google_ads_search_volume_live_request_info import KeywordsDataGoogleAdsSearchVolumeLiveRequestInfo
import time

# Pull variables from a .env file into the process environment, then read the
# DataForSEO credentials from it.
dotenv.load_dotenv()
login = os.getenv("DATA_FOR_SEO_LOGIN")
password = os.getenv("DATA_FOR_SEO_PASSWORD")

# Start of the reporting window: 4 * 365 days before now, formatted as a
# UTC calendar date (leap days are not accounted for).
four_years_in_seconds = 4 * 365 * 24 * 60 * 60
window_start_utc = time.gmtime(time.time() - four_years_in_seconds)
get_date_from = time.strftime("%Y-%m-%d", window_start_utc)
print(get_date_from)

def get_keyword_search_volume(keywords: list[str]):
    """
    Get search volume data for keywords from DataForSEO API

    Args:
        keywords (list): Either a flat list of keyword strings, or a
            one-element payload list of the form
            ``[{"keywords": [...], ...}]``; in the latter case only the
            inner ``"keywords"`` list is sent.

    Returns:
        The response object returned by ``google_ads_search_volume_live``
        (callers in this notebook convert it with ``.to_dict()``), or
        ``None`` when there are no keywords to look up or the client raised
        an ``ApiException``.
    """
    # Extract just the keywords list from the payload dictionary
    if keywords and isinstance(keywords[0], dict):
        keywords = keywords[0]['keywords']

    # Nothing to look up: return early instead of raising IndexError on
    # keywords[0] or sending an empty request.
    if not keywords:
        print("No keywords supplied; skipping API call\n")
        return None

    configuration = dfs_config.Configuration(username=login, password=password)
    with dfs_api_provider.ApiClient(configuration) as api_client:
        # Create an instance of the API class
        keywords_api = KeywordsDataApi(api_client)

        try:
            request = KeywordsDataGoogleAdsSearchVolumeLiveRequestInfo(
                keywords=keywords,
                search_partners=True,
                date_from=get_date_from,
                date_to=None
            )
            return keywords_api.google_ads_search_volume_live([request])

        except ApiException as e:
            # Best-effort: report the failure and let the caller check for None.
            print(f"Exception: {e}\n")
            return None

2021-11-05


In [10]:
# Fetch search volume for the first batch only; `result` is whatever
# get_keyword_search_volume returns (None if the API call raised).
result = get_keyword_search_volume(keyword_batches[0])

In [11]:
# Save the API response to a file
import json

# get_keyword_search_volume returns None when the API call raised, so guard
# against that instead of failing with AttributeError on `None.to_dict()`.
if result is None:
    print("No API response to save for batch 0")
else:
    # Convert the response object to a dictionary
    result_dict = result.to_dict()

    # Save to JSON file with nice formatting
    with open('search_volume_results_0.json', 'w') as f:
        json.dump(result_dict, f, indent=4)


In [None]:
# Create a directory for search volumes if it doesn't exist
import json
import os
os.makedirs('search_volumes', exist_ok=True)

# Process the remaining batches and save results. The slice skips batch 0,
# so numbering starts at 1 to keep the log messages and file names aligned
# with each batch's position in keyword_batches.
for i, batch in enumerate(keyword_batches[1:], start=1):
    # Get search volume for this batch
    result = get_keyword_search_volume(batch)

    if result:
        # Convert response to dictionary
        result_dict = result.to_dict()

        # Save to numbered JSON file
        filename = f'search_volumes/search_volume_results_{i}.json'
        with open(filename, 'w') as f:
            json.dump(result_dict, f, indent=4)
        print(f"Saved results for batch {i} to {filename}")
    else:
        # A falsy result means the API call failed; keep going with the
        # next batch rather than aborting the whole run.
        print(f"Failed to get results for batch {i}")


In [4]:
import requests
import base64
import json
import os

# Task ID to retrieve
task_id = "11111601-1191-0108-0000-b4fdd0d92e76"

# API endpoint
api_url = f"https://api.dataforseo.com/v3/keywords_data/google_ads/keywords_for_site/task_get/{task_id}"

# Authentication: credentials are read from the environment. This cell does
# not load a .env file itself, so they must already be set in the process.
login = os.getenv("DATA_FOR_SEO_LOGIN")
password = os.getenv("DATA_FOR_SEO_PASSWORD")

# Create Basic Auth header
credentials = f"{login}:{password}"
encoded_credentials = base64.b64encode(credentials.encode()).decode()

headers = {
    "Authorization": f"Basic {encoded_credentials}",
    "Content-Type": "application/json"
}

# Make GET request. requests has no default timeout, so without one an
# unresponsive server would block this cell (and the kernel) indefinitely.
REQUEST_TIMEOUT_SECONDS = 60
response = requests.get(api_url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)

# Check response
if response.status_code == 200:
    result = response.json()
    # Top-level status of the API call as a whole
    print(f"Status Code: {result.get('status_code')}")
    print(f"Status Message: {result.get('status_message')}")

    if result.get('tasks') and len(result['tasks']) > 0:
        # Only the first task is inspected
        task = result['tasks'][0]
        print(f"\nTask Status Code: {task.get('status_code')}")
        print(f"Task Status Message: {task.get('status_message')}")

        if task.get('result'):
            print(f"\nNumber of keywords found: {len(task['result'])}")
            if len(task['result']) > 0:
                print("\nFirst keyword result:")
                print(json.dumps(task['result'][0], indent=2))
    else:
        # No task entries: dump the whole payload for inspection
        print("\nNo tasks found in response")
        print(json.dumps(result, indent=2))
else:
    print(f"Error: {response.status_code}")
    print(response.text)


Status Code: 20000
Status Message: Ok.

Task Status Code: 20000
Task Status Message: Ok.

Number of keywords found: 430

First keyword result:
{
  "keyword": "data warehouse",
  "location_code": 2840,
  "language_code": null,
  "search_partners": false,
  "competition": "LOW",
  "competition_index": 17,
  "search_volume": 12100,
  "low_top_of_page_bid": 3.83,
  "high_top_of_page_bid": 14.19,
  "cpc": 13.54,
  "monthly_searches": [
    {
      "year": 2025,
      "month": 10,
      "search_volume": 12100
    },
    {
      "year": 2025,
      "month": 9,
      "search_volume": 12100
    },
    {
      "year": 2025,
      "month": 8,
      "search_volume": 9900
    },
    {
      "year": 2025,
      "month": 7,
      "search_volume": 12100
    },
    {
      "year": 2025,
      "month": 6,
      "search_volume": 12100
    },
    {
      "year": 2025,
      "month": 5,
      "search_volume": 12100
    },
    {
      "year": 2025,
      "month": 4,
      "search_volume": 14800
    },
    {

In [3]:
# Create a task POST request for jitsu.com
import requests
import base64
import json
import os

# Website target
target_website = "jitsu.com"

# API endpoint for task POST
api_url = "https://api.dataforseo.com/v3/keywords_data/google_ads/keywords_for_site/task_post"

# Authentication: credentials are read from the environment. This cell does
# not load a .env file itself, so they must already be set in the process.
login = os.getenv("DATA_FOR_SEO_LOGIN")
password = os.getenv("DATA_FOR_SEO_PASSWORD")

# Create Basic Auth header
credentials = f"{login}:{password}"
encoded_credentials = base64.b64encode(credentials.encode()).decode()

headers = {
    "Authorization": f"Basic {encoded_credentials}",
    "Content-Type": "application/json"
}

# Request payload - using location_code 2840 for United States
post_data = [{
    "location_code": 2840,
    "target": target_website,
    "tag": "jitsu_com_keywords"
}]

# Task-level status the API reported together with "Task Created." in the
# recorded run of this cell.
TASK_CREATED_STATUS = 20100

# Make POST request. requests has no default timeout, so without one an
# unresponsive server would block this cell (and the kernel) indefinitely.
REQUEST_TIMEOUT_SECONDS = 60
response = requests.post(api_url, headers=headers, json=post_data, timeout=REQUEST_TIMEOUT_SECONDS)

# Check response
if response.status_code == 200:
    result = response.json()
    print(f"Status Code: {result.get('status_code')}")
    print(f"Status Message: {result.get('status_message')}")
    print(f"Cost: {result.get('cost')}")
    print(f"Tasks Count: {result.get('tasks_count')}")

    if result.get('tasks') and len(result['tasks']) > 0:
        # Only the first task is inspected
        task = result['tasks'][0]
        print(f"\nTask ID: {task.get('id')}")
        print(f"Task Status Code: {task.get('status_code')}")
        print(f"Task Status Message: {task.get('status_message')}")
        print(f"Task Cost: {task.get('cost')}")

        # Save task ID for later retrieval
        task_id = task.get('id')

        # HTTP 200 alone does not mean the task was accepted: task-level
        # errors are reported inside the task entry, so only announce
        # success when the task status says it was created.
        if task.get('status_code') == TASK_CREATED_STATUS:
            print(f"\n‚úÖ Task created successfully!")
            print(f"üìã Task ID: {task_id}")
            print(f"\nüí° Use this Task ID to retrieve results later with task_get")
        else:
            print("\nTask was not created; see the task status above")
    else:
        print("\nNo tasks found in response")
        print(json.dumps(result, indent=2))
else:
    print(f"Error: {response.status_code}")
    print(response.text)


Status Code: 20000
Status Message: Ok.
Cost: 0.05
Tasks Count: 1

Task ID: 11111601-1191-0108-0000-b4fdd0d92e76
Task Status Code: 20100
Task Status Message: Task Created.
Task Cost: 0.05

‚úÖ Task created successfully!
üìã Task ID: 11111601-1191-0108-0000-b4fdd0d92e76

üí° Use this Task ID to retrieve results later with task_get


In [9]:
# Live API call for the target website set below - returns results immediately
import requests
import base64
import json
import os
from datetime import datetime, timedelta

# Website target
# NOTE(review): in the recorded run the API rejected "pawwallet" with task
# status 40501 "Invalid Field: 'target'". The value presumably has to be a
# domain (the task_post cell's "jitsu.com" was accepted) - confirm and adjust.
target_website = "pawwallet"

# API endpoint for live API (returns results immediately)
api_url = "https://api.dataforseo.com/v3/keywords_data/google_ads/keywords_for_site/live"

# Authentication: credentials are read from the environment. This cell does
# not load a .env file itself, so they must already be set in the process.
login = os.getenv("DATA_FOR_SEO_LOGIN")
password = os.getenv("DATA_FOR_SEO_PASSWORD")

# Create Basic Auth header
credentials = f"{login}:{password}"
encoded_credentials = base64.b64encode(credentials.encode()).decode()

headers = {
    "Authorization": f"Basic {encoded_credentials}",
    "Content-Type": "application/json"
}

# Calculate date range: last 4 years from today (4 * 365 days, local time)
today = datetime.now()
four_years_ago = today - timedelta(days=4*365)
date_from = four_years_ago.strftime("%Y-%m-%d")
date_to = today.strftime("%Y-%m-%d")

# Request payload - using location_code 2840 for United States
post_data = [{
    "location_code": 2840,
    "target": target_website,
    "sort_by": "relevance",
    "date_from": date_from,
    "date_to": date_to
}]

print(f"Making live API call for: {target_website}")
print(f"Date range: {date_from} to {date_to}")
print("Please wait...\n")


def _number_or_zero(record, key):
    """Return record[key], treating a missing key or a null value as 0."""
    # dict.get(key, 0) only covers a missing key; a key present with a null
    # value would come back as None and break sum(), sorted() and the
    # numeric format specs below.
    return record.get(key) or 0


# Make POST request. requests has no default timeout, so without one an
# unresponsive server would block this cell indefinitely. The tuple is
# (connect timeout, read timeout) in seconds; the read side is generous
# because the live endpoint computes its results before responding.
REQUEST_TIMEOUT = (10, 120)
response = requests.post(api_url, headers=headers, json=post_data, timeout=REQUEST_TIMEOUT)

# Check response
if response.status_code == 200:
    result = response.json()
    print(f"Status Code: {result.get('status_code')}")
    print(f"Status Message: {result.get('status_message')}")
    print(f"Cost: {result.get('cost')}")
    print(f"Tasks Count: {result.get('tasks_count')}")

    if result.get('tasks') and len(result['tasks']) > 0:
        # Only the first task is inspected
        task = result['tasks'][0]
        print(f"\nTask Status Code: {task.get('status_code')}")
        print(f"Task Status Message: {task.get('status_message')}")
        print(f"Task Cost: {task.get('cost')}")

        if task.get('result'):
            # Deliberately not named `keywords`: that name is the DataFrame
            # loaded from unique_keywords.csv at the top of the notebook, and
            # rebinding it here would break re-running the batching cell.
            site_keywords = task['result']
            print(f"\n‚úÖ Success! Found {len(site_keywords)} keywords")

            # Display summary statistics. The truthiness check above already
            # guarantees a non-empty result, so the division is safe.
            total_search_volume = sum(_number_or_zero(k, 'search_volume') for k in site_keywords)
            avg_cpc = sum(_number_or_zero(k, 'cpc') for k in site_keywords) / len(site_keywords)

            print(f"\nüìä Summary Statistics:")
            print(f"   Total Keywords: {len(site_keywords)}")
            print(f"   Total Search Volume: {total_search_volume:,}")
            print(f"   Average CPC: ${avg_cpc:.2f}")

            # Show top 5 keywords by search volume
            top_keywords = sorted(
                site_keywords,
                key=lambda kw: _number_or_zero(kw, 'search_volume'),
                reverse=True,
            )[:5]
            print(f"\nüîù Top 5 Keywords by Search Volume:")
            for i, kw in enumerate(top_keywords, 1):
                volume = _number_or_zero(kw, 'search_volume')
                cpc = _number_or_zero(kw, 'cpc')
                print(f"   {i}. {kw.get('keyword')} - Volume: {volume:,}, CPC: ${cpc:.2f}")

            # Show first keyword result in detail
            print(f"\nüìã First Keyword Result (detailed):")
            print(json.dumps(site_keywords[0], indent=2))

            # Save results to file. The name is derived from the target so
            # that runs for different sites do not all get the same prefix.
            target_slug = "".join(ch if ch.isalnum() else "_" for ch in target_website)
            filename = f'{target_slug}_keywords_live_{datetime.now().strftime("%Y%m%d_%H%M%S")}.json'
            with open(filename, 'w') as f:
                json.dump(result, f, indent=2)
            print(f"\nüíæ Results saved to: {filename}")
        else:
            # Also reached on task-level errors (result is null); the dumped
            # task carries the status code and message.
            print("\n‚ö†Ô∏è No keywords found in results")
            print(json.dumps(task, indent=2))
    else:
        print("\nNo tasks found in response")
        print(json.dumps(result, indent=2))
else:
    print(f"‚ùå Error: {response.status_code}")
    print(response.text)


Making live API call for: pawwallet
Date range: 2021-11-12 to 2025-11-11
Please wait...

Status Code: 20000
Status Message: Ok.
Cost: 0
Tasks Count: 1

Task Status Code: 40501
Task Status Message: Invalid Field: 'target'.
Task Cost: 0

‚ö†Ô∏è No keywords found in results
{
  "id": "11111607-1191-0369-0000-a9e73df04e3d",
  "status_code": 40501,
  "status_message": "Invalid Field: 'target'.",
  "time": "0 sec.",
  "cost": 0,
  "result_count": 0,
  "path": [
    "v3",
    "keywords_data",
    "google_ads",
    "keywords_for_site",
    "live"
  ],
  "data": {
    "api": "keywords_data",
    "function": "keywords_for_site",
    "se": "google_ads",
    "location_code": 2840,
    "target": "pawwallet",
    "sort_by": "relevance",
    "date_from": "2021-11-12",
    "date_to": "2025-11-11"
  },
  "result": null
}
