In [1]:
import pandas as pd

# Load the keyword table from disk; later cells read its "Keyword" column.
keywords = pd.read_csv(filepath_or_buffer="unique_keywords.csv")

# Optional shuffle of the rows (currently disabled):
# keywords = keywords.sample(frac=1, random_state=42).reset_index(drop=True)

# Display (rows, columns) as the cell output.
keywords.shape

(195600, 1)

In [2]:
# Split keywords into batches of `batch_size` (1000) keywords each; the last
# batch holds whatever remains when the total is not a multiple of batch_size.
keyword_batches = []
batch_size = 1000

# Stepping the range by batch_size yields each batch's start offset directly,
# so no separate batch count or index arithmetic is needed.
for start_idx in range(0, len(keywords), batch_size):
    batch = keywords.iloc[start_idx:start_idx + batch_size]["Keyword"].tolist()
    # Each batch is stored as a one-element payload list. Note that
    # get_keyword_search_volume only reads the "keywords" entry of the payload.
    payload = [{
        "keywords": batch,
        "sort_by": "relevance"
    }]
    keyword_batches.append(payload)

# Kept for any later cell that refers to the batch count.
num_batches = len(keyword_batches)

print(f"Split into {len(keyword_batches)} batches")
# Uncomment to inspect the individual batch sizes:
# for i, batch in enumerate(keyword_batches):
#     print(f"Batch {i+1} size: {len(batch[0]['keywords'])}")


Split into 196 batches


In [9]:
import dotenv
import os
from dataforseo_client import configuration as dfs_config, api_client as dfs_api_provider
from dataforseo_client.api.keywords_data_api import KeywordsDataApi
from dataforseo_client.rest import ApiException
from dataforseo_client.models.keywords_data_google_ads_search_volume_live_request_info import KeywordsDataGoogleAdsSearchVolumeLiveRequestInfo
import time

# Read the .env file first so the credentials below can come from it.
dotenv.load_dotenv()
login = os.environ.get("DATA_FOR_SEO_LOGIN")
password = os.environ.get("DATA_FOR_SEO_PASSWORD")

# Start of the reporting window: 4 * 365 days before now, formatted as a UTC date.
_window_seconds = 4 * 365 * 24 * 60 * 60
_window_start = time.gmtime(time.time() - _window_seconds)
get_date_from = time.strftime("%Y-%m-%d", _window_start)
print(get_date_from)

def get_keyword_search_volume(keywords: list[str]):
    """
    Get search volume data for keywords from the DataForSEO API.

    Sends a single live Google Ads search-volume request with search partners
    enabled, using the module-level ``get_date_from`` as ``date_from`` and no
    ``date_to``.

    Args:
        keywords (list): Either a list of keyword strings, or a payload list
            whose first item is a dict with a ``"keywords"`` entry (the shape
            stored in ``keyword_batches``). In the payload form only the
            ``"keywords"`` entry of the first item is used.

    Returns:
        The response object returned by
        ``KeywordsDataApi.google_ads_search_volume_live``, or ``None`` when
        there are no keywords to send or the call raises ``ApiException``
        (a message is printed in both cases).
    """
    # Unwrap the payload form. The emptiness check comes first so an empty
    # list no longer fails with IndexError on keywords[0].
    if keywords and isinstance(keywords[0], dict):
        keywords = keywords[0]['keywords']

    # Nothing to look up: report it the same way as an API failure and avoid
    # issuing a request at all.
    if not keywords:
        print("No keywords provided; skipping API request.\n")
        return None

    configuration = dfs_config.Configuration(username=login, password=password)
    with dfs_api_provider.ApiClient(configuration) as api_client:
        # Create an instance of the API class
        keywords_api = KeywordsDataApi(api_client)

        request = KeywordsDataGoogleAdsSearchVolumeLiveRequestInfo(
            keywords=keywords,
            search_partners=True,
            date_from=get_date_from,
            date_to=None
        )

        try:
            # The endpoint takes a list of request objects; exactly one is sent.
            return keywords_api.google_ads_search_volume_live([request])
        except ApiException as e:
            print(f"Exception: {e}\n")
            return None

2021-11-05


In [10]:
# Fetch search volume for the first batch only (index 0 of keyword_batches).
first_payload = keyword_batches[0]
result = get_keyword_search_volume(first_payload)

In [11]:
# Save the API response to a file
import json

# get_keyword_search_volume returns None when the request failed, so guard
# before calling .to_dict() instead of crashing with AttributeError.
if result is None:
    print("Failed to get results for batch 0")
else:
    # Convert the response object to a dictionary
    result_dict = result.to_dict()

    # Save to JSON file with nice formatting
    with open('search_volume_results_0.json', 'w') as f:
        json.dump(result_dict, f, indent=4)


In [12]:
# Imported here so this cell does not depend on an earlier cell's imports.
import json
import os

# Create the output directory; exist_ok makes this a no-op when it already
# exists and avoids the separate check-then-create step.
os.makedirs('search_volumes', exist_ok=True)

# Process every batch after the first one and save each response.
# NOTE: i counts from 0 over keyword_batches[1:], so the index used in the
# file names and log messages below refers to keyword_batches[i + 1].
# The numbering is left unchanged so existing output files keep their names.
for i, batch in enumerate(keyword_batches[1:]):
    # Get search volume for this batch
    result = get_keyword_search_volume(batch)

    if not result:
        # The request failed (or returned nothing); move on to the next batch.
        print(f"Failed to get results for batch {i}")
        continue

    # Convert response to dictionary
    result_dict = result.to_dict()

    # Save to numbered JSON file
    filename = f'search_volumes/search_volume_results_{i}.json'
    with open(filename, 'w') as f:
        json.dump(result_dict, f, indent=4)
    print(f"Saved results for batch {i} to {filename}")


Saved results for batch 0 to search_volumes/search_volume_results_0.json
Saved results for batch 1 to search_volumes/search_volume_results_1.json
Saved results for batch 2 to search_volumes/search_volume_results_2.json
Saved results for batch 3 to search_volumes/search_volume_results_3.json
Saved results for batch 4 to search_volumes/search_volume_results_4.json
Saved results for batch 5 to search_volumes/search_volume_results_5.json
Saved results for batch 6 to search_volumes/search_volume_results_6.json
Saved results for batch 7 to search_volumes/search_volume_results_7.json
Saved results for batch 8 to search_volumes/search_volume_results_8.json
Saved results for batch 9 to search_volumes/search_volume_results_9.json
Saved results for batch 10 to search_volumes/search_volume_results_10.json
Saved results for batch 11 to search_volumes/search_volume_results_11.json
Saved results for batch 12 to search_volumes/search_volume_results_12.json
Saved results for batch 13 to search_volumes/