In [None]:
#required packages

!pip install pandas requests numpy tqdm

import base64
import json
import os
import time
from collections import deque
from urllib.parse import urlparse

import numpy as np
import pandas as pd
import requests
from tqdm import tqdm



In [None]:
# Data sources (Google Sheets exported as CSV)
KEYWORD_LIST_URL = "https://docs.google.com/spreadsheets/d/1RVL2iATTp2h3Wx-KeDSkrzvdIPMZIUK4nQ9ik87U6o4/export?format=csv"
LINKBUILDER_DOMAINS_URL = "https://docs.google.com/spreadsheets/d/1gBEIThc5Lg3ZdkRMe7NQE8o371AlYt6b37rko-rdpsA/export?format=csv"

# DFS API configuration
# Credentials are taken from the DFS_EMAIL / DFS_API_KEY environment variables
# when they are set, so they can be supplied without editing the notebook.
# NOTE(review): the literal fallbacks are kept only so existing runs keep
# working. A key committed in plain text should be rotated and the defaults
# removed once the environment variables are in place.
DFS_EMAIL = os.environ.get("DFS_EMAIL", "admin@wldm.io")
DFS_API_KEY = os.environ.get("DFS_API_KEY", "cb54e37f6a4874eb")
DFS_BASE_URL = "https://api.dataforseo.com/v3"

def get_dfs_headers():
    """Build the HTTP headers sent with every DFS request.

    Returns:
        dict: a Basic ``Authorization`` header derived from
        ``DFS_EMAIL:DFS_API_KEY`` (base64-encoded) plus a JSON
        ``Content-Type`` header.
    """
    login_pair = "{}:{}".format(DFS_EMAIL, DFS_API_KEY)
    encoded_pair = base64.b64encode(login_pair.encode()).decode()

    headers = {}
    headers["Authorization"] = "Basic " + encoded_pair
    headers["Content-Type"] = "application/json"
    return headers

print("API configuration loaded.")


API configuration loaded.


 Loads the keyword and domain datasets with error handling (a failed download prints the error and yields an empty DataFrame), then previews both as a quick sanity check.


In [None]:
## Cell 3: Data loading utilities

import pandas as pd
import requests

def load_keywords_data():
    """Read the keyword sheet at KEYWORD_LIST_URL into a DataFrame.

    Returns:
        pandas.DataFrame: the parsed sheet, or an empty DataFrame when the
        read fails for any reason (the error is printed, not raised).
    """
    try:
        keyword_frame = pd.read_csv(KEYWORD_LIST_URL)
    except Exception as load_error:
        print(f"Error loading keywords: {load_error}")
        return pd.DataFrame()
    else:
        print(f"Loaded {len(keyword_frame)} keywords.")
        return keyword_frame

def load_domains_data():
    """Read the LinkBuilder domain sheet at LINKBUILDER_DOMAINS_URL.

    Returns:
        pandas.DataFrame: the parsed sheet, or an empty DataFrame when the
        read fails for any reason (the error is printed, not raised).
    """
    try:
        domain_frame = pd.read_csv(LINKBUILDER_DOMAINS_URL)
    except Exception as load_error:
        print(f"Error loading domains: {load_error}")
        return pd.DataFrame()
    else:
        print(f"Loaded {len(domain_frame)} domains.")
        return domain_frame

# Fetch both datasets once; the cells below read these two frames
keywords_df, domains_df = load_keywords_data(), load_domains_data()

# Show the first two rows of each dataset
print("\nKeywords preview:", keywords_df.head(2), sep="\n")
print("\nDomains preview:", domains_df.head(2), sep="\n")


Loaded 1146 keywords.
Loaded 609 domains.

Keywords preview:
             Keyword          Target URL      Client(domain) Date updated
0  travel essentials  https://saily.com/  https://saily.com/   04/06/2025
1      eSIM provider  https://saily.com/  https://saily.com/   04/06/2025

Domains preview:
               Domain                                              Found  \
0   addis-techblog.de  PRPosting,Collaborator,PRNews,ICopify,Links St...   
1  internetblogger.de           PR-X,Links Stream,Serpzilla,Mellow Promo   

  Date of appearance in the parser  iGaming  Trust Flow (Maj)  \
0                       2025-11-11  IGaming              15.0   
1                       2025-11-11  IGaming              15.0   

  Trust Flow (Maj) update date  Citation Flow (Maj)  \
0                   2025-10-23                 34.0   
1                   2025-10-23                 16.0   

  Citation Flow (Maj update date)  Domains (Maj) Domains (Maj) update date  \
0                      2025-10

In [None]:
# KEYWORD FILTERING
# Extract keywords for any client by specifying domain

def get_client_keywords(client_domain):
    """Return the keywords assigned to one client.

    Rows of ``keywords_df`` are matched on the 'Client(domain)' column.
    The comparison ignores surrounding whitespace, letter case and a
    trailing slash, so 'https://saily.com' and 'https://saily.com/' select
    the same rows.

    Args:
        client_domain: client URL as written in the keyword sheet,
            e.g. 'https://stake.com'.

    Returns:
        list: values of the 'Keyword' column for the matching rows; an
        empty list when nothing matches or when ``keywords_df`` lacks the
        required columns (for example after a failed load).
    """
    # A failed load leaves keywords_df as an empty frame with no columns;
    # report that instead of raising KeyError.
    required_columns = ('Client(domain)', 'Keyword')
    if any(column not in keywords_df.columns for column in required_columns):
        print(f"Keyword data is missing columns {required_columns}; no keywords for {client_domain}")
        return []

    wanted = str(client_domain).strip().rstrip('/').lower()
    sheet_clients = (
        keywords_df['Client(domain)']
        .astype(str)
        .str.strip()
        .str.rstrip('/')
        .str.lower()
    )
    client_keywords = keywords_df[sheet_clients == wanted]

    print(f"‚úÖ Found {len(client_keywords)} keywords for {client_domain}")
    print(f"\nSample keywords for {client_domain}:")
    print(client_keywords['Keyword'].head(10))

    return client_keywords['Keyword'].tolist()

# Example: pull the keyword list for the Stake.com client
igaming_keywords = get_client_keywords(client_domain='https://stake.com')

# Any other client works the same way, e.g.:
# sports_keywords = get_client_keywords('https://sportsclient.com')


‚úÖ Found 559 keywords for https://stake.com

Sample keywords for https://stake.com:
587           2023 24 nba predictions picks betting odds
588    2023 24 premier league football picks odds pre...
589    2023 nfl season predictions super bowl lviii p...
590                          2024 french open picks odds
591       2024 online gambling betting statistics trends
592                                       2nd bundesliga
593                                        3 oaks gaming
594                                             3rd liga
595                                             a league
596                                               action
Name: Keyword, dtype: object


In [None]:
# KEYWORD EXTRACTION
# Reusable function to extract and store keywords for any client

def extract_client_keywords(client_domain):
    """Extract the keyword list for one client and report a short summary.

    Rows of ``keywords_df`` are matched on the 'Client(domain)' column.
    The comparison ignores surrounding whitespace, letter case and a
    trailing slash, so both 'https://saily.com' and 'https://saily.com/'
    select the same rows.

    Args:
        client_domain: client URL as written in the keyword sheet.

    Returns:
        list: values of the 'Keyword' column for the matching rows; an
        empty list when nothing matches or when ``keywords_df`` lacks the
        required columns (for example after a failed load).
    """
    print(f"üîç Extracting keywords for {client_domain}...")

    # A failed load leaves keywords_df as an empty frame with no columns;
    # report that instead of raising KeyError.
    required_columns = ('Client(domain)', 'Keyword')
    if any(column not in keywords_df.columns for column in required_columns):
        print(f"Keyword data is missing columns {required_columns}; no keywords for {client_domain}")
        return []

    wanted = str(client_domain).strip().rstrip('/').lower()
    sheet_clients = (
        keywords_df['Client(domain)']
        .astype(str)
        .str.strip()
        .str.rstrip('/')
        .str.lower()
    )
    keyword_list = keywords_df.loc[sheet_clients == wanted, 'Keyword'].tolist()

    print(f"‚úÖ Stored {len(keyword_list)} keywords for {client_domain}")
    print(f"üìù Sample: {keyword_list[:3]}")

    return keyword_list

# Example: extract and keep the keyword list for the Stake.com client
igaming_keywords = extract_client_keywords(client_domain='https://stake.com')

# Other clients follow the same pattern:
# sports_keywords = extract_client_keywords('https://sportsclient.com')
# finance_keywords = extract_client_keywords('https://financeclient.com')

üîç Extracting keywords for https://stake.com...
‚úÖ Stored 559 keywords for https://stake.com
üìù Sample: ['2023 24 nba predictions picks betting odds', '2023 24 premier league football picks odds predictions', '2023 nfl season predictions super bowl lviii picks']


## **DFS keywords**


In [None]:
#  DFS CONNECTION TEST
# Verify API connectivity with one small ranked-keywords request

print("üîå Testing DFS API connection...")

endpoint = f"{DFS_BASE_URL}/dataforseo_labs/google/ranked_keywords/live"
test_data = [{
    "target": "apple.com",
    "location_code": 2840,
    "language_code": "en",
    "limit": 5
}]

try:
    # Without a timeout, requests waits indefinitely on an unresponsive server
    response = requests.post(endpoint, json=test_data, headers=get_dfs_headers(), timeout=30)
except requests.RequestException as request_error:
    # DNS failure, refused connection, timeout, ...: report instead of a traceback
    response = None
    print(f"‚ùå Connection failed: {request_error}")
else:
    if response.status_code == 200:
        print("‚úÖ DFS connection successful")
    else:
        print(f"‚ùå Connection failed: {response.status_code}")
        print(f"Error: {response.text}")

üîå Testing DFS API connection...
‚úÖ DFS connection successful


In [None]:
# DFS KEYWORD
# Core function to get ranking keywords for any domain

def _parse_ranked_keywords(results):
    """Flatten a ranked_keywords response body into a list of keyword dicts.

    Only the first task of the response is read. Any level that is missing
    or JSON ``null`` is treated as empty, so one incomplete item cannot
    discard the rest of the response.
    """
    keywords = []

    tasks = (results or {}).get('tasks') or []
    if not tasks:
        return keywords

    first_task = tasks[0] or {}
    for result_block in first_task.get('result') or []:
        for keyword_item in (result_block or {}).get('items') or []:
            keyword_item = keyword_item or {}
            keyword_data = keyword_item.get('keyword_data') or {}
            serp_data = (keyword_item.get('ranked_serp_element') or {}).get('serp_item') or {}
            keyword_info = keyword_data.get('keyword_info') or {}

            keyword = keyword_data.get('keyword', '')
            if not keyword:
                continue

            keywords.append({
                'keyword': keyword,
                # 999 stands in for "no rank reported"
                'position': serp_data.get('rank_absolute', 999),
                'search_volume': keyword_info.get('search_volume', 0)
            })

    return keywords


def get_domain_keywords(domain, limit=100, timeout=30):
    """Fetch the keywords a domain ranks for from the DFS ranked_keywords endpoint.

    Args:
        domain: target domain, sent as the request's "target".
        limit: maximum number of keyword items requested.
        timeout: seconds to wait for the HTTP response before giving up.

    Returns:
        list[dict]: one dict per keyword with the keys 'keyword',
        'position' (``rank_absolute``, 999 when absent) and 'search_volume'
        (0 when absent). An empty list is returned for a non-200 response
        or when any error occurs; errors are printed, never raised.
    """
    endpoint = f"{DFS_BASE_URL}/dataforseo_labs/google/ranked_keywords/live"
    data = [{
        "target": domain,
        "location_code": 2840,
        "language_code": "en",
        "limit": limit
    }]

    try:
        response = requests.post(
            endpoint,
            json=data,
            headers=get_dfs_headers(),
            timeout=timeout,
        )

        if response.status_code != 200:
            # Make the failure visible; an empty list alone looks like "no keywords"
            print(f"DFS request for {domain} failed with HTTP {response.status_code}")
            return []

        return _parse_ranked_keywords(response.json())

    except Exception as e:
        # Best effort by design: one bad domain must not stop a batch run
        print(f"‚ùå Error analyzing {domain}: {e}")
        return []

print(" Keywords ready")

 Keywords ready


In [None]:

# Inspect the domains dataset: list its columns and print a few sample domains

# Columns present in the dataset
available_columns = list(domains_df.columns)
print(f"Dataset columns: {available_columns}")

# Use the first candidate header that exists in the dataset (None if none do)
domain_column = next(
    (
        candidate
        for candidate in ('Domain', 'domain', 'URL', 'url', 'Website')
        if candidate in domains_df.columns
    ),
    None,
)

if domain_column is None:
    print("No domain found")
else:
    print(f" domain column: '{domain_column}'")
    print(f"total domains: {len(domains_df)}")

    # Show the first eight domains
    print("\nSample domains:")
    for domain in domains_df[domain_column].head(8):
        print(f"  {domain}")

Dataset columns: ['Domain', 'Found', 'Date of appearance in the parser', 'iGaming', 'Trust Flow (Maj)', 'Trust Flow (Maj) update date', 'Citation Flow (Maj)', 'Citation Flow (Maj update date)', 'Domains (Maj)', 'Domains (Maj) update date', 'Backlinks (Maj) update date', 'Domain Rating (DR)', 'Domain Rating (DR) update date', 'Domains (Ah)', 'Domains (Ah) update date', 'Backlinks (Ah) update date', 'Traffic (Ah)', 'Traffic (Ah) update date', 'Domain Authority (Moz)', 'Domain Authority (Moz) update date', 'Page Authority (Moz)', 'Page Authority (Moz) update date', 'Trust (CT) update date', 'Spam (CT) update date', 'Countries', 'Languages', 'Subjects', 'Article price via PRNews', 'Price of placement via Prposting', 'Price per article via Collaborator', 'Price per review via Collaborator', 'Price per release via Collaborator', 'Price per article via Serpzilla', 'Price per news via Serpzilla', 'Price per review via Serpzilla', 'Price per archive via Serpzilla', 'Price per article via PR-X',

**TODO:** come back to this.

In [None]:
# LOAD DOMAIN DATA
# Read the LinkBuilder export from a local CSV when the file is available

print("Loading domain data from LinkBuilder export...")

LOCAL_DOMAINS_CSV = 'igaming-link-2025.csv'

try:
    # Read the CSV file from LinkBuilder
    domains_df = pd.read_csv(LOCAL_DOMAINS_CSV)
except FileNotFoundError:
    # The local export is optional. Keep the frame loaded earlier in the
    # session (or an empty one) so the notebook still runs top to bottom.
    domains_df = globals().get('domains_df', pd.DataFrame())
    print(f"'{LOCAL_DOMAINS_CSV}' not found; keeping the previously loaded domain data")

print(f"Loaded {len(domains_df)} domains")
print(f"Available columns: {list(domains_df.columns)}")

# Preview data structure
print("\nData preview:")
print(domains_df.head(3))

Loading domain data from LinkBuilder export...


FileNotFoundError: [Errno 2] No such file or directory: 'igaming-link-2025.csv'

In [None]:
# QUICK DOMAIN FILTER
# Keep only domains whose name contains an iGaming-related term

print("Applying quick domain filter...")

# Case-insensitive substring match on the domain name; blank names are excluded
igaming_domains = domains_df.loc[
    lambda frame: frame['Domain'].str.contains(
        'casino|poker|bet|gambl|slot', case=False, na=False
    )
]

print(f"Filtered to {len(igaming_domains)} likely iGaming domains")

Applying quick domain filter...
Filtered to 96 likely iGaming domains


In [None]:
# MAIN ANALYSIS PIPELINE
# Page-level analysis using the existing configuration: reload the LinkBuilder
# domains, keep the iGaming-looking ones, fetch each domain's ranked keywords
# and flag domains that have iGaming keywords ranking in the top 20 positions.

# Settings for this run
DOMAIN_NAME_TERMS = ('casino', 'poker', 'bet', 'slot')
# NOTE(review): the quick-filter cell also matches 'gambl' in domain names;
# confirm which of the two term lists is the intended one.
IGAMING_KEYWORD_TERMS = ('casino', 'poker', 'bet', 'gambl', 'slot', 'blackjack')
TOP_POSITION_CUTOFF = 20      # a keyword counts as "top 20" at this rank or better
MIN_DOMAIN_RATING = 20
MIN_TRAFFIC = 1500
KEYWORDS_PER_DOMAIN = 50
PAUSE_BETWEEN_REQUESTS = 2    # seconds to wait between API calls
RESULT_COLUMNS = [
    'Page_URL', 'Domain', 'DR', 'Traffic',
    'Total_Keywords', 'iGaming_In_Top20', 'Meets_Criteria',
]

# Use domains from URL
domains_df = pd.read_csv(LINKBUILDER_DOMAINS_URL)
# Blank cells are read as NaN (a float); drop them before the string checks below
domains_list = domains_df['Domain'].dropna().tolist()

print(f"Analyzing {len(domains_list)} domains")

# Simple domain filter to remove obvious non-iGaming sites
filtered_domains = [
    domain for domain in domains_list
    if any(term in str(domain).lower() for term in DOMAIN_NAME_TERMS)
]

print(f"Filtered to {len(filtered_domains)} likely iGaming domains")

results = []
start_time = time.time()

for i, domain in enumerate(tqdm(filtered_domains)):
    print(f"\nAnalyzing {i+1}/{len(filtered_domains)}: {domain}")

    # Get keywords using existing function
    keywords = get_domain_keywords(domain, limit=KEYWORDS_PER_DOMAIN)

    if keywords:
        domain_data = domains_df[domains_df['Domain'] == domain].iloc[0]
        dr = domain_data['Domain Rating (DR)']
        traffic = domain_data['Traffic (Ah)']

        # "Top 20" means ranking position <= 20, not the first 20 items the
        # API happened to return (Brie's requirement).
        top_ranked = [
            kw for kw in keywords
            if (kw.get('position') or 999) <= TOP_POSITION_CUTOFF
        ]
        # A dedicated name keeps the client keyword list held in the global
        # `igaming_keywords` from being overwritten by this loop.
        igaming_top20 = [
            kw for kw in top_ranked
            if any(term in kw['keyword'].lower() for term in IGAMING_KEYWORD_TERMS)
        ]

        has_igaming_top20 = len(igaming_top20) > 0

        print(f"  Keywords: {len(keywords)}, iGaming in top 20: {len(igaming_top20)}")

        # NaN DR/traffic compare as False, so such domains end up as 'NO'
        meets_criteria = (
            dr >= MIN_DOMAIN_RATING
            and traffic >= MIN_TRAFFIC
            and has_igaming_top20
        )

        results.append({
            'Page_URL': f"https://{domain}",
            'Domain': domain,
            'DR': dr,
            'Traffic': traffic,
            'Total_Keywords': len(keywords),
            'iGaming_In_Top20': len(igaming_top20),
            'Meets_Criteria': 'YES' if meets_criteria else 'NO'
        })

    time.sleep(PAUSE_BETWEEN_REQUESTS)

# Create final results; explicit columns keep the filter below valid even
# when no domain returned any keywords
results_df = pd.DataFrame(results, columns=RESULT_COLUMNS)
qualified = results_df[results_df['Meets_Criteria'] == 'YES']

print(f"\nAnalysis complete: {len(qualified)} qualified pages found")
results_df.to_csv('client_analysis_results.csv', index=False)

Analyzing 609 domains
Filtered to 91 likely iGaming domains


  0%|          | 0/91 [00:00<?, ?it/s]


Analyzing 1/91: latestcasinosreviews.com


  1%|          | 1/91 [00:02<04:00,  2.67s/it]


Analyzing 2/91: casinopelitnetissa.com
  Keywords: 3, iGaming in top 20: 1


  2%|‚ñè         | 2/91 [00:05<04:04,  2.75s/it]


Analyzing 3/91: netticasinokolikkopelit.com
  Keywords: 1, iGaming in top 20: 1


  3%|‚ñé         | 3/91 [00:08<03:59,  2.72s/it]


Analyzing 4/91: netent-casinot.com





KeyboardInterrupt: 