# SEO Audit — PainClinics.com

Analyzes 16 months of Google Search Console data across 6 dimensions:
1. Query Analysis & Clustering
2. Page Performance
3. Indexing & Coverage
4. 404 Recovery
5. "Near Me" & Generic Query Deep Dive
6. Action Items

In [1]:
import pandas as pd
import numpy as np
import re
import os
from pathlib import Path
from urllib.parse import urlparse

# Widen pandas' rendering limits so long queries and URLs stay readable.
for _option, _value in (
    ('display.max_columns', 20),
    ('display.max_colwidth', 80),
    ('display.width', 200),
):
    pd.set_option(_option, _value)

# Use the working directory when it holds the Queries export; otherwise fall
# back to the repo-relative audit folder.
_cwd = Path('.')
_export_in_cwd = (_cwd / 'painclinics.com-Performance-on-Search-2026-02-25 - Queries.csv').exists()
AUDIT_DIR = _cwd if _export_in_cwd else Path('docs/seo-audit')

print(f'Audit directory: {AUDIT_DIR.resolve()}')
_data_files = [entry.name for entry in AUDIT_DIR.iterdir() if entry.suffix in ('.csv', '.json')]
print('Files:', _data_files)

Audit directory: /Users/kylesweezey/cc/painclinics/docs/seo-audit
Files: ['painclinics.com-Performance-on-Search-2026-02-25 - Queries.csv', 'painclinics.com-Coverage-2026-02-25 - Chart.csv', 'painclinics.com-Performance-on-Search-2026-02-25 - Pages.csv']


In [2]:
# Load CSVs
def _find_export(pattern):
    """Return the export file in AUDIT_DIR that matches *pattern*.

    Matches are sorted and the last one is used, so the choice is
    deterministic; when file names differ only by their ISO date stamp this
    is the newest export. Raises FileNotFoundError with the searched
    directory instead of a bare IndexError when nothing matches.
    """
    matches = sorted(AUDIT_DIR.glob(pattern))
    if not matches:
        raise FileNotFoundError(
            f'No file matching {pattern!r} found in {AUDIT_DIR.resolve()}'
        )
    return matches[-1]


queries_file = _find_export('*Queries*.csv')
pages_file = _find_export('*Pages*.csv')
coverage_file = _find_export('*Coverage*.csv')

queries = pd.read_csv(queries_file, dtype_backend='numpy_nullable')
pages = pd.read_csv(pages_file, dtype_backend='numpy_nullable')
coverage = pd.read_csv(coverage_file, dtype_backend='numpy_nullable')

# Normalize CTR from percentage string to float
for df in [queries, pages]:
    # Coerce like the other metrics so a blank or malformed CTR cell becomes
    # NaN instead of aborting the whole load; keep the plain float64 dtype.
    ctr_text = df['CTR'].astype(str).str.rstrip('%')
    df['CTR'] = pd.to_numeric(ctr_text, errors='coerce').astype('float64')
    df['Clicks'] = pd.to_numeric(df['Clicks'], errors='coerce')
    df['Impressions'] = pd.to_numeric(df['Impressions'], errors='coerce')
    df['Position'] = pd.to_numeric(df['Position'], errors='coerce')

# Rename first column for consistency
queries.rename(columns={queries.columns[0]: 'Query'}, inplace=True)
pages.rename(columns={pages.columns[0]: 'Page'}, inplace=True)

print(f'Queries: {len(queries)} rows')
print(f'Pages: {len(pages)} rows')
print(f'Coverage: {len(coverage)} rows')
print(f'Queries dtypes: {dict(queries.dtypes)}')
print()
queries.head(3)

Queries: 1000 rows
Pages: 1000 rows
Coverage: 89 rows
Queries dtypes: {'Query': <StringDtype(storage='python', na_value=<NA>)>, 'Clicks': Int64Dtype(), 'Impressions': Int64Dtype(), 'CTR': dtype('float64'), 'Position': Float64Dtype()}



Unnamed: 0,Query,Clicks,Impressions,CTR,Position
0,center for symptom relief,279,1492,18.7,4.02
1,pain management near me,277,21110,1.31,20.11
2,open arms pain clinic,268,4581,5.85,6.0


---
## 1A: Query Analysis & Clustering

In [3]:
# --- Condition and treatment keyword lists ---
CONDITIONS = [
    'back pain', 'neck pain', 'sciatica', 'neuropathy', 'fibromyalgia',
    'arthritis', 'migraine', 'headache', 'knee pain', 'hip pain',
    'shoulder pain', 'chronic pain', 'joint pain', 'nerve pain',
    'herniated disc', 'spinal stenosis', 'degenerative disc',
    'carpal tunnel', 'plantar fasciitis', 'complex regional',
    'crps', 'radiculopathy', 'myofascial', 'tendonitis',
    'whiplash', 'post surgical pain', 'cancer pain',
    'pelvic pain', 'abdominal pain', 'chest pain',
]

TREATMENTS = [
    'injection', 'nerve block', 'epidural', 'steroid',
    'physical therapy', 'spinal cord stimulator', 'spinal cord stimulation',
    'radiofrequency ablation', 'rfa', 'acupuncture', 'chiropractic',
    'massage therapy', 'regenerative', 'prp', 'platelet rich plasma',
    'stem cell', 'ketamine', 'infusion', 'tens', 'biofeedback',
    'cognitive behavioral', 'medication management', 'opioid',
    'suboxone', 'trigger point', 'facet joint', 'si joint',
    'kyphoplasty', 'vertebroplasty', 'discography', 'intrathecal pump',
    'botox', 'cortisone',
]

# US state names and abbreviations for city-state detection
# Abbreviations are space-padded so they only match as whole tokens.
STATES = [
    'alabama','alaska','arizona','arkansas','california','colorado',
    'connecticut','delaware','florida','georgia','hawaii','idaho',
    'illinois','indiana','iowa','kansas','kentucky','louisiana',
    'maine','maryland','massachusetts','michigan','minnesota',
    'mississippi','missouri','montana','nebraska','nevada',
    'new hampshire','new jersey','new mexico','new york',
    'north carolina','north dakota','ohio','oklahoma','oregon',
    'pennsylvania','rhode island','south carolina','south dakota',
    'tennessee','texas','utah','vermont','virginia','washington',
    'west virginia','wisconsin','wyoming',
    ' al ', ' ak ', ' az ', ' ar ', ' ca ', ' co ', ' ct ', ' de ',
    ' fl ', ' ga ', ' hi ', ' id ', ' il ', ' in ', ' ia ', ' ks ',
    ' ky ', ' la ', ' me ', ' md ', ' ma ', ' mi ', ' mn ', ' ms ',
    ' mo ', ' mt ', ' ne ', ' nv ', ' nh ', ' nj ', ' nm ', ' ny ',
    ' nc ', ' nd ', ' oh ', ' ok ', ' or ', ' pa ', ' ri ', ' sc ',
    ' sd ', ' tn ', ' tx ', ' ut ', ' vt ', ' va ', ' wa ', ' wv ',
    ' wi ', ' wy ',
]


def _contains_keyword(text, keywords):
    """Return True when *text* mentions any entry of *keywords*.

    Longer and multi-word keywords use substring matching so inflected or
    compound forms still hit ("injections", "osteoarthritis"). Single-word
    keywords of four characters or fewer ('rfa', 'prp', 'tens', 'crps') are
    treated as acronyms and must appear as a whole word; as substrings they
    fire inside unrelated words such as "surface" or "extensive".
    """
    for kw in keywords:
        if len(kw) <= 4 and kw.isalnum():
            if re.search(rf'\b{re.escape(kw)}\b', text):
                return True
        elif kw in text:
            return True
    return False


def classify_query(q):
    """Assign a search query to one cluster label.

    Rules are checked in priority order and the first hit wins:
    'brand', 'near-me', 'condition', 'treatment', 'city-state',
    'clinic-name', and finally 'generic' as the catch-all.

    Args:
        q: The raw query string. Missing or non-string values are
            classified as 'generic' rather than raising.

    Returns:
        One of the cluster labels listed above.
    """
    if not isinstance(q, str):
        return 'generic'

    # Pad with spaces so space-delimited tokens (state abbreviations, 'dr ')
    # also match at the very start or end of the query.
    q_lower = f' {q.lower()} '

    if 'painclinics' in q_lower or 'pain clinics directory' in q_lower:
        return 'brand'

    # '\bnear ' keeps "near me" / "near <place>" but no longer fires on words
    # that merely end in "near" (e.g. "linear ").
    if (
        re.search(r'\bnear ', q_lower)
        or 'nearby' in q_lower
        or 'close to me' in q_lower
    ):
        return 'near-me'

    if _contains_keyword(q_lower, CONDITIONS):
        return 'condition'

    if _contains_keyword(q_lower, TREATMENTS):
        return 'treatment'

    # NOTE(review): several abbreviations double as ordinary words or titles
    # (' in ', ' or ', ' me ', ' md '), so e.g. "<doctor name> md" lands here
    # before the 'md ' clinic indicator below is ever consulted. Left as-is
    # because telling them apart needs a city list; confirm this is acceptable.
    if any(st in q_lower for st in STATES):
        return 'city-state'

    # Check for common clinic-name patterns: "dr ", "clinic", "center", "associates", etc.
    # Substring matching is deliberate here ('rehab' also covers "rehabilitation").
    clinic_indicators = ['dr ', 'dr.', 'clinic', 'center', 'associates', 'medical', 'health',
                         'wellness', 'rehab', 'institute', 'group', 'specialists',
                         'physicians', 'care ', 'pllc', 'llc', 'md ']
    if any(kw in q_lower for kw in clinic_indicators):
        return 'clinic-name'

    # Everything else, including bare "pain management" / "pain doctor"
    # style queries, is generic.
    return 'generic'

# Re-coerce the metric columns before aggregating (pandas 3.0 uses
# StringDtype by default, so make sure these are numeric).
for metric in ('Clicks', 'Impressions', 'Position', 'CTR'):
    queries[metric] = pd.to_numeric(queries[metric], errors='coerce')

# Label every query with its cluster.
queries['Cluster'] = queries['Query'].apply(classify_query)

print('Query cluster distribution:')
_cluster_metrics = {
    'count': ('Query', 'size'),
    'total_impressions': ('Impressions', 'sum'),
    'total_clicks': ('Clicks', 'sum'),
    'avg_position': ('Position', 'mean'),
    'avg_ctr': ('CTR', 'mean'),
}
cluster_summary = (
    queries.groupby('Cluster')
    .agg(**_cluster_metrics)
    .sort_values('total_impressions', ascending=False)
)
cluster_summary

Query cluster distribution:


Unnamed: 0_level_0,count,total_impressions,total_clicks,avg_position,avg_ctr
Cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
clinic-name,383,282041,5409,9.193786,4.148355
generic,269,218093,2879,9.304647,4.45974
city-state,313,154422,3547,9.143099,4.878562
near-me,25,62314,798,20.198,2.9592
condition,9,8140,223,6.165556,7.998889
treatment,1,60,5,39.5,8.33


In [4]:
# Rank by impressions: these are the queries where Google already
# considers the site relevant.
print('Top 25 queries by impressions:')
_top_query_cols = ['Query', 'Clicks', 'Impressions', 'CTR', 'Position', 'Cluster']
queries.nlargest(25, 'Impressions')[_top_query_cols]

Top 25 queries by impressions:


Unnamed: 0,Query,Clicks,Impressions,CTR,Position,Cluster
1,pain management near me,277,21110,1.31,20.11,near-me
21,pain management,76,11602,0.66,34.37,generic
38,jawad bhatti md,47,9800,0.48,8.17,city-state
54,pain clinic,35,9564,0.37,21.01,clinic-name
20,pain clinic near me,78,9059,0.86,17.22,near-me
113,laurel pain clinic,20,6483,0.31,11.13,clinic-name
258,rockford pain center,11,6430,0.17,9.38,clinic-name
437,peninsula pain clinic,7,6426,0.11,16.75,clinic-name
285,clearway pain solutions,10,6346,0.16,8.43,generic
363,pain management clinic,8,5894,0.14,25.55,clinic-name


In [5]:
# CTR by position bucket
def position_bucket(pos):
    """Map an average search position to a coarse bucket label.

    Args:
        pos: Average position (lower is better). May be missing.

    Returns:
        '1-3', '4-10', '11-20' or '21+'; None when *pos* is missing, so the
        row is left out of the bucket grouping instead of being counted as
        '21+' (NaN) or raising on the comparison (pd.NA).
    """
    if pd.isna(pos):
        return None
    if pos <= 3:
        return '1-3'
    elif pos <= 10:
        return '4-10'
    elif pos <= 20:
        return '11-20'
    else:
        return '21+'

queries['PosBucket'] = queries['Position'].apply(position_bucket)

# Aggregate per bucket and present the buckets in ranking order.
_bucket_order = ['1-3', '4-10', '11-20', '21+']
_bucket_metrics = {
    'queries_count': ('Query', 'size'),
    'total_impressions': ('Impressions', 'sum'),
    'total_clicks': ('Clicks', 'sum'),
    'avg_ctr': ('CTR', 'mean'),
}
bucket_stats = (
    queries.groupby('PosBucket')
    .agg(**_bucket_metrics)
    .reindex(_bucket_order)
)

# Pooled CTR (clicks / impressions) weights each query by its impressions,
# unlike avg_ctr which averages the per-query percentages.
_pooled_ctr = bucket_stats['total_clicks'] / bucket_stats['total_impressions'] * 100
bucket_stats['weighted_ctr'] = _pooled_ctr.round(2)

print('CTR by position bucket:')
bucket_stats

CTR by position bucket:


Unnamed: 0_level_0,queries_count,total_impressions,total_clicks,avg_ctr,weighted_ctr
PosBucket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1-3,29,11034,1052,14.427241,9.53
4-10,721,428804,9189,4.540943,2.14
11-20,168,146093,1482,3.588631,1.01
21+,82,139139,1138,2.127805,0.82


In [6]:
# Quick wins: queries ranking at position 4-15 (inclusive) with at least
# 50 impressions, largest impression counts first.
_in_striking_range = queries['Position'].between(4, 15)
_has_volume = queries['Impressions'] >= 50
quick_wins = (
    queries[_in_striking_range & _has_volume]
    .sort_values('Impressions', ascending=False)
    .copy()
)

print(f'Quick wins: {len(quick_wins)} queries at position 4-15 with 50+ impressions')
_quick_win_cols = ['Query', 'Clicks', 'Impressions', 'CTR', 'Position', 'Cluster']
quick_wins.head(20)[_quick_win_cols]

Quick wins: 762 queries at position 4-15 with 50+ impressions


Unnamed: 0,Query,Clicks,Impressions,CTR,Position,Cluster
38,jawad bhatti md,47,9800,0.48,8.17,city-state
113,laurel pain clinic,20,6483,0.31,11.13,clinic-name
258,rockford pain center,11,6430,0.17,9.38,clinic-name
285,clearway pain solutions,10,6346,0.16,8.43,generic
40,knee pain centers of america,46,5775,0.8,8.15,condition
286,sweetwater pain and spine,10,5027,0.2,5.84,generic
190,dominion spine and pain,14,4869,0.29,4.88,generic
260,commonwealth pain and spine,11,4674,0.24,7.3,generic
8,spine and pain associates,122,4593,2.66,6.45,clinic-name
2,open arms pain clinic,268,4581,5.85,6.0,clinic-name


In [7]:
# Gap analysis: queries with 100+ impressions that still rank beyond
# position 10, ordered by impressions.
_beyond_top_ten = queries['Position'] > 10
_has_demand = queries['Impressions'] >= 100
gaps = queries[_beyond_top_ten & _has_demand].sort_values('Impressions', ascending=False)

print(f'Ranking opportunities: {len(gaps)} queries with 100+ impressions but position >10')
_gap_cols = ['Query', 'Clicks', 'Impressions', 'CTR', 'Position', 'Cluster']
gaps.head(20)[_gap_cols]

Ranking opportunities: 218 queries with 100+ impressions but position >10


Unnamed: 0,Query,Clicks,Impressions,CTR,Position,Cluster
1,pain management near me,277,21110,1.31,20.11,near-me
21,pain management,76,11602,0.66,34.37,generic
54,pain clinic,35,9564,0.37,21.01,clinic-name
20,pain clinic near me,78,9059,0.86,17.22,near-me
113,laurel pain clinic,20,6483,0.31,11.13,clinic-name
437,peninsula pain clinic,7,6426,0.11,16.75,clinic-name
363,pain management clinic,8,5894,0.14,25.55,clinic-name
541,national spine and pain center,6,5321,0.11,15.58,clinic-name
25,pain management doctors near me,69,5262,1.31,29.36,near-me
85,pain management doctor near me,24,5010,0.48,27.22,near-me


In [8]:
# CTR anomalies: position better than 5 yet CTR under 3%, limited to
# queries with 50+ impressions. A likely sign of weak titles/descriptions.
_ranks_high = queries['Position'] < 5
_low_ctr = queries['CTR'] < 3
_enough_impressions = queries['Impressions'] >= 50
ctr_anomalies = queries[_ranks_high & _low_ctr & _enough_impressions].sort_values(
    'Impressions', ascending=False
)

print(f'CTR anomalies: {len(ctr_anomalies)} queries with position <5 but CTR <3%')
_anomaly_cols = ['Query', 'Clicks', 'Impressions', 'CTR', 'Position', 'Cluster']
ctr_anomalies.head(20)[_anomaly_cols]

CTR anomalies: 34 queries with position <5 but CTR <3%


Unnamed: 0,Query,Clicks,Impressions,CTR,Position,Cluster
190,dominion spine and pain,14,4869,0.29,4.88,generic
51,center for complex pain care,37,2523,1.47,4.1,clinic-name
191,summit pain management marion indiana,14,1911,0.73,4.98,city-state
460,sweetwater pain and spine sparks,7,1084,0.65,4.75,generic
697,dominion spine and pain springfield va,5,946,0.53,4.4,city-state
142,three crosses pain management,17,832,2.04,4.85,generic
195,dr silver,14,786,1.78,4.93,clinic-name
168,dr vish richmond indiana,15,739,2.03,4.65,city-state
115,spine and pain associates chattanooga,20,699,2.86,4.61,clinic-name
570,portland pain solutions,6,544,1.1,4.77,generic


---
## 1B: Page Performance Analysis

In [9]:
# Classify page types from URL
def classify_page(url):
    """Classify a page URL by its path (and query string for the root).

    Args:
        url: Absolute page URL. Missing or non-string values are
            classified as 'other' rather than raising.

    Returns:
        One of 'homepage', 'clinic-old-url', 'state', 'city', 'clinic',
        'blog', 'static', 'directory' or 'other'.
    """
    if not isinstance(url, str):
        return 'other'

    parsed = urlparse(url)
    path = parsed.path.rstrip('/')

    # Root URL: either the homepage or a legacy /?clinics=<slug> clinic link.
    if not path:
        if 'clinics=' in parsed.query:
            return 'clinic-old-url'  # Legacy /?clinics= format
        return 'homepage'

    prefix = '/pain-management/'
    if path.startswith(prefix):
        # Strip only the leading prefix; a later occurrence inside the slug
        # must not be removed.
        parts = path[len(prefix):].strip('/').split('/')
        # State pages use a two-letter lowercase abbreviation as first segment.
        starts_with_state = re.fullmatch(r'[a-z]{2}', parts[0]) is not None
        if starts_with_state and len(parts) == 1:
            return 'state'
        if starts_with_state and len(parts) == 2:
            return 'city'  # state/city
        return 'clinic'

    if path.startswith('/blog'):
        return 'blog'

    if path in ['/about', '/faq', '/contact', '/privacy', '/terms', '/claim']:
        return 'static'

    if path.startswith('/directory') or path.startswith('/search'):
        return 'directory'

    return 'other'

# Re-coerce the page metrics to numeric before aggregating.
for metric in ('Clicks', 'Impressions', 'Position', 'CTR'):
    pages[metric] = pd.to_numeric(pages[metric], errors='coerce')

# Tag every URL with its page type.
pages['PageType'] = pages['Page'].apply(classify_page)

print('Page type distribution:')
_page_type_metrics = {
    'count': ('Page', 'size'),
    'total_impressions': ('Impressions', 'sum'),
    'total_clicks': ('Clicks', 'sum'),
    'avg_position': ('Position', 'mean'),
    'avg_ctr': ('CTR', 'mean'),
}
type_summary = (
    pages.groupby('PageType')
    .agg(**_page_type_metrics)
    .sort_values('total_impressions', ascending=False)
)
type_summary

Page type distribution:


Unnamed: 0_level_0,count,total_impressions,total_clicks,avg_position,avg_ctr
PageType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
clinic,899,1969464,36614,12.91921,2.626585
clinic-old-url,95,287135,5461,21.290632,2.318632
other,5,54128,315,24.636,1.832
homepage,1,1935,15,48.29,0.78


In [10]:
# Rank pages by impressions instead of clicks so pages that are shown a
# lot but rarely clicked surface at the top.
print('Top 25 pages by impressions:')
_top_page_cols = ['Page', 'Clicks', 'Impressions', 'CTR', 'Position', 'PageType']
pages.nlargest(25, 'Impressions')[_top_page_cols]

Top 25 pages by impressions:


Unnamed: 0,Page,Clicks,Impressions,CTR,Position,PageType
47,https://painclinics.com/ergonomic-adjustments-for-knee-pain/,114,29384,0.39,25.69,other
0,https://painclinics.com/?clinics=center-for-symptom-relief-llc,981,20009,4.9,12.84,clinic-old-url
470,https://painclinics.com/visual-analog-scale/,27,17140,0.16,32.37,other
9,https://painclinics.com/pain-management/jawad-bhatti-md-va-23225/,292,16142,1.81,8.18,clinic
4,https://painclinics.com/?clinics=comprehensive-pain-and-rehab-of-northwest-a...,451,14260,3.16,14.28,clinic-old-url
2,https://painclinics.com/?clinics=csar-pllc-dr-oliver-c-james-md,652,14139,4.61,13.71,clinic-old-url
89,https://painclinics.com/pain-management/the-pain-management-group-clarksvill...,80,13659,0.59,11.76,clinic
276,https://painclinics.com/pain-management/relief-spine-pain-center-al-35968/,39,12416,0.31,9.33,clinic
10,https://painclinics.com/pain-management/spine-pain-associates-tn-37660/,279,12189,2.29,12.71,clinic
102,https://painclinics.com/?clinics=access-pain-solutions-tulsa-pain-management...,75,11988,0.63,25.75,clinic-old-url


In [11]:
# Pages shown 500+ times with CTR under 2%: candidates for meta
# title/description work.
_widely_shown = pages['Impressions'] >= 500
_rarely_clicked = pages['CTR'] < 2
underperforming_pages = pages[_widely_shown & _rarely_clicked].sort_values(
    'Impressions', ascending=False
)

print(f'Underperforming pages: {len(underperforming_pages)} pages with 500+ impressions and CTR <2%')
_underperforming_cols = ['Page', 'Clicks', 'Impressions', 'CTR', 'Position', 'PageType']
underperforming_pages.head(15)[_underperforming_cols]

Underperforming pages: 499 pages with 500+ impressions and CTR <2%


Unnamed: 0,Page,Clicks,Impressions,CTR,Position,PageType
47,https://painclinics.com/ergonomic-adjustments-for-knee-pain/,114,29384,0.39,25.69,other
470,https://painclinics.com/visual-analog-scale/,27,17140,0.16,32.37,other
9,https://painclinics.com/pain-management/jawad-bhatti-md-va-23225/,292,16142,1.81,8.18,clinic
89,https://painclinics.com/pain-management/the-pain-management-group-clarksvill...,80,13659,0.59,11.76,clinic
276,https://painclinics.com/pain-management/relief-spine-pain-center-al-35968/,39,12416,0.31,9.33,clinic
102,https://painclinics.com/?clinics=access-pain-solutions-tulsa-pain-management...,75,11988,0.63,25.75,clinic-old-url
426,https://painclinics.com/pain-management/national-spine-pain-centers-winchest...,29,10311,0.28,17.09,clinic
427,https://painclinics.com/pain-management/peninsula-pain-clinic-silverdale-a-s...,29,9856,0.29,14.72,clinic
254,https://painclinics.com/pain-management/ms-pain-migraine-alan-k-cole-md-ms-3...,41,9783,0.42,11.49,clinic
199,https://painclinics.com/pain-management/the-center-for-pain-dothan-al-36303/,51,9771,0.52,31.62,clinic


In [12]:
# "Orphaned" here means pages that appeared in search (10+ impressions)
# but received no clicks at all.
_no_clicks = pages['Clicks'] == 0
_was_shown = pages['Impressions'] >= 10
orphaned = pages[_no_clicks & _was_shown].sort_values('Impressions', ascending=False)

print(f'Orphaned pages (0 clicks, 10+ impressions): {len(orphaned)}')
_orphaned_cols = ['Page', 'Impressions', 'Position', 'PageType']
orphaned.head(15)[_orphaned_cols]

Orphaned pages (0 clicks, 10+ impressions): 0


Unnamed: 0,Page,Impressions,Position,PageType


---
## 1C: Indexing & Coverage Analysis

In [13]:
# The GSC coverage export is one row per date; show the latest rows.
print('Coverage data columns:', coverage.columns.tolist())
print()

# Parse dates and coerce whichever count columns are present.
coverage['Date'] = pd.to_datetime(coverage['Date'])
_count_cols = [name for name in ('Indexed', 'Not indexed', 'Impressions') if name in coverage.columns]
for name in _count_cols:
    coverage[name] = pd.to_numeric(coverage[name], errors='coerce')

# Show the last five rows that carry an Indexed value (rows are taken in
# file order; Not indexed is not required to be present here).
if 'Indexed' in coverage.columns:
    indexed_col = 'Indexed'
    recent = coverage.dropna(subset=[indexed_col]).tail(5)
    print('Recent coverage snapshots:')
    display(recent)
else:
    indexed_col = None
    print('No "Indexed" column found in coverage data')
    print(coverage.head())

Coverage data columns: ['Date', 'Not indexed', 'Indexed', 'Impressions']

Recent coverage snapshots:


Unnamed: 0,Date,Not indexed,Indexed,Impressions
84,2026-02-19,73152,12428,64806.0
85,2026-02-20,73152,12428,38176.0
86,2026-02-21,74407,12122,29629.0
87,2026-02-22,74407,12122,78297.0
88,2026-02-23,74407,12122,


In [14]:
# Latest index rate: Indexed / (Indexed + Not indexed) for the last row
# (in file order) that has both values.
not_indexed_col = 'Not indexed'
indexed_col = 'Indexed'

_has_breakdown = indexed_col in coverage.columns and not_indexed_col in coverage.columns
if not _has_breakdown:
    print('Coverage CSV does not have Indexed/Not indexed breakdown.')
    print('Available columns:', coverage.columns.tolist())
    print()
    print('Raw coverage data sample:')
    print(coverage.head(10))
else:
    coverage_with_data = coverage.dropna(subset=[indexed_col, not_indexed_col])
    if len(coverage_with_data) == 0:
        print('No rows with both Indexed and Not indexed values.')
    else:
        latest = coverage_with_data.iloc[-1]
        indexed_val = float(latest[indexed_col])
        not_indexed_val = float(latest[not_indexed_col])
        print(f"Latest coverage snapshot ({latest['Date'].strftime('%Y-%m-%d')}):")
        print(f"  Indexed: {indexed_val:.0f}")
        print(f"  Not indexed: {not_indexed_val:.0f}")
        total = indexed_val + not_indexed_val
        # Skip the rate when both counts are zero to avoid dividing by zero.
        if total > 0:
            pct = indexed_val / total * 100
            print(f"  Index rate: {pct:.1f}%")

Latest coverage snapshot (2026-02-23):
  Indexed: 12122
  Not indexed: 74407
  Index rate: 14.0%


---
## 1D: 404 Recovery Audit

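No 404 export is loaded in this notebook, so this section uses a proxy: it lists legacy `/?clinics=` URLs and `www.` URL variants that still receive impressions and therefore need working redirects.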

In [15]:
# Legacy /?clinics=<slug> URLs that still show up in search results.
_is_legacy = pages['PageType'] == 'clinic-old-url'
legacy_urls = pages[_is_legacy].sort_values('Impressions', ascending=False)

_legacy_impressions = legacy_urls["Impressions"].sum()
_legacy_clicks = legacy_urls["Clicks"].sum()
print(f'Legacy /?clinics= URLs still getting impressions: {len(legacy_urls)}')
print(f'Total impressions on legacy URLs: {_legacy_impressions:,}')
print(f'Total clicks on legacy URLs: {_legacy_clicks:,}')
print()
_legacy_cols = ['Page', 'Clicks', 'Impressions', 'CTR', 'Position']
legacy_urls.head(15)[_legacy_cols]

Legacy /?clinics= URLs still getting impressions: 95
Total impressions on legacy URLs: 287,135
Total clicks on legacy URLs: 5,461



Unnamed: 0,Page,Clicks,Impressions,CTR,Position
0,https://painclinics.com/?clinics=center-for-symptom-relief-llc,981,20009,4.9,12.84
4,https://painclinics.com/?clinics=comprehensive-pain-and-rehab-of-northwest-a...,451,14260,3.16,14.28
2,https://painclinics.com/?clinics=csar-pllc-dr-oliver-c-james-md,652,14139,4.61,13.71
102,https://painclinics.com/?clinics=access-pain-solutions-tulsa-pain-management...,75,11988,0.63,25.75
643,https://painclinics.com/?clinics=pain-and-spine-specialists-of-pennsylvania-...,21,9654,0.22,13.61
58,https://painclinics.com/?clinics=cottonwood-medical-clinic,99,9423,1.05,32.61
218,https://painclinics.com/?clinics=laurel-pain-clinic,48,9417,0.51,12.8
80,https://painclinics.com/?clinics=access-pain-solutions-muskogee-pain-management,86,8791,0.98,10.74
155,https://painclinics.com/?clinics=dominion-spine-and-pain-springfield-va,61,8391,0.73,8.97
97,https://painclinics.com/?clinics=superior-pain-management,76,8195,0.93,17.92


In [16]:
# Pages with www vs non-www — potential duplicate/redirect issues
# regex=False: match the literal text '://www.' (the dot is not a wildcard).
www_pages = pages[pages['Page'].str.contains('://www.', regex=False)]

print(f'Pages with www prefix: {len(www_pages)}')
if len(www_pages) > 0:
    print(f'Total impressions on www pages: {www_pages["Impressions"].sum():,}')
    # A bare expression inside an `if` is not the cell's final expression,
    # so the notebook would not render it; display() shows the table.
    display(www_pages.head(10)[['Page', 'Clicks', 'Impressions', 'CTR', 'Position']])

Pages with www prefix: 363
Total impressions on www pages: 511,381


---
## 1E: "Near Me" & Generic Query Deep Dive

In [17]:
# Every query in the 'near-me' cluster, biggest impression counts first.
_is_near_me = queries['Cluster'] == 'near-me'
near_me = queries[_is_near_me].sort_values('Impressions', ascending=False)

_near_me_impressions = near_me["Impressions"].sum()
_near_me_clicks = near_me["Clicks"].sum()
_near_me_position = near_me["Position"].mean()
print(f'"Near me" queries: {len(near_me)}')
print(f'Total "near me" impressions: {_near_me_impressions:,}')
print(f'Total "near me" clicks: {_near_me_clicks:,}')
print(f'Average position: {_near_me_position:.1f}')
print()
_near_me_cols = ['Query', 'Clicks', 'Impressions', 'CTR', 'Position']
near_me.head(20)[_near_me_cols]

"Near me" queries: 25
Total "near me" impressions: 62,314
Total "near me" clicks: 798
Average position: 20.2



Unnamed: 0,Query,Clicks,Impressions,CTR,Position
1,pain management near me,277,21110,1.31,20.11
20,pain clinic near me,78,9059,0.86,17.22
25,pain management doctors near me,69,5262,1.31,29.36
85,pain management doctor near me,24,5010,0.48,27.22
16,pain clinics near me,85,3584,2.37,15.46
79,pain specialist near me,25,2714,0.92,20.0
439,pain management specialist near me,7,2631,0.27,39.66
126,pain doctor near me,19,2230,0.85,22.34
326,pain management clinic near me,9,2045,0.44,28.85
42,pain doctors near me,45,2025,2.22,19.08


In [18]:
# Queries in the 'condition' cluster: which conditions attract the most
# impressions?
_is_condition = queries['Cluster'] == 'condition'
condition_queries = queries[_is_condition].sort_values('Impressions', ascending=False)

_condition_impressions = condition_queries["Impressions"].sum()
print(f'Condition queries: {len(condition_queries)}')
print(f'Total impressions: {_condition_impressions:,}')
print()
_condition_cols = ['Query', 'Clicks', 'Impressions', 'CTR', 'Position']
condition_queries.head(20)[_condition_cols]

Condition queries: 9
Total impressions: 8,140



Unnamed: 0,Query,Clicks,Impressions,CTR,Position
40,knee pain centers of america,46,5775,0.8,8.15
23,comprehensive pain and headache treatment center,76,443,17.16,4.88
733,chronic pain management,5,443,1.13,6.83
948,knee pain centers of america: knee pain treatment in michigan | knee pain \n...,4,356,1.12,5.16
56,"center for chronic pain management johnson city, tn",34,339,10.03,4.65
224,premier chronic pain care,13,294,4.42,7.81
978,knee pain centers of america reviews,4,252,1.59,8.99
112,chronic pain management queensbury,21,143,14.69,2.38
123,center for chronic pain management,20,95,21.05,6.64


In [19]:
# Queries in the 'treatment' cluster, ordered by impressions.
_is_treatment = queries['Cluster'] == 'treatment'
treatment_queries = queries[_is_treatment].sort_values('Impressions', ascending=False)

_treatment_impressions = treatment_queries["Impressions"].sum()
print(f'Treatment queries: {len(treatment_queries)}')
print(f'Total impressions: {_treatment_impressions:,}')
print()
_treatment_cols = ['Query', 'Clicks', 'Impressions', 'CTR', 'Position']
treatment_queries.head(20)[_treatment_cols]

Treatment queries: 1
Total impressions: 60



Unnamed: 0,Query,Clicks,Impressions,CTR,Position
840,center for regenerative orthopedic medicine,5,60,8.33,39.5


In [20]:
# Queries that fell through to the 'generic' cluster (e.g. "pain
# management"), ordered by impressions.
_is_generic = queries['Cluster'] == 'generic'
generic_queries = queries[_is_generic].sort_values('Impressions', ascending=False)

_generic_impressions = generic_queries["Impressions"].sum()
print(f'Generic queries: {len(generic_queries)}')
print(f'Total impressions: {_generic_impressions:,}')
print()
_generic_cols = ['Query', 'Clicks', 'Impressions', 'CTR', 'Position']
generic_queries.head(20)[_generic_cols]

Generic queries: 269
Total impressions: 218,093



Unnamed: 0,Query,Clicks,Impressions,CTR,Position
21,pain management,76,11602,0.66,34.37
285,clearway pain solutions,10,6346,0.16,8.43
286,sweetwater pain and spine,10,5027,0.2,5.84
190,dominion spine and pain,14,4869,0.29,4.88
260,commonwealth pain and spine,11,4674,0.24,7.3
75,advanced pain management,26,4440,0.59,23.73
125,comprehensive pain management,19,4130,0.46,35.91
542,integrated pain management,6,4096,0.15,18.73
682,national spine and pain,5,3629,0.14,15.07
86,access pain solutions,24,3601,0.67,9.98


In [21]:
# Content gap candidates: queries with 50+ impressions from every cluster
# except 'brand' and 'clinic-name'. The site likely answers these with the
# homepage or a clinic page rather than dedicated content.
_gap_clusters = ['near-me', 'condition', 'treatment', 'generic', 'city-state']
_in_gap_cluster = queries['Cluster'].isin(_gap_clusters)
_gap_volume = queries['Impressions'] >= 50
high_value_nonbrand = queries[_in_gap_cluster & _gap_volume].sort_values(
    'Impressions', ascending=False
)

_hv_impressions = high_value_nonbrand["Impressions"].sum()
print(f'High-value non-brand queries (50+ impressions): {len(high_value_nonbrand)}')
print(f'Total impressions: {_hv_impressions:,}')
print()

# Per-cluster roll-up of the same set.
_hv_metrics = {
    'queries': ('Query', 'size'),
    'total_impressions': ('Impressions', 'sum'),
    'avg_position': ('Position', 'mean'),
}
hv_by_cluster = (
    high_value_nonbrand.groupby('Cluster')
    .agg(**_hv_metrics)
    .sort_values('total_impressions', ascending=False)
)
hv_by_cluster

High-value non-brand queries (50+ impressions): 591
Total impressions: 442,262



Unnamed: 0_level_0,queries,total_impressions,avg_position
Cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
generic,260,217858,9.376692
city-state,296,153890,9.151149
near-me,25,62314,20.198
condition,9,8140,6.165556
treatment,1,60,39.5


---
## 1F: Action Items Generator

In [22]:
# Build one prioritized list of action items from the findings above.
# Each item is a dict with the keys listed in _ACTION_COLUMNS. The same query
# or URL can appear more than once when it triggers several rules (e.g. a
# legacy URL that is also an underperforming page).
_ACTION_COLUMNS = ['priority', 'type', 'query_or_url', 'action', 'impressions', 'expected_impact']
action_items = []

# 1. Meta title/description rewrites for CTR anomalies
for _, row in ctr_anomalies.iterrows():
    action_items.append({
        'priority': 'high' if row['Impressions'] >= 200 else 'medium',
        'type': 'meta',
        'query_or_url': row['Query'],
        'action': f'Rewrite meta title/description — position {row["Position"]:.1f} but only {row["CTR"]:.1f}% CTR',
        'impressions': row['Impressions'],
        'expected_impact': 'Improve CTR from current position'
    })

# 2. Quick win content optimization
# Only the 30 highest-impression quick wins are considered, and of those only
# the ones outside the brand / clinic-name clusters.
for _, row in quick_wins.head(30).iterrows():
    if row['Cluster'] in ('near-me', 'condition', 'treatment', 'generic', 'city-state'):
        action_items.append({
            'priority': 'high',
            'type': 'content',
            'query_or_url': row['Query'],
            'action': f'Optimize content for this {row["Cluster"]} query — position {row["Position"]:.1f}, {row["Impressions"]} impressions',
            'impressions': row['Impressions'],
            'expected_impact': 'Move from page 1-2 to top 3'
        })

# 3. Legacy URL redirects
for _, row in legacy_urls.head(20).iterrows():
    # Recover the clinic slug from the ?clinics=<slug> query string.
    slug = urlparse(row['Page']).query.replace('clinics=', '')
    action_items.append({
        'priority': 'high' if row['Impressions'] >= 1000 else 'medium',
        'type': 'redirect',
        'query_or_url': row['Page'],
        'action': f'Redirect legacy /?clinics={slug} to canonical URL — {row["Impressions"]:,} impressions',
        'impressions': row['Impressions'],
        'expected_impact': 'Consolidate ranking signals to canonical URL'
    })

# 4. www redirect consolidation
for _, row in www_pages.head(10).iterrows():
    action_items.append({
        'priority': 'medium',
        'type': 'redirect',
        'query_or_url': row['Page'],
        'action': f'Ensure www → non-www redirect — {row["Impressions"]:,} impressions on www version',
        'impressions': row['Impressions'],
        'expected_impact': 'Consolidate duplicate URLs'
    })

# 5. Treatment content gaps — high impression treatment queries with poor ranking
treatment_gaps = treatment_queries[
    (treatment_queries['Position'] > 15) &
    (treatment_queries['Impressions'] >= 50)
]
for _, row in treatment_gaps.iterrows():
    action_items.append({
        'priority': 'medium',
        'type': 'new-page',
        'query_or_url': row['Query'],
        'action': f'Consider treatment landing page — currently position {row["Position"]:.0f} for {row["Impressions"]} impressions',
        'impressions': row['Impressions'],
        'expected_impact': 'Dedicated content could rank for treatment queries'
    })

# 6. Underperforming pages need meta/content fixes
for _, row in underperforming_pages.head(15).iterrows():
    action_items.append({
        'priority': 'high' if row['Impressions'] >= 2000 else 'medium',
        'type': 'meta',
        'query_or_url': row['Page'],
        'action': f'Improve meta for underperforming page — {row["Impressions"]:,} impressions, {row["CTR"]:.1f}% CTR, position {row["Position"]:.1f}',
        'impressions': row['Impressions'],
        'expected_impact': 'Higher CTR from existing impressions'
    })

# Pin the columns so an empty action list still yields a frame with the
# expected schema; without this the 'priority' lookup below raises KeyError.
actions_df = pd.DataFrame(action_items, columns=_ACTION_COLUMNS)
# Sort: high priority first, then by impressions
priority_order = {'high': 0, 'medium': 1, 'low': 2}
actions_df['_priority_sort'] = actions_df['priority'].map(priority_order)
actions_df = actions_df.sort_values(['_priority_sort', 'impressions'], ascending=[True, False]).drop(columns='_priority_sort')

print(f'Total action items: {len(actions_df)}')
print(f'  High priority: {(actions_df["priority"] == "high").sum()}')
print(f'  Medium priority: {(actions_df["priority"] == "medium").sum()}')
print()
actions_df.head(20)

Total action items: 100
  High priority: 86
  Medium priority: 14



Unnamed: 0,priority,type,query_or_url,action,impressions,expected_impact
85,high,meta,https://painclinics.com/ergonomic-adjustments-for-knee-pain/,"Improve meta for underperforming page — 29,384 impressions, 0.4% CTR, positi...",29384,Higher CTR from existing impressions
54,high,redirect,https://painclinics.com/?clinics=center-for-symptom-relief-llc,Redirect legacy /?clinics=center-for-symptom-relief-llc to canonical URL — 2...,20009,Consolidate ranking signals to canonical URL
86,high,meta,https://painclinics.com/visual-analog-scale/,"Improve meta for underperforming page — 17,140 impressions, 0.2% CTR, positi...",17140,Higher CTR from existing impressions
87,high,meta,https://painclinics.com/pain-management/jawad-bhatti-md-va-23225/,"Improve meta for underperforming page — 16,142 impressions, 1.8% CTR, positi...",16142,Higher CTR from existing impressions
55,high,redirect,https://painclinics.com/?clinics=comprehensive-pain-and-rehab-of-northwest-a...,Redirect legacy /?clinics=comprehensive-pain-and-rehab-of-northwest-arkansas...,14260,Consolidate ranking signals to canonical URL
56,high,redirect,https://painclinics.com/?clinics=csar-pllc-dr-oliver-c-james-md,Redirect legacy /?clinics=csar-pllc-dr-oliver-c-james-md to canonical URL — ...,14139,Consolidate ranking signals to canonical URL
88,high,meta,https://painclinics.com/pain-management/the-pain-management-group-clarksvill...,"Improve meta for underperforming page — 13,659 impressions, 0.6% CTR, positi...",13659,Higher CTR from existing impressions
89,high,meta,https://painclinics.com/pain-management/relief-spine-pain-center-al-35968/,"Improve meta for underperforming page — 12,416 impressions, 0.3% CTR, positi...",12416,Higher CTR from existing impressions
57,high,redirect,https://painclinics.com/?clinics=access-pain-solutions-tulsa-pain-management...,Redirect legacy /?clinics=access-pain-solutions-tulsa-pain-management-doctor...,11988,Consolidate ranking signals to canonical URL
90,high,meta,https://painclinics.com/?clinics=access-pain-solutions-tulsa-pain-management...,"Improve meta for underperforming page — 11,988 impressions, 0.6% CTR, positi...",11988,Higher CTR from existing impressions


---
## Export Output CSVs

In [23]:
# Write every result table next to the input exports.
out = AUDIT_DIR

_query_cols = ['Query', 'Clicks', 'Impressions', 'CTR', 'Position', 'Cluster']
_page_cols = ['Page', 'Clicks', 'Impressions', 'CTR', 'Position', 'PageType']

# 1-5: query clusters, page performance, quick wins, content gaps
# (high-value non-brand queries) and action items.
_exports = [
    ('query-clusters.csv', queries[_query_cols + ['PosBucket']]),
    ('page-performance.csv', pages[_page_cols]),
    ('quick-wins.csv', quick_wins[_query_cols]),
    ('content-gaps.csv', high_value_nonbrand[_query_cols]),
    ('action-items.csv', actions_df),
]
for _filename, _frame in _exports:
    _frame.to_csv(out / _filename, index=False)

# 6. Index gaps: legacy and www URLs stand in as a proxy because the
# coverage export only has date-level totals.
_legacy_rows = [
    {'url': row['Page'], 'issue': 'legacy-url-format', 'impressions': row['Impressions']}
    for _, row in legacy_urls.iterrows()
]
_www_rows = [
    {'url': row['Page'], 'issue': 'www-duplicate', 'impressions': row['Impressions']}
    for _, row in www_pages.iterrows()
]
index_gap_rows = _legacy_rows + _www_rows

if index_gap_rows:
    index_gaps_df = pd.DataFrame(index_gap_rows).sort_values('impressions', ascending=False)
else:
    # Keep the header row even when there is nothing to report.
    index_gaps_df = pd.DataFrame(columns=['url', 'issue', 'impressions'])
index_gaps_df.to_csv(out / 'index-gaps.csv', index=False)

# Read each file back to confirm it exists and report its row count.
print('Exported files:')
_expected_files = ['query-clusters.csv', 'page-performance.csv', 'quick-wins.csv',
                   'content-gaps.csv', 'action-items.csv', 'index-gaps.csv']
for _name in _expected_files:
    _target = out / _name
    if _target.exists():
        _status = f'{len(pd.read_csv(_target))} rows'
    else:
        _status = 'NOT FOUND'
    print(f'  {_name}: {_status}')

Exported files:
  query-clusters.csv: 1000 rows
  page-performance.csv: 1000 rows
  quick-wins.csv: 762 rows
  content-gaps.csv: 591 rows
  action-items.csv: 100 rows


  index-gaps.csv: 458 rows


---
## Summary

Review the exported CSVs and the analysis above. Key things to look for:

1. **Quick wins** — queries at position 4-15 where small content/meta improvements can move you to top 3
2. **CTR anomalies** — good positions but poor click-through rates mean bad titles/descriptions
3. **Near-me gaps** — high-impression generic queries where city/state pages need enrichment
4. **Legacy URL consolidation** — `/?clinics=` URLs still getting impressions need redirects
5. **Treatment content gaps** — treatment queries with no dedicated landing page

Proceed to Step 3 (implementation) based on the action-items.csv priorities.