In [None]:
# | default_exp gsc_storage


In [None]:
# | export
from sqlmodel import Session, select
from seo_rat.models import GSCAnalytics
from typing import List, Dict
from datetime import datetime
from seo_rat.gsc_client import GSCAuth


In [None]:
# | export
def store_gsc_data(session: Session, site_url: str, date: str, rows: List[Dict]):
    """Store GSC rows for one site and date, updating records that already exist.

    Each row's ``keys`` list is read positionally as (query, page, country,
    device); dimensions missing from a shorter (or absent) list are stored as
    ``None``. A row whose site/date/dimension combination is already in the
    table has its metrics overwritten; otherwise a new record is added.
    A single commit is issued after the last row.

    Args:
        session: Session used for the lookups, the inserts and the commit.
        site_url: Site identifier written to every record.
        date: Date value written to every record.
        rows: Dicts carrying ``keys`` plus the ``clicks``, ``impressions``,
            ``ctr`` and ``position`` metrics.

    Raises:
        KeyError: If a row lacks one of the four metric entries.
    """
    dimension_columns = (
        GSCAnalytics.query,
        GSCAnalytics.page,
        GSCAnalytics.country,
        GSCAnalytics.device,
    )

    for row in rows:
        keys = list(row.get("keys") or [])
        # Pad to four entries so absent trailing dimensions become None.
        query, page, country, device = (keys + [None] * 4)[:4]

        # Build each dimension test explicitly. Writing
        # `col == value if present else None` parses as
        # `(col == value) if present else None`, which hands a bare None to
        # where() instead of an IS NULL comparison.
        dimension_filters = [
            column.is_(None) if value is None else column == value
            for column, value in zip(dimension_columns, (query, page, country, device))
        ]
        stmt = select(GSCAnalytics).where(
            GSCAnalytics.site_url == site_url,
            GSCAnalytics.date == date,
            *dimension_filters,
        )
        existing = session.exec(stmt).first()

        if existing is not None:
            # Update the metrics of the matching record in place.
            existing.clicks = row["clicks"]
            existing.impressions = row["impressions"]
            existing.ctr = row["ctr"]
            existing.position = row["position"]
        else:
            # No match: insert a new record for this dimension combination.
            session.add(
                GSCAnalytics(
                    site_url=site_url,
                    date=date,
                    query=query,
                    page=page,
                    country=country,
                    device=device,
                    clicks=row["clicks"],
                    impressions=row["impressions"],
                    ctr=row["ctr"],
                    position=row["position"],
                )
            )

    session.commit()


In [None]:
# | hide
from fastcore.test import test_eq
from seo_rat.gsc_client import GSCAuth, fetch_gsc_data, get_date_range
from seo_rat.sqlite_db import SQLiteDB
from sqlmodel import Session
from pprint import pprint


# Shared fixtures for the checks below: later cells read `auth`, `start`, `end` and `data`.
# NOTE(review): these calls presumably reach the live Search Console API and need credentials — confirm.
auth = GSCAuth()
start, end = get_date_range("last_days", days=30)
data = fetch_gsc_data(auth, "sc-domain:kareemai.com", start, end)


In [None]:
# Store the first five fetched rows under the `start` date, then check that the table holds more than four records.
# NOTE(review): later cells keep using `session` after this `with` block exits — confirm get_session() leaves it usable.
db = SQLiteDB()
with db.get_session() as session:
    store_gsc_data(session, "sc-domain:kareemai.com", start, data[:5])
    # verify stored
    test_eq(len(session.exec(select(GSCAnalytics)).all()) > 4, True)

In [None]:
def get_top_queries(
    session: Session,
    site_url: str,
    start_date: str,
    end_date: str,
    country: str = None,
    limit: int = 10,
) -> List[Dict]:
    """Get top queries, optionally filtered by country.

    NOTE: scratch version kept in a non-exported cell; the exported
    definition in the following cell redefines ``get_top_queries`` and
    shadows this one.

    Args:
        session: Session the aggregate query is executed on.
        site_url: Only records with this site are considered.
        start_date: Inclusive lower bound on the record date.
        end_date: Inclusive upper bound on the record date.
        country: When truthy, only records with this country are considered.
        limit: Maximum number of query groups returned.

    Returns:
        One dict per query with ``query``, ``total_clicks``,
        ``total_impressions``, ``avg_position`` and ``avg_ctr``, ordered by
        summed clicks descending.
    """
    from sqlalchemy import func

    stmt = (
        select(
            GSCAnalytics.query,
            func.sum(GSCAnalytics.clicks).label("total_clicks"),
            func.sum(GSCAnalytics.impressions).label("total_impressions"),
            func.avg(GSCAnalytics.position).label("avg_position"),
            func.avg(GSCAnalytics.ctr).label("avg_ctr"),
        )
        .where(
            GSCAnalytics.site_url == site_url,
            GSCAnalytics.date >= start_date,
            GSCAnalytics.date <= end_date,
            GSCAnalytics.query.isnot(None),
        )
        .group_by(GSCAnalytics.query)
        .order_by(func.sum(GSCAnalytics.clicks).desc())
        .limit(limit)
    )

    # Add country filter if provided
    if country:
        stmt = stmt.where(GSCAnalytics.country == country)

    return [dict(row._mapping) for row in session.exec(stmt)]


In [None]:
# | export
def get_top_queries(
    session: Session,
    site_url: str,
    start_date: str,
    end_date: str,
    country=None,
    limit: int = 10,
) -> List[Dict]:
    """Aggregate metrics per query and return the groups with the most clicks.

    Records are restricted to ``site_url``, to dates between ``start_date``
    and ``end_date`` (both inclusive) and to rows whose query is not NULL.
    A truthy ``country`` adds an equality filter on the country column.

    Returns:
        At most ``limit`` dicts with ``query``, ``total_clicks``,
        ``total_impressions``, ``avg_position`` and ``avg_ctr``, ordered by
        summed clicks descending.
    """
    from sqlalchemy import func

    # Collect every WHERE condition first; the country test is optional.
    conditions = [
        GSCAnalytics.site_url == site_url,
        GSCAnalytics.date >= start_date,
        GSCAnalytics.date <= end_date,
        GSCAnalytics.query.isnot(None),
    ]
    if country:
        conditions.append(GSCAnalytics.country == country)

    aggregates = select(
        GSCAnalytics.query,
        func.sum(GSCAnalytics.clicks).label("total_clicks"),
        func.sum(GSCAnalytics.impressions).label("total_impressions"),
        func.avg(GSCAnalytics.position).label("avg_position"),
        func.avg(GSCAnalytics.ctr).label("avg_ctr"),
    )
    stmt = (
        aggregates.where(*conditions)
        .group_by(GSCAnalytics.query)
        .order_by(func.sum(GSCAnalytics.clicks).desc())
        .limit(limit)
    )

    records = []
    for row in session.exec(stmt):
        records.append(dict(row._mapping))
    return records


In [None]:
# | hide

# Top queries over the fetched window, restricted to country code "egy"; reuses `session`, `start` and `end` from earlier cells.
test_top_queries = get_top_queries(
    session,
    site_url="sc-domain:kareemai.com",
    start_date=start,
    end_date=end,
    country="egy",
)

In [None]:
# Show the aggregated rows through the cell's last-expression display.
test_top_queries

[{'query': 'هواوي ايربودز 7i',
  'total_clicks': 1,
  'total_impressions': 1,
  'avg_position': 2.0,
  'avg_ctr': 1.0},
 {'query': 'oneplus pad 3',
  'total_clicks': 1,
  'total_impressions': 32,
  'avg_position': 12.474509803921569,
  'avg_ctr': 0.029411764705882353},
 {'query': 'huawei freebuds 5i vs 7i',
  'total_clicks': 1,
  'total_impressions': 3,
  'avg_position': 1.0,
  'avg_ctr': 0.3333333333333333},
 {'query': 'huawei buds 7i review',
  'total_clicks': 1,
  'total_impressions': 2,
  'avg_position': 1.0,
  'avg_ctr': 0.5},
 {'query': 'freebuds 7i review',
  'total_clicks': 1,
  'total_impressions': 47,
  'avg_position': 2.485144927536232,
  'avg_ctr': 0.043478260869565216},
 {'query': 'وان بلس باد 3',
  'total_clicks': 0,
  'total_impressions': 1,
  'avg_position': 2.0,
  'avg_ctr': 0.0},
 {'query': 'هواوي فري بودز 7i',
  'total_clicks': 0,
  'total_impressions': 5,
  'avg_position': 5.375,
  'avg_ctr': 0.0},
 {'query': 'هواوي freebuds 7i',
  'total_clicks': 0,
  'total_impres

In [None]:
# | export
def get_top_queries_excluding_pages(
    session: Session,
    site_url: str,
    start_date: str,
    end_date: str,
    exclude_pages: List[str],
    country=None,
    limit: int = 10,
) -> List[Dict]:
    """Get top queries by clicks, ignoring records whose page matches an excluded page.

    Args:
        session: Session the aggregate query is executed on.
        site_url: Only records with this site are considered.
        start_date: Inclusive lower bound on the record date.
        end_date: Inclusive upper bound on the record date.
        exclude_pages: Page strings; a record is dropped when its page
            contains any of them as a literal substring. Empty strings are
            ignored, and an empty or ``None`` list disables the exclusion.
        country: When truthy, only records with this country are considered.
        limit: Maximum number of query groups returned.

    Returns:
        Dicts with ``query``, ``total_clicks`` and ``total_impressions``,
        ordered by summed clicks descending.
    """
    from sqlalchemy import func, not_, or_

    # autoescape=True makes `%` and `_` in the page (both common in URLs,
    # e.g. "%20") match literally instead of acting as LIKE wildcards.
    page_matches = [
        GSCAnalytics.page.contains(page, autoescape=True)
        for page in (exclude_pages or [])
        if page
    ]

    conditions = [
        GSCAnalytics.site_url == site_url,
        GSCAnalytics.date >= start_date,
        GSCAnalytics.date <= end_date,
    ]
    # Only add the exclusion when there is something to exclude; calling
    # or_() with no arguments is not a valid filter.
    if page_matches:
        conditions.append(not_(or_(*page_matches)))
    if country:
        conditions.append(GSCAnalytics.country == country)

    stmt = (
        select(
            GSCAnalytics.query,
            func.sum(GSCAnalytics.clicks).label("total_clicks"),
            func.sum(GSCAnalytics.impressions).label("total_impressions"),
        )
        .where(*conditions)
        .group_by(GSCAnalytics.query)
        .order_by(func.sum(GSCAnalytics.clicks).desc())
        .limit(limit)
    )

    return [dict(row._mapping) for row in session.exec(stmt)]


In [None]:
# | hide
from sqlalchemy.testing.exclusions import exclude

# Same window and country as the top-queries check, but with the two FreeBuds review pages excluded.
test_exclude_page_from_top_queries = get_top_queries_excluding_pages(
    session,
    site_url="sc-domain:kareemai.com",
    start_date=start,
    end_date=end,
    exclude_pages=[
        "https://kareemai.com/blog/posts/products_reviews/Huawei%20freebuds%207i.html",
        "https://kareemai.com/blog/posts/products_reviews/Huawei%20freebuds%205i.html",
    ],
    country="egy",
)
pprint(test_exclude_page_from_top_queries)

[{'query': 'oneplus pad 3', 'total_clicks': 1, 'total_impressions': 32},
 {'query': 'وان بلس باد 3', 'total_clicks': 0, 'total_impressions': 1},
 {'query': 'ساعة هواوي gt4', 'total_clicks': 0, 'total_impressions': 1},
 {'query': 'أفضل أنواع الكرياتين للتضخيم',
  'total_clicks': 0,
  'total_impressions': 1},
 {'query': 'tokenization in arabic', 'total_clicks': 0, 'total_impressions': 1},
 {'query': 'tarteel jobs', 'total_clicks': 0, 'total_impressions': 2},
 {'query': 'tarteel careers', 'total_clicks': 0, 'total_impressions': 7},
 {'query': 'tarteel ai careers', 'total_clicks': 0, 'total_impressions': 3},
 {'query': 'pylate', 'total_clicks': 0, 'total_impressions': 3},
 {'query': 'proteinai', 'total_clicks': 0, 'total_impressions': 1}]


In [None]:
# | export
def get_page_analytics(
    session: Session, site_url: str, page_path: str, start_date: str, end_date: str
) -> Dict:
    """Get aggregated analytics for the pages whose URL contains ``page_path``.

    Args:
        session: Session the lookup is executed on.
        site_url: Only records with this site are considered.
        page_path: Literal substring the record's page must contain.
        start_date: Inclusive lower bound on the record date.
        end_date: Inclusive upper bound on the record date.

    Returns:
        Dict with ``page_path``, ``total_clicks``, ``total_impressions``,
        ``avg_position`` and ``avg_ctr`` (plain per-record means, 0 when
        nothing matched) and ``top_queries``: up to ten distinct queries
        ranked by summed clicks, then summed impressions, then name.
    """
    stmt = select(GSCAnalytics).where(
        GSCAnalytics.site_url == site_url,
        # autoescape=True keeps `%` and `_` in the path literal instead of
        # letting them act as LIKE wildcards.
        GSCAnalytics.page.contains(page_path, autoescape=True),
        GSCAnalytics.date >= start_date,
        GSCAnalytics.date <= end_date,
    )

    results = session.exec(stmt).all()
    row_count = len(results)

    # Sum clicks/impressions per query so the ranking is deterministic;
    # slicing a set would return an arbitrary, run-dependent selection.
    totals_by_query = {}
    for r in results:
        if r.query:
            totals = totals_by_query.setdefault(r.query, [0, 0])
            totals[0] += r.clicks
            totals[1] += r.impressions
    ranked_queries = sorted(
        totals_by_query,
        key=lambda q: (-totals_by_query[q][0], -totals_by_query[q][1], q),
    )

    return {
        "page_path": page_path,
        "total_clicks": sum(r.clicks for r in results),
        "total_impressions": sum(r.impressions for r in results),
        "avg_position": sum(r.position for r in results) / row_count
        if row_count
        else 0,
        "avg_ctr": sum(r.ctr for r in results) / row_count if row_count else 0,
        "top_queries": ranked_queries[:10],
    }


In [None]:
# | hide
# The site root is a substring of every page URL shown in the outputs, so this aggregates across all pages.
test_page_analytics = get_page_analytics(
    session,
    site_url="sc-domain:kareemai.com",
    page_path="https://kareemai.com",
    start_date=start,
    end_date=end,
)
pprint(test_page_analytics)

{'avg_ctr': 0.012163857114188239,
 'avg_position': 8.890007641405472,
 'page_path': 'https://kareemai.com',
 'top_queries': ['huawei freebuds 7',
                 'huawei earbuds 7',
                 'huawei freebuds 7i sound quality',
                 'huawei freebuds 7i product info and reviews',
                 'huawei 7i buds',
                 'son buenos?',
                 'freebuds5i',
                 'huawei gt watch review',
                 'huawei freebuds 7i box',
                 'huawei earbuds 7i price in pakistan'],
 'total_clicks': 97,
 'total_impressions': 10114}


In [None]:
# | export
def get_analytics_by_date_range(
    session: Session, site_url: str, start_date: str, end_date: str
) -> List[GSCAnalytics]:
    """Return every stored record for ``site_url`` dated within the inclusive range.

    Args:
        session: Session the lookup is executed on.
        site_url: Only records with this site are returned.
        start_date: Inclusive lower bound on the record date.
        end_date: Inclusive upper bound on the record date.
    """
    in_range = (
        GSCAnalytics.site_url == site_url,
        GSCAnalytics.date >= start_date,
        GSCAnalytics.date <= end_date,
    )
    matching = session.exec(select(GSCAnalytics).where(*in_range))
    return matching.all()


In [None]:
# | hide

# NOTE(review): this pretty-prints every matching record; consider showing only a slice to keep the output small.
test_get_analytics_by_date_range = get_analytics_by_date_range(
    session, site_url="sc-domain:kareemai.com", start_date=start, end_date=end
)
pprint(test_get_analytics_by_date_range)

[GSCAnalytics(id=48968, query='audífonos huawei freebuds 5i', country='usa', clicks=0, ctr=0.0, created_at=datetime.datetime(2026, 1, 16, 21, 2, 59, 820373), page='https://kareemai.com/blog/posts/products_reviews/Huawei%20freebuds%205i.html', site_url='sc-domain:kareemai.com', date='2025-12-14', device='MOBILE', impressions=1, position=1.0),
 GSCAnalytics(id=48969, query='audífonos huawei freebuds 7i', country='col', clicks=0, ctr=0.0, created_at=datetime.datetime(2026, 1, 16, 21, 2, 59, 821063), page='https://kareemai.com/blog/posts/products_reviews/Huawei%20freebuds%207i.html', site_url='sc-domain:kareemai.com', date='2025-12-14', device='MOBILE', impressions=1, position=1.0),
 GSCAnalytics(id=48970, query='audífonos huawei freebuds 7i', country='mex', clicks=0, ctr=0.0, created_at=datetime.datetime(2026, 1, 16, 21, 2, 59, 821955), page='https://kareemai.com/blog/posts/products_reviews/Huawei%20freebuds%207i.html', site_url='sc-domain:kareemai.com', date='2025-12-14', device='MOBILE'

In [None]:
# | export
def get_trends(
    session: Session,
    site_url: str,
    start_date: str,
    end_date: str,
    dimension: str = None,
) -> List[Dict]:
    """Return per-date metric totals and averages, optionally split by one column.

    Records are restricted to ``site_url`` and to dates between
    ``start_date`` and ``end_date`` (both inclusive), grouped by date and
    ordered by date. When ``dimension`` is truthy it is looked up as an
    attribute of ``GSCAnalytics`` and used as a second grouping column.

    Returns:
        Dicts with ``date``, the dimension column when requested, and
        ``clicks``, ``impressions``, ``avg_position`` and ``avg_ctr``.
    """
    from sqlalchemy import func

    # Grouping columns lead the result: the date, then the optional dimension.
    group_columns = [GSCAnalytics.date]
    if dimension:
        group_columns.append(getattr(GSCAnalytics, dimension))

    metric_columns = [
        func.sum(GSCAnalytics.clicks).label("clicks"),
        func.sum(GSCAnalytics.impressions).label("impressions"),
        func.avg(GSCAnalytics.position).label("avg_position"),
        func.avg(GSCAnalytics.ctr).label("avg_ctr"),
    ]

    stmt = (
        select(*group_columns, *metric_columns)
        .where(
            GSCAnalytics.site_url == site_url,
            GSCAnalytics.date >= start_date,
            GSCAnalytics.date <= end_date,
        )
        .group_by(*group_columns)
        .order_by(GSCAnalytics.date)
    )

    return [dict(record._mapping) for record in session.exec(stmt)]


In [None]:
# | hide
# Daily totals without a dimension split; expects one dict per date in the window.
test_get_trends = get_trends(
    session, site_url="sc-domain:kareemai.com", start_date=start, end_date=end
)
pprint(test_get_trends)

[{'avg_ctr': 0.00546448087431694,
  'avg_position': 8.189642324888228,
  'clicks': 1,
  'date': '2025-12-14',
  'impressions': 262},
 {'avg_ctr': 0.02843601895734597,
  'avg_position': 10.81911532385466,
  'clicks': 7,
  'date': '2025-12-15',
  'impressions': 293},
 {'avg_ctr': 0.0136986301369863,
  'avg_position': 6.706816699282453,
  'clicks': 3,
  'date': '2025-12-16',
  'impressions': 301},
 {'avg_ctr': 0.022026431718061675,
  'avg_position': 8.283590308370044,
  'clicks': 5,
  'date': '2025-12-17',
  'impressions': 299},
 {'avg_ctr': 0.021164021164021163,
  'avg_position': 9.289241622574956,
  'clicks': 5,
  'date': '2025-12-18',
  'impressions': 252},
 {'avg_ctr': 0.012280701754385965,
  'avg_position': 8.329720893141946,
  'clicks': 4,
  'date': '2025-12-19',
  'impressions': 285},
 {'avg_ctr': 0.015957446808510637,
  'avg_position': 7.992287234042553,
  'clicks': 3,
  'date': '2025-12-20',
  'impressions': 260},
 {'avg_ctr': 0.020512820512820513,
  'avg_position': 8.37863247863

In [None]:
# | export
def get_analytics_by_country(
    session: Session, site_url: str, country: str, start_date: str, end_date: str
) -> List[Dict]:
    """Sum clicks and impressions per query for one country.

    Records are restricted to ``site_url``, to an exact match on ``country``
    and to dates between ``start_date`` and ``end_date`` (both inclusive).

    Returns:
        One dict per query with ``query``, ``clicks`` and ``impressions``.
    """
    from sqlalchemy import func

    matches_request = (
        GSCAnalytics.site_url == site_url,
        GSCAnalytics.country == country,
        GSCAnalytics.date >= start_date,
        GSCAnalytics.date <= end_date,
    )
    per_query = select(
        GSCAnalytics.query,
        func.sum(GSCAnalytics.clicks).label("clicks"),
        func.sum(GSCAnalytics.impressions).label("impressions"),
    )
    stmt = per_query.where(*matches_request).group_by(GSCAnalytics.query)

    records = []
    for row in session.exec(stmt):
        records.append(dict(row._mapping))
    return records


In [None]:
# | hide
# Per-query totals for country code "egy" over the fetched window.
test_get_analytics_by_country = get_analytics_by_country(
    session,
    site_url="sc-domain:kareemai.com",
    start_date=start,
    end_date=end,
    country="egy",
)
pprint(test_get_analytics_by_country)


[{'clicks': 0, 'impressions': 1, 'query': 'ai summary'},
 {'clicks': 0,
  'impressions': 2,
  'query': 'anker liberty 5 vs huawei freebuds 7i'},
 {'clicks': 0, 'impressions': 7, 'query': 'arabic tokenizer'},
 {'clicks': 0, 'impressions': 2, 'query': 'cloud gpu pricing comparison'},
 {'clicks': 0, 'impressions': 1, 'query': 'farasa segmenter arabic tokenizer'},
 {'clicks': 0, 'impressions': 2, 'query': 'free buds 7i'},
 {'clicks': 0, 'impressions': 1, 'query': 'freebuds 5i'},
 {'clicks': 0, 'impressions': 4, 'query': 'freebuds 5i review'},
 {'clicks': 0, 'impressions': 9, 'query': 'freebuds 6i vs 7i'},
 {'clicks': 0, 'impressions': 10, 'query': 'freebuds 7i'},
 {'clicks': 0, 'impressions': 3, 'query': 'freebuds 7i battery life'},
 {'clicks': 0, 'impressions': 2, 'query': 'freebuds 7i huawei'},
 {'clicks': 0, 'impressions': 1, 'query': 'freebuds 7i inceleme'},
 {'clicks': 1, 'impressions': 47, 'query': 'freebuds 7i review'},
 {'clicks': 0, 'impressions': 14, 'query': 'freebuds 7i review 

In [None]:
# | export
def get_analytics_by_device(
    session: Session, site_url: str, device: str, start_date: str, end_date: str
) -> List[Dict]:
    """Sum clicks and impressions per query for one device value.

    Records are restricted to ``site_url``, to an exact match on ``device``
    and to dates between ``start_date`` and ``end_date`` (both inclusive).

    Returns:
        One dict per query with ``query``, ``clicks`` and ``impressions``.
    """
    from sqlalchemy import func

    matches_request = (
        GSCAnalytics.site_url == site_url,
        GSCAnalytics.device == device,
        GSCAnalytics.date >= start_date,
        GSCAnalytics.date <= end_date,
    )
    per_query = select(
        GSCAnalytics.query,
        func.sum(GSCAnalytics.clicks).label("clicks"),
        func.sum(GSCAnalytics.impressions).label("impressions"),
    )
    stmt = per_query.where(*matches_request).group_by(GSCAnalytics.query)

    records = []
    for row in session.exec(stmt):
        records.append(dict(row._mapping))
    return records


In [None]:
# | hide
# Stored device values are upper-case (e.g. "MOBILE"), and the device
# filter is an exact comparison, so the value is passed in that form.
test_get_analytics_by_device = get_analytics_by_device(
    session,
    site_url="sc-domain:kareemai.com",
    start_date=start,
    end_date=end,
    device="MOBILE",
)
# Print the device result itself, not the country result from the earlier cell.
pprint(test_get_analytics_by_device)


[{'clicks': 0, 'impressions': 1, 'query': 'ai summary'},
 {'clicks': 0,
  'impressions': 2,
  'query': 'anker liberty 5 vs huawei freebuds 7i'},
 {'clicks': 0, 'impressions': 7, 'query': 'arabic tokenizer'},
 {'clicks': 0, 'impressions': 2, 'query': 'cloud gpu pricing comparison'},
 {'clicks': 0, 'impressions': 1, 'query': 'farasa segmenter arabic tokenizer'},
 {'clicks': 0, 'impressions': 2, 'query': 'free buds 7i'},
 {'clicks': 0, 'impressions': 1, 'query': 'freebuds 5i'},
 {'clicks': 0, 'impressions': 4, 'query': 'freebuds 5i review'},
 {'clicks': 0, 'impressions': 9, 'query': 'freebuds 6i vs 7i'},
 {'clicks': 0, 'impressions': 10, 'query': 'freebuds 7i'},
 {'clicks': 0, 'impressions': 3, 'query': 'freebuds 7i battery life'},
 {'clicks': 0, 'impressions': 2, 'query': 'freebuds 7i huawei'},
 {'clicks': 0, 'impressions': 1, 'query': 'freebuds 7i inceleme'},
 {'clicks': 1, 'impressions': 47, 'query': 'freebuds 7i review'},
 {'clicks': 0, 'impressions': 14, 'query': 'freebuds 7i review 

In [None]:
# | export
def store_single_date(session: Session, auth: GSCAuth, site_url: str, date: str) -> int:
    """Fetch the GSC rows for one date, store them, and return how many were fetched.

    ``date`` is passed to ``fetch_gsc_data`` as both the start and the end
    of the range, and the fetched rows are handed to ``store_gsc_data``
    under that same date.
    """
    from seo_rat.gsc_client import fetch_gsc_data

    day_rows = fetch_gsc_data(auth, site_url, date, date)
    fetched_count = len(day_rows)
    store_gsc_data(session, site_url, date, day_rows)
    return fetched_count


In [None]:
# | hide
# | test
# Fetch and store the single `start` day; the check expects at least one row to come back.
count = store_single_date(session, auth, "sc-domain:kareemai.com", start)
test_eq(count > 0, True)


In [None]:
# | test
# Check what's actually stored
# Inspect one arbitrary stored record (no ORDER BY is applied).
# NOTE(review): `.first()` returns None on an empty table, which would fail on the attribute access below.
stored = session.exec(select(GSCAnalytics).limit(1)).first()
print(f"Query: {stored.query}")
print(f"Clicks: {stored.clicks}")
print(f"Date: {stored.date}")
test_eq(stored.site_url, "sc-domain:kareemai.com")


Query: huawei 7i review
Clicks: 1
Date: 2026-01-12


In [None]:
# | export
def store_date_range(
    session: Session,
    auth: GSCAuth,
    site_url: str,
    start_date: str,
    end_date: str,
    delay: float = 1.0,
) -> Dict:
    """Store GSC data day by day for an inclusive date range, printing progress.

    Each day is fetched and stored independently. A failure on one day is
    recorded and the loop continues with the next day.

    Args:
        session: Session passed to ``store_gsc_data`` for every day.
        auth: Credentials object passed to ``fetch_gsc_data``.
        site_url: Site identifier to fetch and store.
        start_date: First day, formatted ``YYYY-MM-DD``.
        end_date: Last day (inclusive), formatted ``YYYY-MM-DD``.
        delay: Seconds to pause between consecutive days (default 1.0, the
            previously hard-coded value). No pause follows the final day.

    Returns:
        Dict with ``successful`` (list of date strings), ``failed`` (list of
        ``{"date", "error"}`` dicts) and ``total_records`` (rows fetched on
        successful days).

    Raises:
        ValueError: If either date string does not match ``YYYY-MM-DD``.
    """
    from datetime import datetime, timedelta
    from seo_rat.gsc_client import fetch_gsc_data
    import time

    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d")

    results = {"successful": [], "failed": [], "total_records": 0}
    total_days = (end - start).days + 1

    for day_num in range(total_days):
        date_str = (start + timedelta(days=day_num)).strftime("%Y-%m-%d")
        print(f"Processing {date_str} ({day_num + 1}/{total_days})...")

        try:
            rows = fetch_gsc_data(auth, site_url, date_str, date_str)
            store_gsc_data(session, site_url, date_str, rows)
            results["successful"].append(date_str)
            results["total_records"] += len(rows)
        except Exception as e:
            # Discard whatever the failed day left pending so it is neither
            # committed with the next day nor leaves the session unusable.
            session.rollback()
            results["failed"].append({"date": date_str, "error": str(e)})

        # Pause between requests only; nothing follows the last day.
        if delay > 0 and day_num < total_days - 1:
            time.sleep(delay)

    return results


In [None]:
# | test
# A one-day range (start == end) should yield exactly one successful date.
result = store_date_range(session, auth, "sc-domain:kareemai.com", start, start)
test_eq(len(result["successful"]), 1)
print(f"Stored {result['total_records']} records")


Stored 191 records


In [None]:
# Print every dimension and metric of one arbitrary stored record (no ORDER BY is applied).
r = session.exec(select(GSCAnalytics).limit(1)).first()
print(f"Query: {r.query}")
print(f"Page: {r.page}")
print(f"Country: {r.country}")
print(f"Device: {r.device}")
print(f"Clicks: {r.clicks}")
print(f"Impressions: {r.impressions}")
print(f"CTR: {r.ctr}")
print(f"Position: {r.position}")


Query: huawei 7i review
Page: https://kareemai.com/blog/posts/products_reviews/Huawei%20freebuds%207i.html
Country: deu
Device: MOBILE
Clicks: 1
Impressions: 2
CTR: 0.5
Position: 3.5


In [None]:
# | test
# Top five queries for the single `start` day, with no country filter.
top = get_top_queries(session, "sc-domain:kareemai.com", start, start, limit=5)
for q in top:
    print(
        f"{q['query']}: {q['total_clicks']} clicks, {q['total_impressions']} impressions"
    )


هواوي ايربودز 7i: 1 clicks, 1 impressions
tarteel ai jobs: 0 clicks, 2 impressions
tarteel ai careers: 0 clicks, 3 impressions
tai nghe huawei freebuds 7i: 0 clicks, 1 impressions
soundcore liberty 5 vs huawei freebuds 7i: 0 clicks, 1 impressions


In [None]:
# | hide

from datetime import datetime, timedelta

# Stop 3 days before now because of a GSC API limitation
# (presumably the most recent days are not yet available — confirm).
# NOTE(review): this rebinds `start`/`end`, which earlier cells use as date strings, to datetime objects.
# NOTE(review): backfills roughly a year one day at a time, so this cell is slow to re-run.
end = datetime.now() - timedelta(days=3)
start = end - timedelta(days=365 * 1)

result = store_date_range(
    session,
    auth,
    "sc-domain:kareemai.com",
    start.strftime("%Y-%m-%d"),
    end.strftime("%Y-%m-%d"),
)


Processing 2025-01-13 (1/366)...
Processing 2025-01-14 (2/366)...
Processing 2025-01-15 (3/366)...
Processing 2025-01-16 (4/366)...
Processing 2025-01-17 (5/366)...
Processing 2025-01-18 (6/366)...
Processing 2025-01-19 (7/366)...
Processing 2025-01-20 (8/366)...
Processing 2025-01-21 (9/366)...
Processing 2025-01-22 (10/366)...
Processing 2025-01-23 (11/366)...
Processing 2025-01-24 (12/366)...
Processing 2025-01-25 (13/366)...
Processing 2025-01-26 (14/366)...
Processing 2025-01-27 (15/366)...
Processing 2025-01-28 (16/366)...
Processing 2025-01-29 (17/366)...
Processing 2025-01-30 (18/366)...
Processing 2025-01-31 (19/366)...
Processing 2025-02-01 (20/366)...
Processing 2025-02-02 (21/366)...
Processing 2025-02-03 (22/366)...
Processing 2025-02-04 (23/366)...
Processing 2025-02-05 (24/366)...
Processing 2025-02-06 (25/366)...
Processing 2025-02-07 (26/366)...
Processing 2025-02-08 (27/366)...
Processing 2025-02-09 (28/366)...
Processing 2025-02-10 (29/366)...
Processing 2025-02-11 (