<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Define-credentials-&amp;-connection-function" data-toc-modified-id="Define-credentials-&amp;-connection-function-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Define credentials &amp; connection function</a></span></li><li><span><a href="#Base-query-function" data-toc-modified-id="Base-query-function-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Base query function</a></span></li><li><span><a href="#Pagination-function" data-toc-modified-id="Pagination-function-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Pagination function</a></span></li><li><span><a href="#Time-variables" data-toc-modified-id="Time-variables-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Time variables</a></span><ul class="toc-item"><li><span><a href="#Get-sites-URL" data-toc-modified-id="Get-sites-URL-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>Get sites URL</a></span></li></ul></li><li><span><a href="#Loop-over-countries" data-toc-modified-id="Loop-over-countries-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Loop over countries</a></span></li><li><span><a href="#Classificate-brand,-generic-and-cameras" data-toc-modified-id="Classificate-brand,-generic-and-cameras-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Classificate brand, generic and cameras</a></span></li><li><span><a href="#Parse-dates" data-toc-modified-id="Parse-dates-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Parse dates</a></span></li></ul></div>

In [1]:
import calendar
import datetime
import json
import pandas as pd
import re
import requests

from google.oauth2 import service_account
from googleapiclient.discovery import build

pd.set_option('max_colwidth', 150)

# Define credentials & connection function

In [2]:
# Path to the service-account JSON key file; handed to connect() to authenticate.
key = 'credentials/group-lead-gen-f7a1d777494c.json'

In [3]:
#------------------------------------------- connection function -------------------------------------------

def connect(key):
    """Build an authenticated Google Search Console API client.
    Args:
        key (string): Path to the service-account JSON key file.
    Returns:
        service (object): 'webmasters' v3 service object.
    """
    creds = service_account.Credentials.from_service_account_file(
        key,
        scopes=['https://www.googleapis.com/auth/webmasters'],
    )
    return build('webmasters', 'v3', credentials=creds)

# Base query function

In [4]:
#------------------------------------------- base query function -------------------------------------------

def query(service, site_url, payload):
    """Run a single Search Analytics query and return the rows as a dataframe.
    Args:
        service (object): Service object from connect()
        site_url (string): URL of Google Search Console property
        payload (dict): API query payload dictionary
    Return:
        df (dataframe): One row per API result row, with one column per
            requested dimension plus 'clicks', 'impressions', 'ctr' (as a
            percentage, 2 decimals), 'position' (2 decimals) and 'country'.
            Empty dataframe when the API returns no rows.
    """

    response = service.searchanalytics().query(siteUrl=site_url, body=payload).execute()

    dimensions = payload.get('dimensions', [])
    # Everything after the last '.' of the property URL (includes a trailing
    # '/' when the URL has one).
    site_suffix = site_url[site_url.rindex('.') + 1:]

    results = []
    # A response without matching data carries no 'rows' key at all.
    for row in response.get('rows', []):
        data = dict(zip(dimensions, row.get('keys', [])))
        data['clicks'] = row['clicks']
        data['impressions'] = row['impressions']
        data['ctr'] = round(row['ctr'] * 100, 2)
        data['position'] = round(row['position'], 2)
        # NOTE: if 'country' is also a requested dimension, this overwrites it.
        data['country'] = site_suffix
        results.append(data)

    return pd.DataFrame(results)

# Pagination function

In [5]:
#------------------------------------------- pagination function -------------------------------------------

def paginated(service, site_url, payload):
    """Run a Search Analytics query page by page and return all rows as a dataframe.
    Args:
        service (object): Service object from connect()
        site_url (string): URL of Google Search Console property
        payload (dict): API query payload dictionary. It is not modified, so
            the same payload can be reused for several sites. 'rowLimit'
            defaults to 1000 (the API default) and 'startRow' to 0.
    Return:
        df (dataframe): One row per API result row, with one column per
            requested dimension plus 'clicks', 'impressions', 'ctr' (as a
            percentage, 2 decimals), 'position' (2 decimals) and
            'site_country'. Empty dataframe when the API returns no rows.
    """

    # Work on a copy: writing 'startRow' into the caller's dict would make the
    # next call that reuses the payload start from a stale offset.
    request = dict(payload)
    row_limit = request.get('rowLimit', 1000)
    first_row = request.get('startRow', 0)
    dimensions = request.get('dimensions', [])
    # Everything after the last '.' of the property URL (includes a trailing
    # '/' when the URL has one).
    site_suffix = site_url[site_url.rindex('.') + 1:]

    results = []
    page = 0
    response = service.searchanalytics().query(siteUrl=site_url, body=request).execute()

    # Stop on a missing OR empty 'rows' so an empty page cannot loop forever.
    while response.get('rows'):

        print('Working on loop', page)

        for row in response['rows']:
            data = dict(zip(dimensions, row.get('keys', [])))
            data['clicks'] = row['clicks']
            data['impressions'] = row['impressions']
            data['ctr'] = round(row['ctr'] * 100, 2)
            data['position'] = round(row['position'], 2)
            data['site_country'] = site_suffix
            results.append(data)

        page += 1
        request['startRow'] = first_row + row_limit * page
        response = service.searchanalytics().query(siteUrl=site_url, body=request).execute()

    return pd.DataFrame(results)

# Time variables

In [6]:
#------------------------------------------- check availability and define time period -------------------------------

today = datetime.datetime.today()
# 480-day look-back (presumably Search Console's ~16-month retention; confirm).
gsc_window = datetime.timedelta(days=480)
first_available = today - gsc_window

# Start on the first day of the month AFTER first_available: day 1 plus 32 days
# always lands in the following month, then snap back to day 1.
startDate = (
    (first_available.replace(day=1) + datetime.timedelta(days=32))
    .replace(day=1)
    .strftime('%Y-%m-%d')
)
# The reporting period ends today.
endDate = f'{today:%Y-%m-%d}'

In [8]:
today, first_available, endDate, startDate

(datetime.datetime(2022, 4, 27, 10, 48, 7, 406368),
 datetime.datetime(2021, 1, 2, 10, 48, 7, 406368),
 '2022-04-27',
 '2021-02-01')

## Get sites URL

In [9]:
#------------------------------------------- get all sites ------------------------------------------

# One API call: list the Search Console sites available to the credentials in `key`.
site_list = connect(key).sites().list().execute()

In [10]:
#------------------------------------------- get verified sites ------------------------------------------

# Keep verified entries whose URL starts with 'http'; anything else
# (presumably 'sc-domain:' domain properties) is skipped.
verified_sites_urls = [
    entry['siteUrl']
    for entry in site_list['siteEntry']
    if entry['permissionLevel'] != 'siteUnverifiedUser'
    and entry['siteUrl'].startswith('http')
]


# Classificate brand, generic and cameras

In [11]:
# Brand name and its common spellings; applies to every site.
re_branded = re.compile(r'.*v[eèé]r[iy]su.*', re.IGNORECASE)
# 'securita' / 'segurita' spellings; counted as brand only for ES & PT sites.
re_branded_sd = re.compile(r'.*se[cg]urita.*', re.IGNORECASE)
# Camera spellings (c/k, accented vowels) or anything containing 'monitor'.
# The previous '.*monitor*' made the final 'r' optional.
re_cameras = re.compile(r'.*[kc][aáà]m[aáàeéè]ra.*|.*monitor.*', re.IGNORECASE)
# Site-country values for ES & PT. The previous '.*es*|.*pt*' matched any
# string containing 'e' or 'p' (e.g. 'de', 'se', 'pe'); require the whole
# token instead. A trailing '/' after the country code is still accepted.
re_sd = re.compile(r'.*\b(?:es|pt)\b', re.IGNORECASE)

In [12]:
#------------------------------------------- Queries categorization function -------------------------------

def _query_category(query, site_country):
    """Return 'Brand', 'Cameras' or 'Generic' for one query / site-country pair."""
    # The ES & PT-only brand pattern counts only when the site country matches.
    if re_branded_sd.search(query) and re_sd.search(site_country):
        return 'Brand'
    if re_branded.search(query):
        return 'Brand'
    if re_cameras.search(query):
        return 'Cameras'
    return 'Generic'


def query_cats(df):
    """Categorizes queries as Brand, Cameras or Generic in a new 'query_cat' column.
        Uses the module-level compiled patterns:
        re_branded -- brand name, any site
        re_branded_sd -- brand name, only when re_sd matches the site country (ES & PT)
        re_cameras -- camera-related queries
        Brand takes precedence over Cameras; everything else is Generic.
    Arguments:
        df: data frame to categorize, containing 'query' and 'site_country' columns
    Return:
        df (dataframe): The same dataframe (modified in place) with 'query_cat' added.
    """

    # Build the column row by row. df.apply(axis=1) hands back a DataFrame for
    # an empty input, which cannot be assigned to a single column.
    df['query_cat'] = [
        _query_category(query, site_country)
        for query, site_country in zip(df['query'], df['site_country'])
    ]

    return df

# Parse dates

In [13]:
#------------------------------------------- Convert 'date' column to date objects -------------------------------

def date_datetime(df):
    """Parses the 'date' column and stores it as plain date objects (no time part).
        No other columns are created.
    Arguments:
        df: data frame to convert, including 'date' column
    Return:
        df (dataframe): The same dataframe, modified in place.
    """
    parsed = pd.to_datetime(df['date'])
    df['date'] = parsed.dt.date

    return df

In [14]:
#------------------------------------------- Parse dates function -------------------------------

def parse_dates(df):
    """Converts 'date' to datetime and creates new date fields:
        year -- calendar year (integer)
        month -- zero-padded month string, e.g. '04'
        week -- zero-padded week of the year, Monday as first day ('%W'), e.g. '17'
        year_month -- e.g. '2022-M04'
        year_week -- e.g. '2022-W17'
    Arguments:
        df: data frame to parse, including 'date' column
    Return:
        df (dataframe): The same dataframe, modified in place, with the new columns.
    """
    df['date'] = pd.to_datetime(df['date'])

    # Vectorised formatting: a missing date gives a missing value instead of
    # raising half-way through, after df has already been partly modified.
    dates = df['date'].dt

    df['year'] = dates.year
    df['month'] = dates.strftime('%m')
    df['week'] = dates.strftime('%W')
    df['year_month'] = dates.strftime('%Y-M%m')
    df['year_week'] = dates.strftime('%Y-W%W')

    return df