In [2]:
import argparse
import httplib2
import requests
 
from collections import defaultdict
from dateutil import relativedelta
from googleapiclient.discovery import build
from oauth2client import client
from oauth2client import file
from oauth2client import tools

In [3]:
def authorize_creds(creds, authorizedcreds='authorizedcreds.dat'):
    '''
    Run the OAuth2 flow and return an authorized Search Console client.

    Parameters:
        creds: path to the client-secrets JSON file handed to
            client.flow_from_clientsecrets.
        authorizedcreds: path given to file.Storage; previously stored
            credentials are read from it and, when a new browser
            authorization is needed, the storage object is passed to
            tools.run_flow.

    Returns:
        The object produced by build('searchconsole', 'v1', ...) on top of
        the credential-authorized HTTP client.
    '''
    print('Authorizing Creds')

    # Scope list requested for the token (read-only webmasters scope).
    scopes = ['https://www.googleapis.com/auth/webmasters.readonly']

    # run_flow expects a flags namespace; parse an empty argument list so the
    # oauth2client parser's defaults are used instead of the process argv.
    flag_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=[tools.argparser])
    flags = flag_parser.parse_args([])

    # Build the authorization flow from the client-secrets file.
    flow = client.flow_from_clientsecrets(
        creds,
        scope=scopes,
        message=tools.message_if_missing(creds))

    # Look for credentials saved by an earlier run.
    storage = file.Storage(authorizedcreds)
    credentials = storage.get()

    # Nothing stored, or stored credentials flagged invalid: authenticate again.
    needs_login = credentials is None or credentials.invalid
    if needs_login:
        credentials = tools.run_flow(flow, storage, flags)

    # Wrap a plain HTTP client with the credentials, then build the API resource on it.
    authorized_http = credentials.authorize(http=httplib2.Http())
    service = build('searchconsole', 'v1', http=authorized_http)

    print('Auth Successful')
    return service

In [5]:
# Authorize once and keep the resulting service in a module-level name;
# the later cells call webmasters_service directly.
if __name__ == '__main__':
    # Passed to authorize_creds as the client-secrets file path.
    creds = 'api-json-gsc.json'
    webmasters_service = authorize_creds(creds) 

Authorizing Creds

Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?client_id=307858290080-jdbltqmkqqg6v3vj3ud47oj8u58tl2b9.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fwebmasters.readonly&access_type=offline&response_type=code

If your browser is on a different machine then exit and re-run this
application with the command-line parameter

  --noauth_local_webserver





Authentication successful.
Auth Successful


In [11]:
# Fetch every property the authorized account can see.
site_list = webmasters_service.sites().list().execute()

# Keep properties that are not 'siteUnverifiedUser' and whose URL starts with
# 'sc-d' (the 'sc-domain:' entries). The response can omit 'siteEntry'
# altogether when the account has no properties, so fall back to an empty
# list instead of raising KeyError.
verified_sites_urls = [
    s['siteUrl']
    for s in site_list.get('siteEntry', [])
    if s['permissionLevel'] != 'siteUnverifiedUser'
    and s['siteUrl'].startswith('sc-d')
]

for site_url in verified_sites_urls:
    print(site_url)

sc-domain:wordcounter.ai
sc-domain:lettersolver.com
sc-domain:wordfinderx.com
sc-domain:word.tips
sc-domain:crossword-solver.io


In [35]:


def execute_request(service, property_uri, request):
    '''
    Send one searchanalytics query and return what execute() gives back.

    Parameters:
        service: object exposing searchanalytics().query(...).execute().
        property_uri: value passed as the query's siteUrl argument.
        request: value passed as the query's body argument.
    '''
    analytics = service.searchanalytics()
    pending_query = analytics.query(siteUrl=property_uri, body=request)
    return pending_query.execute()



In [64]:
from datetime import datetime, timedelta

# Flat result table: one [url, clicks, impressions, date] row per page per day.
list_all = []

# Take a single timestamp up front so every day offset below is measured from
# the same moment, even if the 363 requests run across midnight.
run_started = datetime.today()
today = run_started.strftime('%Y-%m-%d')

# One request per day, oldest first: 365 days ago down to 3 days ago.
for dates in reversed(range(3, 366)):

    print("Extracting data for today minus " + str(dates))

    d = (run_started - timedelta(days=dates)).strftime('%Y-%m-%d')

    # NOTE(review): no rowLimit/startRow is sent, so each day returns at most
    # the API's default number of rows (1,000 per the Search Analytics docs) —
    # confirm that truncating to the top pages per day is intended.
    request = {"startDate": d, "endDate": d, "dimensions": ["page"], "type": "web"}

    response = execute_request(webmasters_service, "sc-domain:crossword-solver.io", request)

    # A day with no data comes back without a 'rows' key; treat it as empty
    # instead of aborting the whole extraction with a KeyError.
    for iteration in response.get('rows', []):
        url = iteration["keys"][0]
        clicks = iteration["clicks"]
        impressions = iteration["impressions"]
        list_all.append([url, clicks, impressions, d])



Extracting data for today minus 365
Extracting data for today minus 364
Extracting data for today minus 363
Extracting data for today minus 362
Extracting data for today minus 361
Extracting data for today minus 360
Extracting data for today minus 359
Extracting data for today minus 358
Extracting data for today minus 357
Extracting data for today minus 356
Extracting data for today minus 355
Extracting data for today minus 354
Extracting data for today minus 353
Extracting data for today minus 352
Extracting data for today minus 351
Extracting data for today minus 350
Extracting data for today minus 349
Extracting data for today minus 348
Extracting data for today minus 347
Extracting data for today minus 346
Extracting data for today minus 345
Extracting data for today minus 344
Extracting data for today minus 343
Extracting data for today minus 342
Extracting data for today minus 341
Extracting data for today minus 340
Extracting data for today minus 339
Extracting data for today mi

Extracting data for today minus 137
Extracting data for today minus 136
Extracting data for today minus 135
Extracting data for today minus 134
Extracting data for today minus 133
Extracting data for today minus 132
Extracting data for today minus 131
Extracting data for today minus 130
Extracting data for today minus 129
Extracting data for today minus 128
Extracting data for today minus 127
Extracting data for today minus 126
Extracting data for today minus 125
Extracting data for today minus 124
Extracting data for today minus 123
Extracting data for today minus 122
Extracting data for today minus 121
Extracting data for today minus 120
Extracting data for today minus 119
Extracting data for today minus 118
Extracting data for today minus 117
Extracting data for today minus 116
Extracting data for today minus 115
Extracting data for today minus 114
Extracting data for today minus 113
Extracting data for today minus 112
Extracting data for today minus 111
Extracting data for today mi

In [68]:
import pandas as pd

# Turn the collected [url, clicks, impressions, date] rows into a table and
# write it to 'list.csv' without the index column.
df = pd.DataFrame(
    data=list_all,
    columns=["URL", "Clicks", "Impressions", "Date"],
)
df.to_csv(path_or_buf='list.csv', index=False)

In [70]:
# Group the flat rows by URL: dict_x[url] is the list of
# [clicks, impressions, date] entries for that page, in list_all order.
dict_x = {}
for row in list_all:
    dict_x.setdefault(row[0], []).append([row[1], row[2], row[3]])

In [97]:
# For every page, count the days on which clicks (first field of each entry)
# exceeded 500; result rows are [url, number_of_such_days].
list_overperforming = [
    [page, sum(1 for entry in history if entry[0] > 500)]
    for page, history in dict_x.items()
]
            

In [83]:
# Display the stored [clicks, impressions, date] entries for one page.
dict_x["https://crossword-solver.io/clue/intensify/"]

[[590, 2465, '2021-04-20'],
 [54, 294, '2021-04-21'],
 [16, 97, '2021-04-22'],
 [35, 402, '2021-04-28'],
 [157, 2882, '2021-04-29'],
 [15, 174, '2021-04-30'],
 [16, 82, '2021-05-01'],
 [35, 260, '2021-05-06'],
 [496, 6194, '2021-05-07'],
 [19, 68, '2021-05-12'],
 [63, 2194, '2021-05-14'],
 [22, 427, '2021-05-15'],
 [15, 66, '2021-05-18'],
 [37, 142, '2021-05-21'],
 [19, 107, '2021-05-29'],
 [52, 225, '2021-05-31'],
 [33, 131, '2021-06-17'],
 [29, 131, '2021-06-18'],
 [38, 114, '2021-06-19'],
 [53, 255, '2021-06-20'],
 [39, 169, '2021-06-21'],
 [24, 80, '2021-06-22'],
 [28, 88, '2021-06-23'],
 [22, 113, '2021-06-29'],
 [21, 132, '2021-07-01'],
 [16, 57, '2021-07-07'],
 [919, 4133, '2021-07-08'],
 [25, 427, '2021-07-09'],
 [27, 524, '2021-07-22'],
 [57, 239, '2021-07-26'],
 [63, 11206, '2021-08-20'],
 [30, 748, '2021-10-02'],
 [31, 136, '2021-10-19'],
 [100, 1832, '2021-10-22'],
 [65, 1000, '2021-10-23'],
 [82, 565, '2021-11-06'],
 [38, 226, '2021-11-10'],
 [43, 203, '2021-12-04'],
 [91,

In [98]:
# Split pages by how many >500-click days they had:
# exactly one, none, or more than one.
one_timers = []
several_timers = []
no_spikes = []

for page, spike_days in list_overperforming:
    if spike_days == 0:
        bucket = no_spikes
    elif spike_days == 1:
        bucket = one_timers
    else:
        bucket = several_timers
    bucket.append(page)
        
        
        
        

In [105]:
# Pages with more than 30 recorded days above 50 clicks. Each result item is
# a one-element list holding the URL.
list_evergreen = [
    [page]
    for page, history in dict_x.items()
    if sum(1 for entry in history if entry[0] > 50) > 30
]

In [125]:

#
#
#

# For each page without a spike day, collect
# [url, total clicks, list of daily clicks, list of dates].
new_no_spikes = []
for page in no_spikes:
    history = dict_x[page]
    daily_clicks = [entry[0] for entry in history]
    days = [entry[2] for entry in history]
    new_no_spikes.append([page, sum(daily_clicks), daily_clicks, days])


In [132]:
# Export the several-timers table to 'several_timers.csv' without the index.
# NOTE(review): `new_several_timers` is not defined in any cell visible here,
# so on a fresh kernel (Restart & Run All) this cell raises NameError.
# Presumably it was built from `several_timers` in a cell that was edited or
# removed — restore that cell, and confirm its row layout matches the four
# column labels used below.
df = pd.DataFrame(new_several_timers, columns=["URL", "Clicks", "Impressions", "Date"])
df.to_csv('several_timers.csv', index=False)


