# Recording Google Search Results and Animating Result Differences

Scenario: Retrieving Google SERPs at a regular frequency, aggregating the recorded SERP data for SEO insights, and animating the differences between the results.

- Schedule for Timing the SERP Retrieval
- Advertools for Using the Custom Search API of Google
- Pandas for **Data Manipulation**
- Plotly for Animating the Differences
- Glob for Taking the CSV Files from Directory
- Time for Using the Schedule
- Datetime for Changing the Output Names with the actual time

In [1]:
import schedule 
import advertools as adv
import pandas as pd
import plotly.express as px 
import glob
import time
from datetime import datetime

Necessary Function for Recording SERP

In [None]:
import os

# Credentials for Google's Custom Search API are read from the environment so
# that they are never stored in the notebook itself. Set both variables before
# starting the kernel, e.g.
#   export GOOGLE_CSE_ID="..."; export GOOGLE_API_KEY="..."
# Both fall back to an empty string when unset, in which case the API requests
# made with them will be rejected.
# NOTE(review): the key that was previously hard-coded here is also visible in
# the captured log output of this notebook; it should be revoked and regenerated.
cse_id = os.environ.get("GOOGLE_CSE_ID", "")  # Custom Search Engine ID
api_key = os.environ.get("GOOGLE_API_KEY", "")  # API key for the Custom Search API of Google


"""
Calls the function with the determined frequency, for the determined queries.
Creates a CSV Output with the name of the actual date which the function called.
"""

def record_serp():
    """Fetch the SERP for the tracked queries and save it as a timestamped CSV.

    Takes no arguments so it can be handed directly to ``schedule``. Each call
    requests the results for 'Calories in Pizza' and 'Calories in BigMac' via
    ``adv.serp_goog`` (using the module-level ``api_key`` and ``cse_id``) and
    writes the returned frame to ``serp<ddmmYYYYHH_MM_SS>_scheduled_serp.csv``
    in the current working directory. The stamp is taken from
    ``datetime.now()`` just before the request is sent.
    """
    queries = ['Calories in Pizza', "Calories in BigMac"]
    stamp = datetime.now().strftime("%d%m%Y%H_%M_%S")
    output_name = f'serp{stamp}_scheduled_serp.csv'
    results = adv.serp_goog(q=queries, key=api_key, cx=cse_id)
    results.to_csv(output_name)


# Register the job: call record_serp every 10 seconds.
schedule.every(10).seconds.do(record_serp)

# Poll the scheduler once per second so that any job that has become due is
# executed. The loop has no exit condition; interrupt the kernel to stop
# recording. (The counter that used to be incremented here was never read,
# so it has been removed.)
while True:
    schedule.run_pending()
    time.sleep(1)

2021-06-09 20:23:45,816 | INFO | serp.py:727 | serp_goog | Requesting: q=Calories in Pizza, cx=02b030ad62e8631d1, key=AIzaSyDirWxSmLFwksHxtwcQp1UQB-5ux-u4ipg
2021-06-09 20:23:46,486 | INFO | serp.py:727 | serp_goog | Requesting: q=Calories in BigMac, cx=02b030ad62e8631d1, key=AIzaSyDirWxSmLFwksHxtwcQp1UQB-5ux-u4ipg
2021-06-09 20:23:57,509 | INFO | serp.py:727 | serp_goog | Requesting: q=Calories in Pizza, cx=02b030ad62e8631d1, key=AIzaSyDirWxSmLFwksHxtwcQp1UQB-5ux-u4ipg
2021-06-09 20:23:58,122 | INFO | serp.py:727 | serp_goog | Requesting: q=Calories in BigMac, cx=02b030ad62e8631d1, key=AIzaSyDirWxSmLFwksHxtwcQp1UQB-5ux-u4ipg
2021-06-09 20:24:08,933 | INFO | serp.py:727 | serp_goog | Requesting: q=Calories in Pizza, cx=02b030ad62e8631d1, key=AIzaSyDirWxSmLFwksHxtwcQp1UQB-5ux-u4ipg
2021-06-09 20:24:09,451 | INFO | serp.py:727 | serp_goog | Requesting: q=Calories in BigMac, cx=02b030ad62e8631d1, key=AIzaSyDirWxSmLFwksHxtwcQp1UQB-5ux-u4ipg
2021-06-09 20:24:20,213 | INFO | serp.py:727 | se

## Some other Examples for Scheduling a Function

- schedule.every().hour.do(record_serp) => Do something once every hour
- schedule.every().day.at("13:25").do(record_serp) => Do something at 13:25, every day.
- schedule.every(5).to(10).minutes.do(record_serp) => Do something at a random interval of 5 to 10 minutes
- schedule.every().monday.do(record_serp) => Do something every Monday
- schedule.every().wednesday.at("13:15").do(record_serp) => Do something every Wednesday at 13:15
- schedule.every().minute.at(":17").do(record_serp) => Do something at the 17th second of every minute.

- for more: https://buildmedia.readthedocs.org/media/pdf/schedule/stable/schedule.pdf

Taking the SERP Recording Outputs as CSV

In [None]:
# Collect every CSV produced by record_serp. The pattern keys on the shared
# "_scheduled_serp.csv" suffix, so it matches the "serp..." files the function
# writes as well as the older "str..." files that this notebook also reads.
serp_csvs = sorted(glob.glob("*_scheduled_serp.csv"))
if not serp_csvs:
    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(
        "No '*_scheduled_serp.csv' files found in the working directory; "
        "run record_serp at least once first."
    )
# Stack all recordings into one frame with a fresh 0..n-1 index.
serp_csv = pd.concat((pd.read_csv(file) for file in serp_csvs), ignore_index=True)

In [None]:
# Load four individual recordings, each indexed by its queryTime column.
df_1 = pd.read_csv("str2712202015_09_06_scheduled_serp.csv", index_col='queryTime')
df_2 = pd.read_csv("str2712202015_09_20_scheduled_serp.csv", index_col='queryTime')
df_3 = pd.read_csv("str2712202015_09_31_scheduled_serp.csv", index_col='queryTime')
df_4 = pd.read_csv("str2612202022_57_11_scheduled_serp.csv", index_col='queryTime')
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames in
# the same order and works on old and new pandas versions alike.
united_serp_df = pd.concat([df_1, df_2, df_3, df_4])

# Tidy the globbed frame: drop the "Unnamed: 0" column (presumably the row
# index that to_csv wrote out) and index the rows by queryTime instead.
serp_csv.drop(columns="Unnamed: 0", inplace=True)
serp_csv.set_index("queryTime", inplace=True)
serp_csv

SERP Recording for Calories in Pizza

In [None]:
# Keep only the rows whose search term mentions "pizza" (case-insensitive).
# The explicit .copy() makes pizza_serp an independent frame, so adding a
# column to it below does not raise pandas' SettingWithCopyWarning.
pizza_serp = serp_csv[serp_csv['searchTerms'].str.contains("pizza", regex=True, case=False)].copy()
pizza_serp['bubble_size'] = 35  # same marker size for every result

# Animated scatter plot: domains on the x axis, rank on a log-scaled y axis,
# one animation frame per distinct index value of pizza_serp.
fig = px.scatter(
    pizza_serp,
    x="displayLink",
    y="rank",
    animation_frame=pizza_serp.index,
    animation_group="rank",
    color="displayLink",
    hover_name="link",
    hover_data=["searchTerms", "title", "rank"],
    log_y=True,
    height=900,
    width=1100,
    range_x=[-1, 11],
    range_y=[1, 11],
    size="bubble_size",
    text="displayLink",
    template="plotly_white",
)
# Slow the playback down: the first button of the first update menu is the one
# whose animation arguments are adjusted here (frame and transition durations).
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2500
fig.layout.updatemenus[0].buttons[0].args[1]["transition"]["duration"] = 1000
fig.update_layout(
    margin=dict(l=20, r=20, t=20, b=20),
    paper_bgcolor="white",
)
fig.show(renderer='notebook')

In [None]:
# Keep only the rows whose search term mentions "bigmac" (case-insensitive).
# The explicit .copy() makes bigmac_serp an independent frame, so adding a
# column to it below does not raise pandas' SettingWithCopyWarning.
bigmac_serp = serp_csv[serp_csv['searchTerms'].str.contains("bigmac", regex=True, case=False)].copy()
bigmac_serp['bubble_size'] = 35  # same marker size for every result
bigmac_serp

SERP Recording for "Calories in BigMac"

In [None]:
# Animated scatter plot for the BigMac results: domains on the x axis, rank on
# a log-scaled y axis, one animation frame per distinct index value.
bigmac_scatter_options = dict(
    x="displayLink",
    y="rank",
    animation_frame=bigmac_serp.index,
    animation_group="rank",
    color="displayLink",
    hover_name="link",
    hover_data=["title"],
    log_y=True,
    height=900,
    width=1100,
    range_x=[-1, 11],
    range_y=[1, 11],
    size="bubble_size",
    text="displayLink",
    template="plotly_white",
)
fig = px.scatter(bigmac_serp, **bigmac_scatter_options)

# Adjust the animation arguments carried by the first button of the first
# update menu: frame and transition durations are both set to 1000.
bigmac_play_options = fig.layout.updatemenus[0].buttons[0].args[1]
bigmac_play_options["frame"]["duration"] = 1000
bigmac_play_options["transition"]["duration"] = 1000

fig.update_layout(margin={"l": 20, "r": 20, "t": 20, "b": 20})
fig.show(renderer='notebook')

In [None]:
# Read every HTML table on the Wikipedia page (row 0 used as the header of
# each table); read_html returns them as a list of DataFrames.
culinary_fruits_df = pd.read_html(
    "https://en.wikipedia.org/wiki/List_of_culinary_fruits",
    header=0,
)
# Stack all of those tables into a single frame.
culinary_fruits_merge_df = pd.concat(objs=culinary_fruits_df)

In [None]:
# Lower-cased fruit names, computed once and reused for both query families.
# Tables lacking a "Common name" column contribute NaN to that column when the
# frames are concatenated; dropna() skips those rows so .lower() is only ever
# applied to actual text.
fruit_names = [str(name).lower() for name in culinary_fruits_merge_df['Common name'].dropna()]

# All "calories in <fruit>" queries first, followed by all "nutrition in <fruit>" ones.
culinary_fruits_queries = (
    [f"calories in {name}" for name in fruit_names]
    + [f"nutrition in {name}" for name in fruit_names]
)
culinary_fruits_queries

In [None]:
# Request the results for the first 30 fruit queries, with gl set to "us".
serp_df = adv.serp_goog(
    q=culinary_fruits_queries[:30],
    cx=cse_id,
    key=api_key,
    gl=["us"],
)

In [None]:
# Disabled: when re-enabled, this saves the current serp_df as "serp_calories-2.csv".
#serp_df.to_csv("serp_calories-2.csv")
# Load the saved "serp_calories-2.csv" recording from disk.
serp_df2 = pd.read_csv("serp_calories-2.csv")

In [None]:
# Load the saved "serp_calories.csv" recording. This rebinds serp_df, so any
# frame previously held under that name is replaced.
serp_df = pd.read_csv("serp_calories.csv")

In [None]:
# Stack the two recordings, serp_df rows first. DataFrame.append was removed in
# pandas 2.0; pd.concat gives the same result and keeps the original index labels.
serp = pd.concat([serp_df, serp_df2])

In [None]:
# Display the combined frame.
serp

Taking the Top 10 Domains and Changing the Date Type for Better Animation Conditions

In [None]:
# The ten domains that occur most often in the combined results
# (value_counts sorts by frequency, highest first).
top10_domains = serp['displayLink'].value_counts().head(10).index

# Keep only rows from those domains and remove the "Unnamed: 0" column.
# Using the non-inplace drop() yields a new, independent frame instead of
# mutating a filtered slice of `serp`, which avoids SettingWithCopyWarning
# here and on later column assignments to top10_df.
top10_df = serp[serp['displayLink'].isin(top10_domains)].drop(columns=["Unnamed: 0"])
top10_df['queryTime']

In [None]:
# Parse the queryTime strings in a single vectorized call instead of one
# datetime.strptime call per row. The format still spells out the literal
# "+00:00" suffix, so the parsed values remain timezone-naive as before.
top10_df['queryTime'] = pd.to_datetime(top10_df['queryTime'], format="%Y-%m-%d %H:%M:%S.%f+00:00")

In [None]:
# Derive a day-level label (formatted as YYYY-MM-DD) from each timestamp.
top10_df['queryDay'] = top10_df['queryTime'].dt.strftime("%Y-%m-%d")
top10_df['queryDay']

In [None]:
# Order the rows by their day label, modifying top10_df in place.
top10_df.sort_values(by="queryDay", inplace=True)

10 Day Difference for the Same 30 Queries and Their Results for the Top 10 Domains

In [None]:
# Bar chart of rank per domain, split into one facet per queryDay with the
# facets wrapped after every single column. The figure is not animated (no
# animation_frame is passed), so the play-button timing tweaks that used to be
# parked here inside a string literal had no effect and have been removed.
fig = px.bar(
    top10_df,
    x="displayLink",
    y="rank",
    color="rank",
    hover_name="link",
    hover_data=["title", "link", "searchTerms"],
    log_y=True,
    height=1000,
    width=1000,
    template="plotly_white",
    facet_col="queryDay",
    facet_col_wrap=1,
)
# matches=None stops the facets' y axes from being tied to one another.
fig.update_yaxes(matches=None, categoryorder="total ascending")
fig.show(renderer='notebook')

Comparing Two Different Times

In [None]:
# Animated bar chart of rank per domain, with one animation frame per queryDay value.
animated_bar_options = dict(
    x="displayLink",
    y="rank",
    animation_frame=top10_df["queryDay"],
    animation_group="rank",
    color="rank",
    hover_name="link",
    hover_data=["title", "link", "searchTerms"],
    log_y=False,
    height=1000,
    width=1000,
    range_x=[-1, 15],
    range_y=[1, 20],
    text="displayLink",
    template="plotly_white",
)
fig = px.bar(top10_df, **animated_bar_options)

# Adjust the animation arguments carried by the first button of the first
# update menu: frame duration 3000, transition duration 1500.
animated_bar_play_options = fig.layout.updatemenus[0].buttons[0].args[1]
animated_bar_play_options["frame"]["duration"] = 3000
animated_bar_play_options["transition"]["duration"] = 1500

fig.update_layout(margin={"l": 20, "r": 20, "t": 20, "b": 20})
fig.show(renderer='notebook')

Comparing Different Queries At the Same Time

#Coming