<a href="https://colab.research.google.com/github/drusho/data_analysis/blob/gh-pages/Summertime_Google_Trends_(public).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# __Summer Google Trends__

_Notebook is inspired by:_

_https://medium.com/the-data-science-publication/how-to-use-the-pytrends-api-to-get-google-trends-data-4378acbaaa8a_

The goal of this notebook is to explore Google Trends for topics and keywords related to the summer of 2021.




In [1]:
pip install pytrends

Collecting pytrends
  Downloading https://files.pythonhosted.org/packages/96/53/a4a74c33bfdbe1740183e00769377352072e64182913562daf9f5e4f1938/pytrends-4.7.3-py3-none-any.whl
Installing collected packages: pytrends
Successfully installed pytrends-4.7.3


In [70]:
# TrendReq is the pytrends client used for every Google Trends request below
from pytrends.request import TrendReq

# Create the client: `hl` is the host language and `tz` the timezone setting
# NOTE(review): tz is presumably an offset in minutes (360 = US Central) - confirm in the pytrends docs
pytrends = TrendReq(
    hl="en-US",
    tz=360,
)

In [158]:
# Search terms to compare against each other (the four seasons, plus "autumn" as a synonym of "fall")
kw_list = [
    "summer",
    "spring",
    "winter",
    "fall",
    "autumn",
]

# Register the terms, date range and country with the client; later queries use this payload
pytrends.build_payload(
    kw_list=kw_list,
    timeframe="2019-06-03 2021-06-03",
    geo="US",
)

In [159]:
# pandas is needed here and in later cells
import pandas as pd

# Interest over time for the current payload: one row per date, one column per keyword
df = pytrends.interest_over_time()

# Show the five dates with the highest interest in "summer"
(
    df
    .sort_values(by="summer", ascending=False)
    .head()
)

Unnamed: 0_level_0,summer,spring,winter,fall,autumn,isPartial
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-06-16,76,27,12,22,2,False
2019-06-30,68,26,12,19,2,False
2019-06-23,66,26,12,19,2,False
2019-06-09,65,27,11,18,2,False
2019-07-07,64,25,12,24,2,False


In [164]:
import plotly.graph_objects as go

# Line chart of search interest over time, one trace per season.
# Only these four columns are drawn; "autumn" is also in `df` but is not plotted.
fig = go.Figure()

for season in ("summer", "spring", "winter", "fall"):
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=df[season],
            mode="lines",
            name=season.capitalize(),  # legend label, e.g. "Summer"
        )
    )

fig.update_layout(
    title_text="Google Trends Interests Over Time",
    title_x=0.5,  # centre the title horizontally
    xaxis_title="Date",
    yaxis_title="Search interest",
)

fig.show()

In [165]:
# Interest by region for the current payload, highest "summer" interest first.
# The frame is built in one chain so the cell never mutates an already-created frame,
# and the column is addressed with brackets rather than attribute assignment.
region_interests = (
    pytrends.interest_by_region(resolution='REGION', inc_low_vol=False, inc_geo_code=True)
    .sort_values(by="summer", ascending=False)
    # geoCode is split on "-" and the second part kept (the displayed result is the
    # two-letter state code, e.g. "HI"); rows without a "-" become NaN
    .assign(geoCode=lambda frame: frame["geoCode"].str.split("-").str[1])
)
region_interests.head(10)

Unnamed: 0_level_0,geoCode,summer,spring,winter,fall,autumn
geoName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Hawaii,HI,33,23,15,27,2
California,CA,29,27,16,26,2
Connecticut,CT,29,23,21,25,2
Delaware,DE,29,24,20,25,2
New Jersey,NJ,29,27,19,23,2
New York,NY,28,26,22,22,2
Louisiana,LA,27,25,17,29,2
Arizona,AZ,27,28,17,26,2
Washington,WA,27,23,22,26,2
New Hampshire,NH,27,20,26,25,2


In [182]:
# US map shaded by each state's interest in "summer"
fig = go.Figure()

fig.add_trace(
    go.Choropleth(
        locations=region_interests["geoCode"],         # state codes used to place each value
        z=region_interests["summer"].astype(float),    # value that drives the colour
        locationmode="USA-states",                     # how entries in `locations` are interpreted
        colorscale="oranges",
        colorbar_title="Interests",
    )
)

fig.update_layout(
    title_text="Interest by Region for keyword 'summer'",
    title_x=0.5,       # centre the title horizontally
    geo_scope="usa",   # limit the map scope to the USA
)

fig.show()

In [176]:
# Get search suggestions for "summer"
suggestions = pd.DataFrame.from_dict(pytrends.suggestions("summer"))

# Select the columns by name rather than by position so the cell does not
# depend on the order of the keys in the API response
suggestions[["title", "type"]]

Unnamed: 0,title,type
0,Summer,Topic
1,Summer Phoenix,American actress
2,Daylight saving time,Time zone
3,Summer house,Building use
4,2020 Summer Olympics,Olympic games


In [177]:
# Rebuild the payload for the single keyword "summer".
# Note the timeframe here starts at 2020-01-01, unlike the earlier five-keyword payload.
pytrends.build_payload(kw_list=["summer"], timeframe="2020-01-01 2021-06-03", geo="US")

# Related topics for the payload. Despite the name, `df_rt` is a dict keyed by keyword;
# each entry holds a "rising" and a "top" DataFrame.
df_rt = pytrends.related_topics()
df_rt

{'summer': {'rising':     value  ...                      topic_type
  0     750  ...              Song by Bananarama
  1     160  ...               Television series
  2      90  ...                    Building use
  3      80  ...                   Olympic games
  4      80  ...                            Film
  5      70  ...            Song by Taylor Swift
  6      70  ...                  Music Festival
  7      70  ...                           Plant
  8      60  ...                           Topic
  9      60  ...                          Squash
  10     60  ...                        Clothing
  11     60  ...                           Topic
  12     60  ...  American professional wrestler
  13     50  ...                         Garment
  14     40  ...                          Season
  15     40  ...                           Topic
  
  [16 rows x 6 columns], 'top':     value  ...                      topic_type
  0     100  ...                           Topic
  1      14  ...

In [180]:
# All rising related topics for 'summer' (kept in full in `rising`)
rising = df_rt['summer']["rising"]

# Display only the top 10, limited to the columns that are readable in a table
rising[['value', 'topic_title', 'topic_type']].head(10)

Unnamed: 0,value,topic_title,topic_type
0,750,Cruel Summer,Song by Bananarama
1,160,Summer House,Television series
2,90,Summer house,Building use
3,80,2020 Summer Olympics,Olympic games
4,80,Hot Summer Nights,Film
5,70,Cruel Summer,Song by Taylor Swift
6,70,Summer Camp Music Festival,Music Festival
7,70,Squash,Plant
8,60,Daytime,Topic
9,60,Summer squash,Squash


In [183]:
# Bar chart of the 10 first rows of the rising related topics for 'summer'
top_results = df_rt["summer"]["rising"].head(10)

fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=top_results["topic_title"],
        y=top_results["value"],
    )
)

fig.update_layout(
    title_text="Top 10 Rising Google Results for 'summer'",
    title_x=0.5,  # centre the title horizontally
)

fig.show()

In [154]:
# Bar chart of the 10 first rows of the "top" related topics for 'summer'
top_results = df_rt["summer"]["top"].head(10)

fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=top_results["topic_title"],
        y=top_results["value"],
    )
)

fig.update_layout(
    title_text="Top 10 Google Results for 'summer'",
    title_x=0.5,  # centre the title horizontally
)

fig.show()