In [None]:
!pip install pytrends

In [None]:
!pip install seaborn

In [1]:
from pytrends.request import TrendReq
import pandas as pd
import datetime
from matplotlib import pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
import json
import time

In [2]:
# Timezone offset handed to the Google Trends client as `tz`.
# Presumably minutes with the sign inverted relative to UTC (India is
# UTC+5:30 -> -330) — TODO confirm against the pytrends documentation.
# NOTE(review): get_related_topics queries geo="PH" while the India offset is
# the active one here — confirm this combination is intended.
timezone_offset = -330  # India
# timezone_offset = -480  # Philippines

# Single shared client used by every request in this notebook.
pytrends = TrendReq(
    hl='en-US',
    tz=timezone_offset,
    retries=10,
    backoff_factor=0.5,
)

In [3]:
# Ask for the first day of the query window and turn the three dash-separated
# integer fields into a `datetime.date`.
date_entry = input('Enter a start date in YYYY-MM-DD format: ')
year, month, day = (int(field) for field in date_entry.split('-'))
date1 = datetime.date(year, month, day)

Enter a start date in YYYY-MM-DD format: 2019-01-01


In [4]:
# Ask for the last day of the query window and turn the three dash-separated
# integer fields into a `datetime.date`.
date_entry = input('Enter a end date in YYYY-MM-DD format: ')
year, month, day = (int(field) for field in date_entry.split('-'))
date2 = datetime.date(year, month, day)

Enter a end date in YYYY-MM-DD format: 2020-02-02


In [5]:
# Timeframe string passed to the Trends requests: "<start> <end>", each bound
# rendered as YYYY-MM-DD and separated by a single space.
t_frame = " ".join(bound.strftime("%Y-%m-%d") for bound in (date1, date2))
t_frame

'2019-01-01 2020-02-02'

In [6]:
# Load the category table and pull its "name" and "id" columns out as two
# parallel Python lists (same row order as the CSV).
cat_df = pd.read_csv("categories_defined.csv")
cat_names, cat_ids = (cat_df[column].tolist() for column in ("name", "id"))

In [7]:
from pandas.io.json._normalize import nested_to_record

# Columns kept from each flattened related-topic record.
_RELATED_TOPIC_COLUMNS = ["topic_title", "topic_type", "formattedValue"]


def _ranked_topics_frame(req_json, position):
    """Tabulate ``rankedList[position]`` of a response; empty frame if absent."""
    try:
        ranked = req_json["default"]["rankedList"][position]["rankedKeyword"]
        # Nested dicts are flattened with "_" (e.g. {"topic": {"title": ...}}
        # becomes a "topic_title" column). An empty list produces a frame with
        # no columns, so the column selection raises KeyError and we fall
        # through to the empty-frame branch below.
        return pd.json_normalize(ranked, sep="_")[_RELATED_TOPIC_COLUMNS]
    except (KeyError, IndexError):
        # KeyError: expected keys/columns missing; IndexError: the response
        # holds fewer ranked lists than `position` requires.
        return pd.DataFrame(columns=_RELATED_TOPIC_COLUMNS)


def get_related_topics(catno, timeframe, geo="PH", client=None):
    """
    Get the "top" and "rising" related topics for one category.

    Parameters
    ----------
    catno : category id forwarded as ``cat`` to ``build_payload``.
    timeframe : timeframe string forwarded to ``build_payload``.
    geo : region code forwarded to ``build_payload``; defaults to "PH",
        the value this function previously hard-coded.
    client : object offering the ``TrendReq`` members used here
        (``build_payload``, ``related_topics_widget_list``, ``tz``,
        ``_get_data``, ``RELATED_QUERIES_URL``, ``GET_METHOD``). Defaults to
        the notebook-level ``pytrends`` client.

    Returns
    -------
    (top_df, rising_df) : tuple of DataFrames, each with the columns
        "topic_title", "topic_type" and "formattedValue". A frame is empty
        (zero rows, same columns) when the response has no matching entries.
    """
    if client is None:
        client = pytrends

    # An empty keyword makes the request category-wide rather than
    # keyword-specific.
    client.build_payload(cat=catno, kw_list=[""], timeframe=timeframe, geo=geo)

    widgets = client.related_topics_widget_list
    if not widgets:
        # No related-topics widget was produced: report "no results" the same
        # way as for an empty response instead of failing with IndexError.
        return (
            pd.DataFrame(columns=_RELATED_TOPIC_COLUMNS),
            pd.DataFrame(columns=_RELATED_TOPIC_COLUMNS),
        )

    # Form request from the first related-topics widget.
    request_json = widgets[0]
    related_payload = {
        "req": json.dumps(request_json["request"]),
        "token": request_json["token"],
        "tz": client.tz,
    }

    # Send request. The URL/method constants are class attributes on TrendReq
    # and are therefore reachable through the instance as well.
    req_json = client._get_data(
        url=client.RELATED_QUERIES_URL,
        method=client.GET_METHOD,
        trim_chars=5,
        params=related_payload,
    )

    # rankedList[0] holds the top topics, rankedList[1] the rising ones.
    top_df = _ranked_topics_frame(req_json, 0)
    rising_df = _ranked_topics_frame(req_json, 1)

    return (top_df, rising_df)

In [None]:
# Collect the related topics of every category into one frame.
#
# Per-category frames are gathered in a list and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0 and growing a frame inside a loop
# copies all previously collected rows on every iteration.
topic_columns = ["topic_title", "topic_type", "subject"]  # "formattedValue" is stored as "subject"
topic_frames = []

try:
    for i, (cat_id, cat_name) in enumerate(zip(cat_ids, cat_names)):
        print(i, cat_id, cat_name)
        # Throttle: pause before every 20th request (this includes the very
        # first one, i == 0).
        if i % 20 == 0:
            time.sleep(20)
        rt_top_df, rt_rising_df = get_related_topics(cat_id, timeframe=t_frame)

        if len(rt_top_df) == 0:
            print(cat_name, "has no top queries")
        if len(rt_rising_df) == 0:
            print(cat_name, "has no rising queries")

        rt_top_df.columns = topic_columns
        rt_rising_df.columns = topic_columns

        # Tag each row with the list it came from before stacking them.
        rt_top_df["related_topics"] = "top"
        rt_rising_df["related_topics"] = "rising"

        # Top rows first, then rising rows; the per-frame index is kept (no
        # ignore_index) to match what DataFrame.append produced.
        rt_df = pd.concat([rt_top_df, rt_rising_df])

        rt_df["geo"] = "PH"
        rt_df["keyword"] = cat_name
        rt_df["category"] = cat_id
        topic_frames.append(rt_df)
finally:
    # Build the result even when the loop is interrupted or a request fails,
    # so the categories fetched so far remain available in `related_topics`.
    # pd.concat raises on an empty list, hence the guard.
    related_topics = pd.concat(topic_frames) if topic_frames else pd.DataFrame()

0 277 Alcoholic Beverages
1 68 Apparel
2 1228 Apparel Services
3 983 Athletic Apparel
4 1374 Babies & Toddlers
Babies & Toddlers has no top queries
Babies & Toddlers has no rising queries
5 115 Baby Care & Hygiene
Baby Care & Hygiene has no top queries
Baby Care & Hygiene has no rising queries
6 907 Baked Goods
7 37 Banking
8 1365 Bathroom
9 44 Beauty & Fitness
10 948 Bed & Bath
11 1369 Bedding & Bed Linens
Bedding & Bed Linens has no top queries
Bedding & Bed Linens has no rising queries
12 1366 Bedroom
13 1367 Beds & Headboards
Beds & Headboards has no top queries
Beds & Headboards has no rising queries
14 573 Camera & Photo Equipment
15 1383 Camera Lenses
Camera Lenses has no top queries
Camera Lenses has no rising queries
16 307 Cameras
Cameras has no top queries
Cameras has no rising queries
17 306 Cameras & Camcorders
Cameras & Camcorders has no top queries
Cameras & Camcorders has no rising queries
18 398 Campaigns & Elections
19 1213 Campers & RVs
20 429 Cancer
21 906 Candy & S

In [12]:
# Move the current index into a regular "index" column, then write the frame
# (including its fresh 0..n-1 index) to disk.
# NOTE(review): the file therefore carries two index-like columns — confirm
# downstream readers expect that before changing it.
related_topics_flat = related_topics.reset_index()
related_topics_flat.to_csv("related_topics.csv")