In [1]:
!pip install pytrends

Collecting pytrends
  Downloading pytrends-4.9.2-py3-none-any.whl (15 kB)
Installing collected packages: pytrends
Successfully installed pytrends-4.9.2


# Guide

* https://hackernoon.com/how-to-use-google-trends-api-with-python
* https://forbrains.co.uk/international_tools/earth_timezones?ref=hackernoon.com

In [10]:
# connect to google 

from pytrends.request import TrendReq

pytrends = TrendReq(hl='en-US', tz=-420) 

In [11]:
kw_list = ["machine learning"] # list of keywords to get data 

pytrends.build_payload(kw_list, cat=0, timeframe='today 12-m')

In [12]:
#1 Interest over Time
data = pytrends.interest_over_time() 
data = data.reset_index() 


import plotly.express as px

fig = px.line(data, x="date", y=['machine learning'], title='Keyword Web Search Interest Over Time')
fig.show() 

TooManyRequestsError: The request failed: Google returned a response with code 429

In [13]:
pytrends.get_historical_interest(kw_list, year_start=2021, month_start=9, day_start=1, hour_start=0, year_end=2021, month_end=9, day_end=30, hour_end=0, cat=0, sleep=0)

NotImplementedError: This method has been removed for incorrectness. It will be removed completely in v5.
If you'd like similar functionality, please try implementing it yourself and consider submitting a pull request to add it to pytrends.
          
There is discussion at:
https://github.com/GeneralMills/pytrends/pull/542

In [15]:
import time
import json

# Create pytrends object
pytrends = TrendReq(hl='en-US', tz=-420) # 'De' means consider Germany as the search area 
# Set your keyword and time frame
keyword = ['Analytics'] # for example we give 'Data Science' as the search term 
timeframe = 'today 5-y'  # Time frame: last year

# Build the payload for the keyword
pytrends.build_payload(kw_list=keyword,
                       cat=0, # Category 
                       timeframe=timeframe,
                       geo='CR', # Geographic location, in this case 'Deutschland'
                       gprop='') # Google Search Property

interest_over_time_df = pytrends.interest_over_time()

TooManyRequestsError: The request failed: Google returned a response with code 429

In [17]:
!pip install curl_cffi

Collecting curl_cffi
  Downloading curl_cffi-0.6.4-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: curl_cffi
Successfully installed curl_cffi-0.6.4


In [18]:
import json
import urllib.parse
from datetime import datetime, timedelta
from curl_cffi import requests
import time

def build_payload(keywords, timeframe='now 7-d', geo='US'):
    token_payload = {
        'hl': 'en-US',
        'tz': '0',
        'req': {
            'comparisonItem': [{'keyword': keyword, 'time': timeframe, 'geo': geo} for keyword in keywords],
            'category': 0,
            'property': ''
        }
    }
    token_payload['req'] = json.dumps(token_payload['req'])
    return token_payload

def convert_to_desired_format(raw_data):
    trend_data = {}
    for entry in raw_data['default']['timelineData']:
        timestamp = int(entry['time'])
        date_time_str = datetime.utcfromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')
        value = entry['value'][0]
        trend_data[date_time_str] = value
    return trend_data

# Cookies
def get_google_cookies(impersonate_version='chrome110'):
    with requests.Session() as session:
        session.get("https://www.google.com", impersonate=impersonate_version)
        return session.cookies

def fetch_trends_data(keywords, days_ago=7, geo='US', hl='en-US', max_retries=5, browser_version='chrome110', browser_switch_retries=2):
    browser_versions = ['chrome110', 'edge101', 'chrome107', 'chrome104', 'chrome100', 'chrome101', 'chrome99']
    current_browser_version_index = browser_versions.index(browser_version)
    cookies = get_google_cookies(impersonate_version=browser_versions[current_browser_version_index])

    for browser_retry in range(browser_switch_retries + 1):
        data_fetched = False  # Reset data_fetched to False at the beginning of each browser_retry
        with requests.Session() as s:
            # phase 1: token
            for retry in range(max_retries):
                time.sleep(2)
                token_payload = build_payload(keywords)
                url = 'https://trends.google.com/trends/api/explore'
                params = urllib.parse.urlencode(token_payload)
                full_url = f"{url}?{params}"
                response = s.get(full_url, impersonate=browser_versions[current_browser_version_index], cookies=cookies)
                if response.status_code == 200:
                    content = response.text[4:]
                    try:
                        data = json.loads(content)
                        widgets = data['widgets']
                        tokens = {}
                        request = {}
                        for widget in widgets:
                            if widget['id'] == 'TIMESERIES':
                                tokens['timeseries'] = widget['token']
                                request['timeseries'] = widget['request']
                        break  # Break out of the retry loop as we got the token
                    except json.JSONDecodeError:
                        print(f"Failed to decode JSON while fetching token, retrying {retry + 1}/{max_retries}")
                else:
                    print(f"Error {response.status_code} while fetching token, retrying {retry + 1}/{max_retries}")
            else:
                print(f"Exceeded maximum retry attempts ({max_retries}) while fetching token. Exiting...")
                return None

            # phase 2: trends data
            for retry in range(max_retries):
                time.sleep(5)
                req_string = json.dumps(request['timeseries'], separators=(',', ':'))
                encoded_req = urllib.parse.quote(req_string, safe=':,+')
                url = f"https://trends.google.com/trends/api/widgetdata/multiline?hl={hl}&tz=0&req={encoded_req}&token={tokens['timeseries']}&tz=0"
                response = s.get(url, impersonate=browser_versions[current_browser_version_index], cookies=cookies)
                if response.status_code == 200:
                    content = response.text[5:]
                    try:
                        raw_data = json.loads(content)
                        # Convert raw data
                        trend_data = convert_to_desired_format(raw_data)
                        data_fetched = True  # Set data_fetched to True as we have successfully fetched the trend data
                        return trend_data
                    except json.JSONDecodeError:
                        print(f"Failed to decode JSON while fetching trends data, retrying {retry + 1}/{max_retries}")
                else:
                    print(f"Error {response.status_code} while fetching trends data, retrying {retry + 1}/{max_retries}")
            else:
                print(f"Exceeded maximum retry attempts ({max_retries}) while fetching trends data.")

        # change browser
        if not data_fetched and browser_retry < browser_switch_retries:
            time.sleep(5)
            current_browser_version_index = (current_browser_version_index + 1) % len(browser_versions)
            print(f"Switching browser version to {browser_versions[current_browser_version_index]} and retrying...")

    print(f"Exceeded maximum browser switch attempts ({browser_switch_retries}). Exiting...")
    return None

# Example
keywords = ["test"]
trends_data = fetch_trends_data(keywords)
print(trends_data)

Error 429 while fetching trends data, retrying 1/5
Error 429 while fetching trends data, retrying 2/5
Error 429 while fetching trends data, retrying 3/5
Error 429 while fetching trends data, retrying 4/5
Error 429 while fetching trends data, retrying 5/5
Exceeded maximum retry attempts (5) while fetching trends data.
Switching browser version to edge101 and retrying...
Error 429 while fetching trends data, retrying 1/5
Error 429 while fetching trends data, retrying 2/5
{'2024-05-22 18:00:00': 93, '2024-05-22 19:00:00': 96, '2024-05-22 20:00:00': 91, '2024-05-22 21:00:00': 86, '2024-05-22 22:00:00': 90, '2024-05-22 23:00:00': 86, '2024-05-23 00:00:00': 87, '2024-05-23 01:00:00': 86, '2024-05-23 02:00:00': 86, '2024-05-23 03:00:00': 94, '2024-05-23 04:00:00': 92, '2024-05-23 05:00:00': 93, '2024-05-23 06:00:00': 90, '2024-05-23 07:00:00': 86, '2024-05-23 08:00:00': 79, '2024-05-23 09:00:00': 75, '2024-05-23 10:00:00': 77, '2024-05-23 11:00:00': 80, '2024-05-23 12:00:00': 89, '2024-05-23 