In [2]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import os
import json
import requests
import time
import re
from pytrends.request import TrendReq
import pytrends
import math
import itertools
from sklearn.preprocessing import minmax_scale
import pickle

In [14]:
def get_related_queries(kw_list: list) -> dict:
    
    related_queries = {}
    
    for keyword in kw_list:
    
        time.sleep(3)
    
        pytrend = TrendReq(hl='es-419', 
                           tz=360, 
                           timeout= None, 
                           retries = 0,
                           requests_args={'verify':True})
    
        pytrend.build_payload(kw_list=[keyword], geo='MX')
    
        time.sleep(3)
    
        try:
            related_queries[keyword] = pytrend.related_queries()
            print('crawled, yay')
    
        except Exception as e:
            print('oops, that went wrong')
            pass
    
    return related_queries


def get_gtrend_outputs(terms, 
                       input_date_start = '2018-11-01',
                       input_date_end = '2022-11-01',
                       gt_country_code = 'MX', 
                       search_type='web'):
    
    # Tidy inputs
    terms = [x.lower() for x in terms] 
    terms = [re.sub('\u200b','',x) for x in terms]
    date_text = input_date_start+" "+input_date_end

    # Get index outputs

    indices = {}
    failed = []
    failed2 =[]
    
    for term in terms:
        print(term)
        time.sleep(3)
        try:
            pytrend = TrendReq()
            pytrend.build_payload(kw_list=[term], geo=gt_country_code, timeframe = date_text, gprop = "")
            indices[term] = pytrend.interest_over_time()
        except:
            failed.append(term)
            print('failed to get ' + str(term))
            continue
    
    for term in failed:
        print(term)
        time.sleep(3)
        try:
            pytrend = TrendReq()
            pytrend.build_payload(kw_list=[term], geo=gt_country_code, timeframe = date_text, gprop = "")
            indices[term] = pytrend.interest_over_time()
        except:
            failed2.append(term)
            print('failed to get ' + str(term))
            continue
    
    for term in failed2:
        print(term)
        time.sleep(3)
        try:
            pytrend = TrendReq()
            pytrend.build_payload(kw_list=[term], geo=gt_country_code, timeframe = date_text, gprop = "")
            indices[term] = pytrend.interest_over_time()
        except:
            print('failed to get for the last time ' + str(term))
            continue
    
    
    colnames = list(indices.keys())
    
    print('number of cols retrieved = ' + str(len(colnames)) + ' out of ' + str(len(kw)))
    
    indices_pd = pd.concat([pd.concat([indices[colnames[i]]], axis=1) for i in range(len(colnames))],axis=1).drop('isPartial', axis=1)

    return indices_pd

In [4]:
#############################################################
# busquedas relacionadas ####################################

kw = ['radiadores', 'refacciones de autos', 'calentadores']

In [5]:
# esta función devuelve un diccionario donde cada key es el keyword (queda repetido el keyword ojo)
# y dentro hay otro diccionario donde las keys son 'top' y 'rising' y los valores son data frames con los top y rising 
# bsquedas relacionadas al keyword

suggested_queries = get_related_queries(kw)

crawled, yay
crawled, yay
crawled, yay


In [17]:
# ejemplo:
df1 = suggested_queries['radiadores']['radiadores']['top']
df2 = suggested_queries['radiadores']['radiadores']['rising']

In [None]:
#############################################################
# busquedas por fecha #######################################

In [23]:
gtrends = get_gtrend_outputs(df1['query'].to_list(), 
                             input_date_start = '2018-11-01',
                             input_date_end = '2022-11-01',
                             gt_country_code = 'MX', 
                             search_type='web')

radiador
mofles
autozone
venta de radiadores
taller de radiadores
reparacion de radiadores
refaccionaria
runsa radiadores
runsa
radiadores tijuana
radiadores monterrey
rasa
rasa radiadores
radiadores guadalajara
radiadores mexico
phar radiadores
reparación de radiadores
radiadores queretaro
radiadores california
radiadores chihuahua
radiadores de la frontera
deyac
deyac radiadores
radiadores cerca de mi
radiadores de autos
number of cols retrieved = 25 out of 3


In [24]:
gtrends

Unnamed: 0_level_0,radiador,mofles,autozone,venta de radiadores,taller de radiadores,reparacion de radiadores,refaccionaria,runsa radiadores,runsa,radiadores tijuana,...,phar radiadores,reparación de radiadores,radiadores queretaro,radiadores california,radiadores chihuahua,radiadores de la frontera,deyac,deyac radiadores,radiadores cerca de mi,radiadores de autos
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-11-04,51,62,46,14,0,33,69,0,50,0,...,0,0,17,38,0,0,33,0,0,45
2018-11-11,60,74,48,0,67,0,66,20,53,0,...,0,0,17,22,52,45,0,0,0,47
2018-11-18,53,70,52,19,0,22,75,24,65,25,...,0,0,85,0,0,0,43,0,0,0
2018-11-25,49,86,44,16,0,36,69,26,56,29,...,33,0,0,0,0,0,15,0,0,0
2018-12-02,57,59,48,65,0,17,72,20,59,0,...,0,0,0,0,0,87,48,34,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-10-02,82,74,72,20,59,15,85,26,47,0,...,12,35,20,21,0,17,55,14,25,0
2022-10-09,89,72,75,0,35,23,83,0,52,31,...,11,0,27,40,26,27,31,15,18,0
2022-10-16,86,78,71,0,53,22,70,0,42,0,...,12,21,0,0,10,24,40,28,29,38
2022-10-23,83,71,74,0,51,17,76,25,46,25,...,14,30,0,27,33,20,22,0,0,22
