In [3]:
# Import needed packages
import csv
import search
import simulate_keywords
from google_client import GoogleClient

ModuleNotFoundError: No module named 'googleapiclient'

In [4]:
# %env TRENDS_DEVELOPER_KEY=

In [5]:
# Seed search terms: the simulation below is run once for each term in this list.
# Edit this list to control which initial search terms are simulated.
initial_search_terms = ["food banks near me"]

In [5]:
def load_geolocations(locations_file=None):
    """Read the geolocation CSV and return one dict per row.

    Args:
        locations_file: Path of the CSV file to read. When omitted (the
            default), ``simulate_keywords.Simulation.LOCATIONS_FILE`` is used,
            which is what the original zero-argument call did.

    Returns:
        A list of ``{"code": ..., "description": ...}`` dicts, in file order,
        built from the ``geo_code`` and ``description`` columns. Any other
        columns in the file are ignored.

    Raises:
        KeyError: if a row lacks the ``geo_code`` or ``description`` column.
        OSError: if the file cannot be opened.
    """
    if locations_file is None:
        locations_file = simulate_keywords.Simulation.LOCATIONS_FILE

    # newline="" lets the csv module do its own line-ending handling, so that
    # quoted fields containing embedded newlines are parsed correctly.
    with open(locations_file, "r", newline="") as csvfile:
        return [
            {"code": row["geo_code"], "description": row["description"]}
            for row in csv.DictReader(csvfile)
        ]

In [6]:
"""
Generate master list of top queries for all geolocations during the specified time period
Get relative search volume of top queries for initial search term
"""
def run_simulation(initial_search_term, startDateTrends='2020-01', endDateTrends='2020-08', startDateTimelines='2020-01-01', endDateTimelines='2020-08-31'):
    """Run the keyword simulation for one search term over every geolocation.

    For each location returned by ``load_geolocations()`` a
    ``simulate_keywords.Simulation`` is built and its ``generate_keywords``,
    ``get_relative_search_volumes`` and ``generate_simulation_csvs`` steps are
    run in that order.

    Args:
        initial_search_term: The seed search term to simulate.
        startDateTrends, endDateTrends: Passed through to ``Simulation``
            unchanged (the defaults suggest a ``YYYY-MM`` format; confirm
            against ``simulate_keywords``).
        startDateTimelines, endDateTimelines: Passed through to ``Simulation``
            unchanged (the defaults suggest a ``YYYY-MM-DD`` format; confirm
            against ``simulate_keywords``).

    Returns:
        A ``(master_list, relative_search_volumes)`` tuple where
        ``master_list`` is the set of every ``'query'`` value found in any
        location's ``simulation.initial_queries``, and
        ``relative_search_volumes`` maps each location's ``code`` to that
        location's ``simulation.relative_search_volumes``.
    """
    master_list = set()
    relative_search_volumes = {}
    for loc in load_geolocations():
        simulation = simulate_keywords.Simulation(
            initial_search_term,
            loc,
            startDateTrends,
            endDateTrends,
            startDateTimelines,
            endDateTimelines,
        )

        # The three steps run in a fixed order; the attributes read below
        # (initial_queries, relative_search_volumes) are taken afterwards.
        simulation.generate_keywords()
        simulation.get_relative_search_volumes()
        simulation.generate_simulation_csvs()

        # Merge this location's top queries into the cross-location set.
        master_list.update(q['query'] for q in simulation.initial_queries)
        relative_search_volumes[loc['code']] = simulation.relative_search_volumes
    return master_list, relative_search_volumes

In [7]:
"""
Run the simulation for all initial search terms 
Generates a dictionary mapping initial_search_term to its master list for all locations
Generates a dictionary mapping initial_search_term to its relative search volumes for all locations
"""
all_master_lists = {}
all_relative_search_volumes = {}
for initial_search_term in initial_search_terms:
    # Only the search term is passed, so the default start/end dates apply;
    # pass explicit startDate*/endDate* arguments to override them.
    master_list, relative_search_volumes = run_simulation(initial_search_term)
    # Store the query set as a list, keyed by the term that produced it.
    all_master_lists[initial_search_term] = [*master_list]
    all_relative_search_volumes[initial_search_term] = relative_search_volumes

In [8]:
# Inspect the nested result: initial search term -> location code -> relative search volumes.
all_relative_search_volumes

{'food banks near me': {'US': [{'open food banks near me': 0.003987066249247087},
   {'food open near me': 0.5313591941026181},
   {'food bank': 0.3163857752534503},
   {'food bank near me': 0.03982112928206027},
   {'food banks near me today': 0.0025479263587956397},
   {'food banks near me open today': 0.00214558601643791},
   {'food banks open today': 0.0036053824776487646},
   {'food pantry near me': 0.028606126693417493},
   {'free food banks near me': 0.001390265741879042},
   {'free food near me': 0.04415713917114515},
   {'local food banks near me': 0.0010344720279345677},
   {'local food near me': 0.011197732638669548},
   {'local food banks': 0.002656164214333986},
   {'food pantries near me': 0.008957594816649121},
   {'good banks near me': 0.001355467030962015},
   {'food banks near me open now': 0.00048798993612460864},
   {'pet food banks near me': 0.00030498798862655296}],
  'US-CA': [{'open food banks near me': 0.004182671622676035},
   {'food open near me': 0.467695339

In [9]:
"""
Get the top sites and the normalized site probabilities for each initial search term
"""
for initial_search_term in all_relative_search_volumes:
    location_dicts = all_relative_search_volumes[initial_search_term]
    # Top sites are only available at the US level, so only the 'US' entry is
    # passed on and every other location is ignored.
    # This will generate CSV files in output/search
    search.main(location_dicts['US'], initial_search_term)