# Search Terms

In this notebook we will search for the Freebase term (the Google Knowledge Graph node id) for each of our original search queries.

In [69]:
import requests
import pandas as pd
import numpy as np

In [70]:
# Directory to save/read data
data_path = "data/"

# Read API_key (you need your own key for it to work).
# Surrounding whitespace is stripped: a trailing newline in the key file
# (most editors append one) would otherwise be sent as part of the `key`
# request parameter.
with open(data_path+"API_key","r") as f:
    API_key = f.read().strip()

In [71]:
# Endpoint of the entity search API; the query parameters (query, key, limit, indent)
# are passed separately when the request is made.
base_request_prefix = "https://kgsearch.googleapis.com/v1/entities:search"

# Example of a complete request URL:
# https://kgsearch.googleapis.com/v1/entities:search?query=taylor+swift&key=API_KEY&limit=1&indent=True

### Create simple request to test functionality

In [72]:

# Query the API once with a well-known entity to check that the key and endpoint work.
params = {
    "query" : "Federal Bureau of Investigation", #FBI
    "key"   : API_key,
    "limit" : 10,
    "indent": True
}
# A timeout keeps the cell from hanging forever if the service does not answer.
r = requests.get(base_request_prefix, params = params, timeout = 10)
# Fail loudly on an HTTP error (invalid key, quota, ...) instead of parsing an error payload.
r.raise_for_status()
entity = r.json()

# Show the request URL with the key replaced by a placeholder: printing `r.url`
# as-is would store the secret API key in the notebook's saved output.
redacted_params = {**params, "key": "API_KEY"}
print(requests.Request("GET", base_request_prefix, params = redacted_params).prepare().url)

https://kgsearch.googleapis.com/v1/entities:search?query=Federal+Bureau+of+Investigation&key=API_KEY&limit=10&indent=True


In [73]:
# Display the parsed JSON response of the test request
entity

{'@context': {'detailedDescription': 'goog:detailedDescription',
  'kg': 'http://g.co/kg',
  'resultScore': 'goog:resultScore',
  'goog': 'http://schema.googleapis.com/',
  '@vocab': 'http://schema.org/',
  'EntitySearchResult': 'goog:EntitySearchResult'},
 '@type': 'ItemList',
 'itemListElement': [{'result': {'@type': ['GovernmentOrganization',
     'EducationalOrganization',
     'Organization',
     'Thing'],
    '@id': 'kg:/m/02_1m',
    'detailedDescription': {'license': 'https://en.wikipedia.org/wiki/Wikipedia:Text_of_Creative_Commons_Attribution-ShareAlike_3.0_Unported_License',
     'url': 'https://en.wikipedia.org/wiki/Federal_Bureau_of_Investigation',
     'articleBody': 'The Federal Bureau of Investigation is the domestic intelligence and security service of the United States and its principal federal law enforcement agency. '},
    'url': 'http://www.fbi.gov/',
    'description': 'Law enforcement agency',
    'name': 'Federal Bureau of Investigation'},
   'resultScore': 669

In [74]:
# The '@id' of the first result looks like "kg:/m/02_1m"; print the part after the "kg" prefix.
first_result = entity["itemListElement"][0]["result"]
print(first_result['@id'].split(":")[1])

/m/02_1m


### Create mapping from search query to Google Knowledge Graph Search Node

In [84]:
def create_search_terms_to_GKG_node_df(search_terms, domain_name, API_key):
    """Creates a dataframe mapping a list of search terms to their GKG node representation.

    We make the assumption that the first google result corresponds to the correct topic representation.
    This assumption should hold for our project as the terms are not ambiguous.
    The returned dataframe has attributes:
    {search_term, node_equivalent, domain_name}

    Args:
        search_terms (iterable[str]): All required search queries to map. Any iterable
            is accepted (list, numpy array, generator, ...); one API request is sent per term.
        domain_name (str): domain of the search queries (ie. Terrorism, Domestic, top-30 Terrorism)
        API_key (str): API key to use for the requests

    Returns:
        dataframe: dataframe mapping search term to their GKG node representation.
            `node_equivalent` is missing (None) for a term the API returned no result for.

    Raises:
        requests.HTTPError: if the API answers with an HTTP error status
            (e.g. invalid key or exhausted quota).
    """

    # Create base request url
    base_request_prefix = "https://kgsearch.googleapis.com/v1/entities:search"

    # Seconds to wait for the API before giving up, so one stalled request cannot hang the whole mapping
    request_timeout = 10

    # Create function which uses the GKG Api to find the corresponding node for one search query
    def map_search_term(search_term, API_key):
        """Maps one search query to the first google result search node representation.
        We make the assumption that the first google result corresponds to the correct topic representation.
        This assumption should hold for our project as the terms are not ambiguous.
        Returns None when the API has no result for the query."""

        # Create parameters for the request
        params = {
            "query" : search_term,
            "key"   : API_key,
            "limit" : 10, # we care only about the first result
            "indent": True
        }
        r = requests.get(base_request_prefix, params = params, timeout = request_timeout)

        # Surface HTTP errors as such instead of a confusing KeyError on the error payload
        r.raise_for_status()

        results = r.json().get("itemListElement", [])
        if not results:
            # No entity found: keep the row (with a missing node) rather than aborting the whole mapping
            return None

        # Return the Google Knowledge Graph Search Node name, i.e. the '@id'
        # (e.g. "kg:/m/02_1m") without its namespace prefix
        return results[0]["result"]['@id'].split(":", 1)[-1]

    # Materialize the terms once so one-shot iterables can be both queried and stored
    search_terms = list(search_terms)

    # Map search terms to node equivalent
    node_equivalents = [map_search_term(search_term, API_key) for search_term in search_terms]

    # Create dictionary for the df (the domain is repeated on every row)
    data={
        "search_term" : search_terms,
        "node_equivalent" : node_equivalents,
        "domain_name" : [domain_name]*len(search_terms)
    }

    # return the mapping df
    return pd.DataFrame(data=data)
    
    

### Terrorism dataset Mapping

In [85]:
# Load the terrorism dataframe from its pickle file
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source
terrorism_df = pd.read_pickle(data_path+"terrorism.pkl")

In [86]:
# Map every distinct article name to its Knowledge Graph node (one API request per name)
terrorism_mapping_df = create_search_terms_to_GKG_node_df(terrorism_df.article_name.unique(), "terrorism", API_key)

In [87]:
terrorism_mapping_df # NOTE: the /g/ nodes do not work, hmm

Unnamed: 0,search_term,node_equivalent,domain_name
0,Al-Qaeda,/m/0v74,terrorism
1,terrorism,/m/07jq_,terrorism
2,terror,/m/07jq_,terrorism
3,attack,/m/0gtxdb2,terrorism
4,iraq,/m/0d05q4,terrorism
5,afghanistan,/m/0jdd,terrorism
6,iran,/m/03shp,terrorism
7,Pakistan,/m/05sb1,terrorism
8,Agro,/m/019jkv,terrorism
9,Environmental Terrorism,/m/02w1mcd,terrorism
