## Knowledge Explorer

### Setup and Imports

In [1]:
# Standard library
import json
import os

# Basic APIs
import requests
from bs4 import BeautifulSoup

# Google APIs
from googletrans import Translator

In [2]:
# API keys are read from the environment so that no secret is committed with
# the notebook. Export GOOGLE_API_KEY and SCALE_SERP_API_KEY before starting
# Jupyter.
# NOTE(review): the keys that used to be hard-coded here remain in version
# history and should be rotated.

google_id = os.environ.get("GOOGLE_API_KEY", "")
serp_id = os.environ.get("SCALE_SERP_API_KEY", "")

# Surface a missing key right away instead of at the first failing API call.
if not google_id or not serp_id:
    print("Warning: GOOGLE_API_KEY and/or SCALE_SERP_API_KEY is not set.")

### Translation 

We will need to use the translation function for both:
* Translating the inputs (in any language) to a language that we can explore (for our project: English)
* Translating the knowledge graph, keywords and other information to the user's language
* We are going to use a library from PyPI -- googletrans 3.0.0
    * Use the service from **Google Translate**
    * Auto detection of language
    * Bulk translation

In [3]:
# Language detection for the user's input

translator = Translator()  # Create a Translator instance to use the API

def detect(str):
    """Detect the language of the given text.

    Thin wrapper that forwards to the shared ``translator`` instance.

    Args:
        str: Text whose language should be identified. The name shadows the
            built-in ``str`` inside this function; it is kept unchanged so
            that keyword callers keep working.

    Returns:
        The value returned by ``translator.detect``; the recorded outputs in
        this notebook show a ``Detected`` object with ``lang`` and
        ``confidence``.
    """
    return translator.detect(str)

In [4]:
# Try detect() on a handful of sample inputs

for sample_text in (
    '이 문장은 한글로 쓰여졌습니다.',
    'Tiu frazo estas skribita en Esperanto.',
    '你好',
    "bravo",  # Some words may be ambiguous
):
    print(detect(sample_text))

Detected(lang=ko, confidence=1)
Detected(lang=eo, confidence=1)
Detected(lang=zh-CN, confidence=1)
Detected(lang=en, confidence=0.91744345)


In [7]:
# The detected language code is available on the result's .lang attribute
detect('이 문장은 한글로 쓰여졌습니다.').lang

'ko'

In [13]:
# Translation of the user's input

def translate(str, dest="en"):
    """Translate the given text into the destination language.

    Thin wrapper that forwards to the shared ``translator`` instance.

    Args:
        str: Text to translate. The name shadows the built-in ``str`` inside
            this function; it is kept unchanged so that keyword callers keep
            working.
        dest: Destination language code; defaults to ``"en"``.

    Returns:
        The value returned by ``translator.translate``; the recorded outputs
        in this notebook show a ``Translated`` object with ``src``, ``dest``
        and ``text``.
    """
    return translator.translate(str, dest)

In [8]:
# Try translate() on the same word written in three languages

for sample_word in (
    'chandelier',  # English
    'лустер',  # Serbian to English (the recorded output auto-detected src=mk)
    'الثريا',  # Arabic to English
):
    print(translate(sample_word))

Translated(src=en, dest=en, text=chandelier, pronunciation=None, extra_data="{'translat...")
Translated(src=mk, dest=en, text=chandelier, pronunciation=None, extra_data="{'translat...")
Translated(src=ar, dest=en, text=chandelier, pronunciation=None, extra_data="{'translat...")


### Keyword Extraction

The keywords can be extremely helpful for:

* Exploring new concepts and new ideas based on the given query
* Allowing iterative search for the users to further research their topics
* Generating insights about how different cultures, languages and people understand the query.
* We are going to use the Scale SERP API, which is a real-time Google search API

In [37]:
# Run one simple search to see what information we can get out of the API

params = {
  'api_key': serp_id,  # reuse the Scale SERP key from the setup cell; no second copy of the secret
  'q': 'pizza'  # Placeholder query; this will later be the translated word
}

In [38]:
# Make the HTTP GET request to Scale SERP.
# The timeout keeps the cell from hanging indefinitely if the service does not answer.
response = requests.get('https://api.scaleserp.com/search', params=params, timeout=30)

# Stop here on a 4xx/5xx status instead of trying to parse an error body.
response.raise_for_status()

# Parse the JSON response from Scale SERP
todos = response.json()

In [44]:
# Let's look at the top-level sections of the parsed search response

todos.keys()

dict_keys(['request_info', 'search_metadata', 'search_parameters', 'search_information', 'inline_videos', 'inline_recipes', 'top_stories', 'top_stories_extra', 'local_map', 'local_results', 'local_results_more_link', 'knowledge_graph', 'related_searches', 'related_questions', 'pagination', 'organic_results'])

#### Understanding data

We are going to explore these fields one by one, understand the content and see what kind of information we can extract, process and present to the users.

In [45]:
# Request bookkeeping only (success flag and API credit counters);
# nothing here is useful to show to the user

todos['request_info']  

{'success': True,
 'credits_used': 6,
 'credits_used_this_request': 1,
 'credits_remaining': 119,
 'credits_reset_at': '2022-05-12T21:28:53.000Z'}

In [46]:
# This field tells us how long the search took (timestamps and total time)
# and gives the URLs for the HTML and JSON output formats.
# NOTE: html_url / json_url echo the api_key query parameter -- clear this
# cell's output before sharing the notebook.

todos['search_metadata']

{'created_at': '2022-04-12T21:48:23.448Z',
 'processed_at': '2022-04-12T21:48:28.231Z',
 'total_time_taken': 4.78,
 'engine_url': 'https://www.google.com/search?q=pizza',
 'html_url': 'https://api.scaleserp.com/search?api_key=EDB90B00619D4B5187698BCCF471D074&q=pizza&engine=google&output=html',
 'json_url': 'https://api.scaleserp.com/search?api_key=EDB90B00619D4B5187698BCCF471D074&q=pizza&engine=google&output=json'}

In [50]:
# We want to keep the videos in our results: for users who cannot read
# the destination language, videos and images can be extremely useful
# in our visualization. Here we look at the first video entry only.

todos['inline_videos'][0]

{'position': 1,
 'title': 'IMPASTO della PIZZA – La nostra ricetta PERFETTA per ...',
 'link': 'https://www.youtube.com/watch?v=IlKb6LpRNxc&vl=it',
 'length': '5.22',
 'source': 'YouTube - GialloZafferano',
 'date': '4 dic 2021',
 'date_utc': '2021-12-04T00:00:00.000Z',
 'block_position': 10}

In [17]:
# Destination languages for the exploration: language code -> display name.
LANGUAGES = {
    "ar": "Arabic",
    "bg": "Bulgarian",
    "cs": "Czech",
    "de": "German",
    "el": "Greek",
    "es": "Spanish",
    "fr": "French",
    "ga": "Irish",
    "he": "Hebrew",
    "it": "Italian",
    "ja": "Japanese",
    "ko": "Korean",
    "ms": "Malay",
    "sw": "Swahili",
    "ur": "Urdu",
    "zh-CN": "Chinese",
}


In [27]:
# Smoke test: translate 'hello' into every language listed in LANGUAGES
output = []  # NOTE(review): never filled in this cell -- confirm whether a later cell uses it

for key in LANGUAGES.keys():
    print(translator.translate('hello', dest=key))


Translated(src=en, dest=ar, text=أهلا, pronunciation='ahlan, extra_data="{'translat...")
Translated(src=en, dest=bg, text=Здравейте, pronunciation=Zdraveĭte, extra_data="{'translat...")
Translated(src=en, dest=cs, text=Ahoj, pronunciation=None, extra_data="{'translat...")
Translated(src=en, dest=de, text=hallo, pronunciation=None, extra_data="{'translat...")
Translated(src=en, dest=el, text=γεια σας, pronunciation=geia sas, extra_data="{'translat...")
Translated(src=en, dest=es, text=Hola, pronunciation=None, extra_data="{'translat...")
Translated(src=en, dest=fr, text=salut, pronunciation=None, extra_data="{'translat...")
Translated(src=en, dest=ga, text=Dia dhuit, pronunciation=None, extra_data="{'translat...")
Translated(src=en, dest=he, text=שלום, pronunciation=None, extra_data="{'translat...")
Translated(src=en, dest=it, text=ciao, pronunciation=None, extra_data="{'translat...")
Translated(src=en, dest=ja, text=こんにちは, pronunciation=Kon'nichiwa, extra_data="{'translat...")
Transla

In [None]:
# TODO: query this endpoint next (an API key still has to be appended).
# Kept as a comment because a bare URL in a code cell is a SyntaxError.
# https://api.dp.la/v2/items?q=kittens&api_key=