# Use Case A - Query builder 
***
### Using 2-grams, 3-grams, 4-grams with content from the Statistics Explained(SE) articles and the SE Glossary articles

### Adjusted (June 2022) to read all information from the Knowledge Database

### Installation instructions
This is a Google Colab notebook. You must have a Google account. 

Launch the notebook and put your own credentials in the chunk with title "Connect to the Virtuoso database".


### Installations

In [1]:
!pip install SPARQLWrapper
!pip install sparql_dataframe

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
!apt-get install virtuoso-opensource

Reading package lists... Done
Building dependency tree       
Reading state information... Done
virtuoso-opensource is already the newest version (6.1.6+repack-0ubuntu9).
The following package was automatically installed and is no longer required:
  libnvidia-common-460
Use 'apt autoremove' to remove it.
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


In [3]:
!pip install nltk==3.4 ## needs latest version

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


### Imports and settings

In [4]:
##import pyodbc

from SPARQLWrapper import SPARQLWrapper, POST, DIGEST, GET
from SPARQLWrapper import JSON, INSERT, DELETE
import sparql_dataframe

import pandas as pd
import numpy as np

import gensim

import re
import unicodedata as ud

pd.set_option('display.max_colwidth', 40)

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

### Connect to the Virtuoso database

In [5]:
user = 'xxxxx'
passw = 'xxxxx'

In [6]:
def connect_virtuoso(DSN, UID, PWD):

    sparql = SPARQLWrapper(DSN)
    sparql.setHTTPAuth(DIGEST)
    sparql.setCredentials(UID, PWD)
    sparql.setMethod(GET)

    return sparql

# Connection to the KDB 
endpoint = "http://lod.csd.auth.gr:8890/sparql/"
sparql = connect_virtuoso(endpoint,user,passw)


### The data cleaning function

In [7]:

def clean(x, quotes=True):
    if pd.isnull(x): return x  
    x = x.strip()
    
    ## make letter-question mark-letter -> letter-quote-space-letter !!! but NOT in the lists of URLs!!!
    if quotes:
        x = re.sub(r'([A-Za-z])\?([A-Za-z])','\\1\' \\2',x) 
    
    ## make letter-question mark-space lower case letter letter-quote-space letter
    x = re.sub(r'([A-Za-z])\? ([a-z])','\\1\' \\2',x) 

    ## delete ,000 commas in numbers    
    x = re.sub(r'\b(\d+),(\d+)\b','\\1\\2',x) ## CORRECTED
    
    ## delete  000 spaces in numbers
    x = re.sub(r'\b(\d+) (\d+)\b','\\1\\2',x) ## CORRECTED
    
    ## remove more than one spaces
    x = re.sub(r' +', ' ',x)
    
    ## remove start and end spaces
    x = re.sub(r'^ +| +$', '',x,flags=re.MULTILINE) 
    
    ## space-comma -> comma
    x = re.sub(r' \,',',',x)
    
    ## space-dot -> dot
    x = re.sub(r' \.','.',x)
    
    #x = x.encode('latin1').decode('utf-8') ## â\x80\x99
    x = ud.normalize('NFKD',x).encode('ascii', 'ignore').decode()
    
    return x



### Load data from the database

### Glossary articles  

* Titles, URLs and definitions.
* Exclude some redirections.
* Exclude one invalid URL from a remnant empty page.

### The SPARQL query

In [8]:
RelationsStatements = """
DEFINE input:inference <https://ec.europa.eu/eurostat/NLP4StatRef/knowledge/>
PREFIX estat: <https://ec.europa.eu/eurostat/NLP4StatRef/ontology/>
PREFIX estatdata: <https://ec.europa.eu/eurostat/NLP4StatRef/knowledge/>
select ?a ?title ?url ?content where {
    ?a a estat:GlossaryArticle .
    ?a estat:title ?title .
    ?a estat:hasURL ?url.
    ?a estat:content ?content .
    filter(!regex(?content, "^(The revision|Redirect to)")) 
    filter( regex(?url, "Glossary:")) 
} 
"""
## estat:GlossaryArticle OR StatisticsExplainedArticle
  
sparql.setQuery(RelationsStatements)
sparql.method = "POST"
sparql.setReturnFormat(JSON)
GL_df = sparql.query().convert()['results']['bindings']
GL_df = pd.json_normalize(GL_df)
print(GL_df.columns)
GL_df

Index(['a.type', 'a.value', 'title.type', 'title.value', 'url.type',
       'url.datatype', 'url.value', 'content.type', 'content.value'],
      dtype='object')


Unnamed: 0,a.type,a.value,title.type,title.value,url.type,url.datatype,url.value,content.type,content.value
0,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,Accident at work,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...,literal,An accident at work in the framework...
1,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,Gross domestic product GDP,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...,literal,Gross domestic product abbreviated a...
2,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,Toxicity,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...,literal,Toxicity measures the degree to whic...
3,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,Structural fund,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...,literal,The Structural funds are funding ins...
4,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,PRODCOM,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...,literal,PRODCOM is an annual survey for the ...
...,...,...,...,...,...,...,...,...,...
1276,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,Gross operating rate,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...,literal,The gross operating rate in structur...
1277,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,Gross operating surplus,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...,literal,Gross operating surplus or profits i...
1278,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,Personnel costs,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...,literal,Within the context of structural bus...
1279,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,Stratum,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...,literal,In statistics a stratum plural strat...


### Processing and cleaning

In [9]:
GL_df.drop(columns=['a.type','a.value','title.type','url.datatype','content.type'],inplace=True)
GL_df.rename(columns={'title.value':'title','url.value':'url','content.value':'definition'},inplace=True)
GL_df['id'] = range(len(GL_df))
GL_df = GL_df[['id','title','url','definition']]

GL_df = GL_df.replace('', np.nan) ## Check for anything empty
print(GL_df.isnull().sum())

## Check for duplicates
idx = GL_df[GL_df.duplicated(subset=['title','definition'], keep=False)].sort_values(by=['title','definition']).index
print(idx)
GL_df

id            0
title         0
url           0
definition    0
dtype: int64
Int64Index([], dtype='int64')


Unnamed: 0,id,title,url,definition
0,0,Accident at work,https://ec.europa.eu/eurostat/statis...,An accident at work in the framework...
1,1,Gross domestic product GDP,https://ec.europa.eu/eurostat/statis...,Gross domestic product abbreviated a...
2,2,Toxicity,https://ec.europa.eu/eurostat/statis...,Toxicity measures the degree to whic...
3,3,Structural fund,https://ec.europa.eu/eurostat/statis...,The Structural funds are funding ins...
4,4,PRODCOM,https://ec.europa.eu/eurostat/statis...,PRODCOM is an annual survey for the ...
...,...,...,...,...
1276,1276,Gross operating rate,https://ec.europa.eu/eurostat/statis...,The gross operating rate in structur...
1277,1277,Gross operating surplus,https://ec.europa.eu/eurostat/statis...,Gross operating surplus or profits i...
1278,1278,Personnel costs,https://ec.europa.eu/eurostat/statis...,Within the context of structural bus...
1279,1279,Stratum,https://ec.europa.eu/eurostat/statis...,In statistics a stratum plural strat...


In [10]:
GL_df['title'] = GL_df['title'].apply(clean)
GL_df['title'] = GL_df['title'].apply(lambda x: re.sub(r'\?','-',x)) ## also replace question marks by dashes
GL_df['definition'] = GL_df['definition'].apply(clean)
GL_df['url'] = GL_df['url'].apply(clean,quotes=False)  
GL_df['url'] = GL_df['url'].apply(lambda x: [x]) ## also put each URL in a list - required later
GL_df

Unnamed: 0,id,title,url,definition
0,0,Accident at work,[https://ec.europa.eu/eurostat/stati...,An accident at work in the framework...
1,1,Gross domestic product GDP,[https://ec.europa.eu/eurostat/stati...,Gross domestic product abbreviated a...
2,2,Toxicity,[https://ec.europa.eu/eurostat/stati...,Toxicity measures the degree to whic...
3,3,Structural fund,[https://ec.europa.eu/eurostat/stati...,The Structural funds are funding ins...
4,4,PRODCOM,[https://ec.europa.eu/eurostat/stati...,PRODCOM is an annual survey for the ...
...,...,...,...,...
1276,1276,Gross operating rate,[https://ec.europa.eu/eurostat/stati...,The gross operating rate in structur...
1277,1277,Gross operating surplus,[https://ec.europa.eu/eurostat/stati...,Gross operating surplus or profits i...
1278,1278,Personnel costs,[https://ec.europa.eu/eurostat/stati...,Within the context of structural bus...
1279,1279,Stratum,[https://ec.europa.eu/eurostat/stati...,In statistics a stratum plural strat...


### Statistics explained articles

* Titles, URLs and concatenated content from all paragraphs (excluding abstracts).

### The SPARQL query

In [11]:
RelationsStatements = """
DEFINE input:inference <https://ec.europa.eu/eurostat/NLP4StatRef/knowledge/>
PREFIX estat: <https://ec.europa.eu/eurostat/NLP4StatRef/ontology/>
PREFIX estatdata: <https://ec.europa.eu/eurostat/NLP4StatRef/knowledge/>
select ?x ?title (group_Concat(?para," ") as ?text) (sample(?url) as ?url) where {
    ?x a estat:StatisticsExplainedArticle .
    ?x estat:title ?title .
    ?x estat:hasURL ?url.
    ?x estat:hasParagraph  ?h .
    ?h estat:title ?t1 .
    FILTER(?t1!="Abstract") .
    ?h estat:content ?c .
    bind (concat(?t1," ",?c," ") as ?para)
} group by ?x ?title
"""
  
sparql.setQuery(RelationsStatements)
sparql.method = "POST"
sparql.setReturnFormat(JSON)
SE_df = sparql.query().convert()['results']['bindings']
SE_df = pd.json_normalize(SE_df)
print(SE_df.columns)
SE_df



Index(['x.type', 'x.value', 'title.type', 'title.value', 'text.type',
       'text.value', 'url.type', 'url.datatype', 'url.value'],
      dtype='object')


Unnamed: 0,x.type,x.value,title.type,title.value,text.type,text.value,url.type,url.datatype,url.value
0,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,Russia EU international trade in goo...,literal,Recent developments impact of COVID ...,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...
1,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,Africa EU key statistical indicators,literal,Population and health With the world...,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...
2,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,Comparative price levels for investment,literal,Overview In 2020 the highest price l...,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...
3,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,First and second generation immigran...,literal,General overview The EU attracts qui...,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...
4,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,Migrant integration statistics intro...,literal,Migrant integration in the EU The co...,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...
...,...,...,...,...,...,...,...,...,...
860,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,EU statistics on income and living c...,literal,Description The indicator on transit...,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...
861,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,European Neighbourhood Policy South ...,literal,Gross value added and employment Bet...,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...
862,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,EU statistics on income and living c...,literal,Description Each one of the indicato...,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...
863,uri,https://ec.europa.eu/eurostat/NLP4St...,literal,E commerce statistics,literal,E sales record a slight increase ove...,typed-literal,http://www.w3.org/2001/XMLSchema#anyURI,https://ec.europa.eu/eurostat/statis...


### Processing and cleaning

In [12]:
SE_df.drop(columns=['x.type','x.value','title.type','text.type','url.type','url.datatype'],inplace=True)
SE_df.rename(columns={'title.value':'title','url.value':'url','text.value':'raw content'},inplace=True)
SE_df['id'] = range(len(SE_df))
SE_df = SE_df[['id','title','url','raw content']]

SE_df['title'] = SE_df['title'].apply(clean)
SE_df['url'] = SE_df['url'].apply(clean,quotes=False)
SE_df['url'] = SE_df['url'].apply(lambda x: [x])
SE_df['raw content'] = SE_df['raw content'].apply(clean)
#SE_df.to_excel('SE_df.xlsx')
SE_df

Unnamed: 0,id,title,url,raw content
0,0,Russia EU international trade in goo...,[https://ec.europa.eu/eurostat/stati...,Recent developments impact of COVID ...
1,1,Africa EU key statistical indicators,[https://ec.europa.eu/eurostat/stati...,Population and health With the world...
2,2,Comparative price levels for investment,[https://ec.europa.eu/eurostat/stati...,Overview In 2020 the highest price l...
3,3,First and second generation immigran...,[https://ec.europa.eu/eurostat/stati...,General overview The EU attracts qui...
4,4,Migrant integration statistics intro...,[https://ec.europa.eu/eurostat/stati...,Migrant integration in the EU The co...
...,...,...,...,...
860,860,EU statistics on income and living c...,[https://ec.europa.eu/eurostat/stati...,Description The indicator on transit...
861,861,European Neighbourhood Policy South ...,[https://ec.europa.eu/eurostat/stati...,Gross value added and employment Bet...
862,862,EU statistics on income and living c...,[https://ec.europa.eu/eurostat/stati...,Description Each one of the indicato...
863,863,E commerce statistics,[https://ec.europa.eu/eurostat/stati...,E sales record a slight increase ove...


### Tokenize, remove stop-words and stem; keep also the original terms

* Use titles and definitions from the Glossary articles.
* Use titles and raw content from the SE articles
* _texts_ is a list containing lists. Each sub-list has the stemmed term, the original term and the URL where the term was found. The URL is itself put in a list.

In [13]:

from gensim.parsing.preprocessing import remove_stopwords
from gensim.parsing.preprocessing import stem_text
from gensim.parsing.porter import PorterStemmer

p = PorterStemmer()

def text_to_words(text,url):
    words = str(gensim.utils.simple_preprocess(text, deacc=True))
    words = remove_stopwords(words) 
    words = gensim.utils.tokenize(words)
        
    ## keep also original token!!! 
    words = [[p.stem(token),token,url] for token in words if len(p.stem(token)) >= 5] ##minimum length = 5 
    yield words        

texts=list()   

for i in range(len(GL_df)):
    texts.extend(text_to_words(GL_df.loc[i,'definition'],GL_df.loc[i,'url']))
    texts.extend(text_to_words(GL_df.loc[i,'title'],GL_df.loc[i,'url'])) 
for i in range(len(SE_df)):    
    texts.extend(text_to_words(SE_df.loc[i,'title'],SE_df.loc[i,'url'])) 
    texts.extend(text_to_words(SE_df.loc[i,'raw content'],SE_df.loc[i,'url'])) 

* Example: the first 5 sub-lists in the first list in _texts_.

In [14]:
print(len(texts))
texts[0][:5]


4292


[['accid',
  'accident',
  ['https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Accident_at_work']],
 ['framework',
  'framework',
  ['https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Accident_at_work']],
 ['administr',
  'administrative',
  ['https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Accident_at_work']],
 ['collect',
  'collection',
  ['https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Accident_at_work']],
 ['european',
  'european',
  ['https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Accident_at_work']]]

### Co-occurences: keys in n-grams are (n-1) tuples of stemmed tokens 

* Three dictionaries, for 2-,3-,and 4-grams. The corresponding keys are single stemmed terms, pairs of stemmed terms and triplets, respectively.
* For each key in a dictionary, the value is another (nested) dictionary with the **original terms**, their counts and the relevant URLs. In the end, the counts are used to calculate probabilities.
* Below all three dictionaries are constructed from the sequences of 4-grams.


In [15]:
from nltk import bigrams, trigrams, ngrams
#from collections import Counter, defaultdict


model2=dict()
model3=dict()
model4=dict()

def dict_insert(model,entered,proposed,new_urls_to_check):
    key_1 = model.get(entered)
    if key_1:
        key_2 = key_1.get(proposed)
        if key_2:
            key_2[0] +=1
            existing_urls = key_2[1]
            add_urls = [u for u in new_urls_to_check if u not in existing_urls]
            key_2.extend(add_urls)
        else:    
            key_1[proposed]= [1,new_urls_to_check]
    else:
        model[entered]={proposed:[1,new_urls_to_check]}
    return model            


# Co-occurences

for sentence in texts:
    pairs = [elem for elem in sentence] ## a list of 3-tuples (stemmed term, original term, list of URLs)
    if len(pairs) == 0: continue
    
    for first, second, third, fourth in ngrams(pairs,4): ## quadruplets of 3-tuples (stemmed term, original term, list of URLs)
        first_stem, first_orig, first_url = first
        second_stem, second_orig, second_url = second
        third_stem, third_orig, third_url = third
        fourth_stem, fourth_orig, fourth_url = fourth
       
        model2 = dict_insert(model2, first_stem, second_orig,list(set(first_url).intersection(second_url)))
        model2 = dict_insert(model2, second_stem, third_orig,list(set(second_url).intersection(third_url)))
        model2 = dict_insert(model2, third_stem, fourth_orig,list(set(third_url).intersection(fourth_url)))
        
        model3 = dict_insert(model3,(first_stem,second_stem),third_orig,list(set(first_url).intersection(*[second_url,third_url])))
        model3 = dict_insert(model3,(second_stem,third_stem),fourth_orig,list(set(second_url).intersection(*[third_url,fourth_url])))
        
        model4 = dict_insert(model4,(first_stem, second_stem, third_stem),fourth_orig,fourth_url)
   
        
## Transform counts to probabilities

for w1 in model2.keys():
    ssum = sum(model2[w1][w2][0] for w2 in model2[w1].keys())
    for w2 in model2[w1].keys():
        model2[w1][w2][0] /= ssum

for w1_w2 in model3.keys():
    ssum = sum(model3[w1_w2][w3][0] for w3 in model3[w1_w2].keys())
    for w3 in model3[w1_w2].keys():
        model3[w1_w2][w3][0] /= ssum

for w1_w2_w3 in model4.keys():
    ssum = sum(model4[w1_w2_w3][w4][0] for w4 in model4[w1_w2_w3].keys())
    for w4 in model4[w1_w2_w3].keys():
        model4[w1_w2_w3][w4][0] /= ssum


### Examples of keys and values in the three dictionaries

* We do not show an example of the 2-grams dictionary because the values are too many.


In [16]:
print('\nExample, model with 3-grams, key = (basic,structur):\n')
print(model3[('basic','structur')])

print('\nExample, model with 4-grams, key = (survei,structur,agricultur):\n')
print(model4[('survei','structur','agricultur')])



Example, model with 3-grams, key = (basic,structur):

{'survey': [0.3333333333333333, ['https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Farm_structure_survey_(FSS)']], 'employment': [0.6666666666666666, ['https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Statistics_on_small_and_medium-sized_enterprises']]}

Example, model with 4-grams, key = (survei,structur,agricultur):

{'carried': [0.3333333333333333, ['https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Glossary:Farm_structure_survey_(FSS)']], 'definitions': [0.3333333333333333, ['https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Survey_on_agricultural_production_methods']], 'project': [0.3333333333333333, ['https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Survey_on_agricultural_production_methods']]}


### The widgets
***

In [17]:
import ipywidgets as widgets
layout = widgets.Layout(width='600px', height='30px')

In [18]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [19]:

def change_top_articles( Keywords):

    from operator import itemgetter
    p = PorterStemmer()
    
    last_match = ''
    
    #if not Keywords.endswith(' '):
    #    return None
    
    def test_and_back_step(x):
        mod_index = -1
        models = [model2,model3,model4]
        if len(x)==1: 
            x=x[0] ; model=models[0]
        elif len(x) ==2 or len(x) == 3:
            x=tuple(x) ; mod_index=len(x)-1; model=models[mod_index]
        elif len(x) >=4:
            x=tuple(x[:3]) ; model=models[2]
#        else:
#            return None
        while not models[mod_index].get(x) and mod_index >=0:
            x=x[:-1]
            if len(x)==1 : x=x[0]
            mod_index -=1 ; model=models[mod_index] 
        return (model,x)    
            

    x = Keywords.split() 
    if len(x) ==0: 
        print()
        return
    x = [p.stem(el) for el in x]
    
    
    model,x = test_and_back_step(x)
    if not model.get(x):
        return None
    

    print()
    print('Based on last match: ',x,'\n')
    print('Suggestions, probabilities (in descending order) and relevant URLs: ')
    proposals = sorted([(k,v) for (k,v) in model[x].items()],key=itemgetter(1),reverse=True)
    last_match = x
    for key, value in proposals:
        print()
        print(key,': ',value[0])
        for url in value[1]:
            print(url)
   
    
def query_build1(value):
  style = {'description_width': 'initial'}
    
  Keywords = widgets.Text(
      value=value,
      placeholder='Type something',
      description='Keywords:',
      disabled=False
  )

  ui = widgets.HBox([Keywords])
  out = widgets.interactive_output(change_top_articles, {'Keywords': Keywords})
  display(ui, out)
    
query_build1(value='household income')  

HBox(children=(Text(value='household income', description='Keywords:', placeholder='Type something'),))

Output()