In [1]:
#import lib

import csv
import json
import tempfile
from ibm_watson import DiscoveryV2
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

In [2]:
#import IBM API key and service URL from ibm-credentials.env

import os
from dotenv import load_dotenv
load_dotenv('ibm-discovery-credentials.env')
API_KEY = os.getenv('DISCOVERY_APIKEY')
URL=os.getenv('DISCOVERY_URL')

In [3]:
#load API key

authenticator = IAMAuthenticator(API_KEY)
discovery = DiscoveryV2(
    version='2020-08-30',
    authenticator=authenticator
)

discovery.set_service_url(URL)
#discovery.set_disable_ssl_verification(True)

In [4]:
#constants

PROJECT_ID='daa312d8-8893-4782-970d-5aa98e368399'
COLLECTION_SIZE=263
RISK_SCORE_INIT=0

In [5]:
#convert ad listing csv data to list
### the same data have been uploaded to and processed by watson Discovery service currently under use ###

data_list=[]

title_arr=[]
ad_id_arr=[]
country_arr=[]
sector_arr=[]
date_arr=[]
risk_arr=[]

with open("locanto_demo.csv",newline='',encoding='utf-8') as f:
    reader=csv.reader(f)
    boolVal = 0
    for row in reader:
        data_list.append(row)
        temp = str(row).split(',')
        while boolVal == 1:
                title_arr.append(temp[0])
                ad_id_arr.append(temp[1])
                country_arr.append(temp[len(temp)-3])
                sector_arr.append(temp[len(temp)-1])
                date_arr.append(temp[len(temp)-2])
                risk_arr.append(RISK_SCORE_INIT)
                                
                boolVal = 0
        boolVal = 1
#print(len(ad_id_arr))

In [6]:
#format ads list into json

line=1
print("{\"ads\": [")
while line < len(data_list):
    # take the specific string values out of the array and remove unwanted characters
    unwantedChars = "\"][\'"
    temp_title = title_arr[line-1]
    temp_ad_id = ad_id_arr[line-1].replace("'","")
    temp_country = country_arr[line-1].replace("'","")
    temp_sector = sector_arr[line-1].replace("'","")
    temp_date=date_arr[line-1].replace("'","")
    for unwantedChar in unwantedChars:
        temp_title = temp_title.replace(unwantedChar, "")
        temp_country = temp_country.replace(unwantedChar,"")
        temp_sector = temp_sector.replace(unwantedChar,"")
    print(f"{'{'}\n")
    print(f" \"{'Job_title'}\": {'{'}\n   \"{'title'}\": \"{temp_title}\"\n {'}'},\n")
    print(f" \"{'Ad_id'}\": {'{'}\n   \"{'id'}\": \"{temp_ad_id}\"\n {'}'},\n")
    print(f" \"{'Country'}\": {'{'}\n   \"{'country'}\": \"{temp_country}\"\n {'}'},\n")
    print(f" \"{'Sector'}\": {'{'}\n   \"{'sector'}\": \"{temp_sector}\"\n {'}'},\n")
    print(f" \"{'Date'}\": {'{'}\n   \"{'date'}\": \"{temp_date}\"\n {'}'},\n")
    print(f" \"{'Risk_score'}\": {'{'}\n   \"{'risk'}\": \"{RISK_SCORE_INIT}\"\n {'}'}\n")
    str = (('}', '},') [line < (len(data_list)-1)])
    print(str)
    line = line + 1
print("]}")

### save output as 'locanto_demo_formatted.json' ###

{"ads": [
{

 "Job_title": {
   "title": "Customer Service Assistant - Jameson Visitor Centre …"
 },

 "Ad_id": {
   "id": " 5499952319"
 },

 "Country": {
   "country": " Ireland"
 },

 "Sector": {
   "sector": " Customer Service"
 },

 "Date": {
   "date": " 2022-04-09"
 },

 "Risk_score": {
   "risk": "0"
 }

},
{

 "Job_title": {
   "title": "Customer Service Specialist - Dutch"
 },

 "Ad_id": {
   "id": " 5495340327"
 },

 "Country": {
   "country": " Ireland"
 },

 "Sector": {
   "sector": " Customer Service"
 },

 "Date": {
   "date": " 2022-04-09"
 },

 "Risk_score": {
   "risk": "0"
 }

},
{

 "Job_title": {
   "title": "Overnight Customer Assistant - Ballywaltrim Lane"
 },

 "Ad_id": {
   "id": " Bray"
 },

 "Country": {
   "country": " Ireland"
 },

 "Sector": {
   "sector": " Customer Service & Call Centre"
 },

 "Date": {
   "date": " 2022-04-09"
 },

 "Risk_score": {
   "risk": "0"
 }

},
{

 "Job_title": {
   "title": "Air Logistics Customer Care Expert"
 },

 "Ad_id": {

In [None]:
#import NLU queries from csv to array

queries_list=[]

keywords_arr=[]
weights_arr=[]

with open("nlu_keywords_queries.csv",newline='',encoding='utf-8') as q:
    queryReader=csv.reader(q)
    boolVal = 0
    for row in queryReader:
        queries_list.append(row)
        temp = str(row).split(',')
        while boolVal == 1:
                keywords_arr.append(temp[0])
                weights_arr.append(temp[1])

                boolVal = 0
        boolVal = 1
#print(len(queries_list))

In [None]:
#method to update risk score in data array
def updateRisk(tempFilePath,keywordWeight):
    with open (tempFilePath,"r") as qF:
        queryResult = json.load(qF)
    with open ('locanto_demo_formatted.json',"w") as lF:
        adList = json.load(lF)

    for result in queryResult['results']:
        adId = result.get('ad_id')
        riskNew = result['result_metadata'].get('confidence')
        for ad in adList['ads']:
            if ad['Ad_id'].get('id')==adId:
                riskOld=ad['Risk_score'].get('risk')
                riskUpdated=riskOld+riskNew*keywordWeight
                ad['Risk_score']['risk']=riskUpdated
                break

In [None]:
#iterate NLU queries with Watson Discovery and get confidence in result_metadata as risk score
query=1
while query < len(queries_list):
# while query < 2:
    response = discovery.query(
        project_id=PROJECT_ID,
        natural_language_query=queries_list[query][0],
        count=COLLECTION_SIZE,
        return_=['ad_id']).get_result()
    #print(json.dumps(response, indent=2))
    with tempfile.NamedTemporaryFile(suffix='.json') as tf:
        json.dumps(response, indent=2)
        tfPath = tf.name
        updateRisk(tfPath,queries_list[query][1])

In [None]:
### Watson Discovery NLU query sample ###
#risk score for a single NLU query
response = discovery.query(
        project_id=PROJECT_ID,
        natural_language_query=queries_list[1][0],
        count=COLLECTION_SIZE,
        return_=['ad_id']).get_result()
print(json.dumps(response, indent=2))
### output saved as 'sampleQR.json' ###