In [9]:
#Create a cluster
import subprocess
import json
import shlex
import os
from watson_developer_cloud import RetrieveAndRankV1
import pysolr

In [25]:
#getting current directory
curdir = os.getcwd()

#loading credentials
credFilePath = curdir+'\\answer-retrieval-master\\config\\credentials.json'
credentials = ''

credentials ={
  "username": "username",
  "password": "password",
  "url": "https://gateway.watsonplatform.net/retrieve-and-rank/api/v1/",
  "cs_ranker_id": "CUSTOM_RANKER_ID",
  "ranker_id": "RANKER_ID",
  "collection_name": "Alarm_collection",
  "config_name": "Alarm_config_new",
  "cluster_id": "solrid",
}

BASEURL=credentials['url']
SOLRURL= BASEURL+"solr_clusters/"
USERNAME=credentials['username']
PASSWORD=credentials['password']

#please provide the name and size of the cluster below
CLUSTER_NAME="rr_andriod_cluster"
CLUSTER_SIZE=1  #Size of the cluster to create. Ranges from 1 to 7. 
                #Send an empty value to create a small free cluster for testing. 
                #You can create only one free cluster.

#Checking if cluster with same name already exists
retrieve_and_rank = RetrieveAndRankV1(
    username=USERNAME,
    password=PASSWORD)
found = False
cluster_id = ''
output = retrieve_and_rank.list_solr_clusters()
try:
    parsed_json = output
    clusters = parsed_json['clusters']
    for i in range(len(clusters)):
        cluster_json = clusters[i]
        if cluster_json['cluster_name'] == CLUSTER_NAME:
            found = True
            cluster_id = cluster_json['solr_cluster_id']
except:
    print ('Command:')
    print ('retrieve_and_rank.list_solr_clusters()')
    print ('Response:')
    print (output) 

if found:
    print ("Cluster "+CLUSTER_NAME+" already exists with ID "+cluster_id+".")
    print (json.dumps(parsed_json, sort_keys=True, indent=4))
else:
    #Running command that creates a cluster
    output = retrieve_and_rank.create_solr_cluster(cluster_name=CLUSTER_NAME,cluster_size=CLUSTER_SIZE)

    try:
        parsed_json = output
        print (json.dumps(parsed_json, sort_keys=True, indent=4))
        credentials['cluster_id'] = parsed_json['solr_cluster_id']
        with open(credFilePath, 'w') as credFileUpdated:
            json.dump(credentials, credFileUpdated)
    except:
        print ('Command:')
        print ('retrieve_and_rank.create_solr_cluster')
        print ('Response:')
        print (output)

Cluster rr_andriod_cluster already exists with ID solr_id.
{
    "clusters": [
        {
            "cluster_name": "",
            "cluster_size": "",
            "solr_cluster_id": "solr_id",
            "solr_cluster_status": "READY"
        },
        {
            "cluster_name": "rr_andriod_cluster",
            "cluster_size": "1",
            "solr_cluster_id": "solr_id",
            "solr_cluster_status": "READY"
        }
    ]
}


In [26]:
#check the status
curdir = os.getcwd()

#loading credentials
credFilePath = curdir+'/answer-retrieval-master/config/credentials.json'
credentials = ''
with open(credFilePath) as credFile:
    credentials = json.load(credFile)
USERNAME=credentials['username']
PASSWORD=credentials['password']
#get cluster id from the loaded credentials in Step 1
CLUSTER_ID=credentials['cluster_id']
 
retrieve_and_rank = RetrieveAndRankV1(
    username=USERNAME,
    password=PASSWORD)

#Running command that checks the status of a cluster
output = retrieve_and_rank.get_solr_cluster_status(solr_cluster_id=CLUSTER_ID)

try:
    parsed_json = json.loads(output)
    print (json.dumps(parsed_json, sort_keys=True, indent=4))
except:
    print ('Command:')
    print ('retrieve_and_rank.get_solr_cluster_status')
    print ('Response:')
    print (output)

Command:
retrieve_and_rank.get_solr_cluster_status
Response:
{'cluster_size': '1', 'solr_cluster_status': 'READY', 'solr_cluster_id': 'scdc06a2ff_d53c_47d7_bd93_9ab8c17b77d3', 'cluster_name': 'rr_andriod_cluster'}


In [27]:
#upload config
curdir = os.getcwd()

#loading credentials
credFilePath = curdir+'/answer-retrieval-master/config/credentials.json'
credentials = ''
with open(credFilePath) as credFile:
    credentials = json.load(credFile)
USERNAME=credentials['username']
PASSWORD=credentials['password']
# upload config
#getting relative path to config file
CONFIG_PATH=curdir+"/answer-retrieval-master/config/"

#get cluster id from the loaded credentials in Step 1
SOLR_CLUSTER_ID=credentials['cluster_id']
found = False

#please provide the configuration name and and the path to the zip file
CONFIG_NAME="rr_android_config"   #Do not add spaces or special characters
CONFIG_FILE_NAME="config.zip" #Change this if you renamed the file

retrieve_and_rank = RetrieveAndRankV1(
    username=USERNAME,
    password=PASSWORD)

# Check if config exists
output = retrieve_and_rank.list_configs(solr_cluster_id=SOLR_CLUSTER_ID)
try:
    configs = output['solr_configs']
    for i in range(len(configs)):
        config_name = configs[i]
        if config_name == CONFIG_NAME:
            found = True
except:
    print ('Command:')
    print ('retrieve_and_rank.list_solr_clusters()')
    print ('Response:')
    print (output) 

if found:
    print ("Configuration "+CONFIG_NAME+" already exists.")
    print (json.dumps(output, sort_keys=True, indent=4))
else:
    #Running command that uploads a configuration to solr
    with open(CONFIG_PATH + CONFIG_FILE_NAME, 'rb') as config:
        output = retrieve_and_rank.create_config(SOLR_CLUSTER_ID, CONFIG_NAME, config)

    try:
        print (json.dumps(output, sort_keys=True, indent=4))
        credentials['config_name'] = CONFIG_NAME
        with open(credFilePath, 'w') as credFileUpdated:
            json.dump(credentials, credFileUpdated)
    except:
        print ('Command:')
        print ('retrieve_and_rank.create_config()')
        print ('Response:')
        print (output)

{
    "message": "WRRCSR026: Successfully uploaded named config [rr_android_config] for Solr cluster [scdc06a2ff_d53c_47d7_bd93_9ab8c17b77d3].",
    "statusCode": 200
}


In [28]:
#create a collection
curdir = os.getcwd()

#loading credentials
credFilePath = curdir+'/answer-retrieval-master/config/credentials.json'
credentials = ''
with open(credFilePath) as credFile:
    credentials = json.load(credFile)
USERNAME=credentials['username']
PASSWORD=credentials['password']

#get cluster id and config name from the loaded credentials in Step 1
SOLR_CLUSTER_ID=credentials['cluster_id']
CONFIG_NAME=credentials['config_name']
found = False

#please provide the collection name
COLLECTION_NAME="rr_andriod_collection"   #Do not add spaces or special characters

retrieve_and_rank = RetrieveAndRankV1(
    username=USERNAME,
    password=PASSWORD)

#Check to see if collection exists
output = retrieve_and_rank.list_collections(solr_cluster_id=SOLR_CLUSTER_ID)

try:
    collections = output['collections']
    for i in range(len(collections)):
        collection_name = collections[i]
        if collection_name == COLLECTION_NAME:
            found = True
except:
    print ('Command:')
    print ('retrieve_and_rank.list_collections')
    print ('Response:')
    print (output) 
    
if found:
    print ("Collection "+COLLECTION_NAME+" already exists.")
    print (json.dumps(output, sort_keys=True, indent=4))
else:
    #Running command to create a new collection
    output = retrieve_and_rank.create_collection(SOLR_CLUSTER_ID, COLLECTION_NAME, CONFIG_NAME)
    try:
        print (json.dumps(output, sort_keys=True, indent=4))
        credentials['collection_name'] = COLLECTION_NAME
        with open(credFilePath, 'w') as credFileUpdated:
            json.dump(credentials, credFileUpdated)
    except:
        print ('Command:')
        print ('retrieve_and_rank.create_collection()')
        print ('Response:')
        print (output)

{
    "responseHeader": {
        "QTime": 11088,
        "status": 0
    },
    "success": {
        "10.176.42.104:6473_solr": {
            "core": "rr_andriod_collection_shard1_replica2",
            "responseHeader": {
                "QTime": 2403,
                "status": 0
            }
        },
        "10.176.42.208:5089_solr": {
            "core": "rr_andriod_collection_shard1_replica1",
            "responseHeader": {
                "QTime": 2791,
                "status": 0
            }
        }
    }
}


In [30]:
#populate the collection
curdir = os.getcwd()
 
#get cluster id and collection name from the loaded credentials in Step 1
BASEURL=credentials['url']
SOLRURL= BASEURL+"solr_clusters/"
USERNAME=credentials['username']
PASSWORD=credentials['password']
SOLR_CLUSTER_ID=credentials['cluster_id']
COLLECTION_NAME=credentials['collection_name']

retrieve_and_rank = RetrieveAndRankV1(
    username=USERNAME,
    password=PASSWORD)

#please make sure your documents are in data/content/solrDocuments.json or modify the path below
SOLR_DOCUMENTS_PATH=curdir+"/answer-retrieval-master/data/content/solrDocuments.json"

#Get Solr Client Handle
pysolr_client = retrieve_and_rank.get_pysolr_client(SOLR_CLUSTER_ID, COLLECTION_NAME)

#Add documents
with open(SOLR_DOCUMENTS_PATH) as data_file:    
    data = json.load(data_file)
output = pysolr_client.add(data)

#Running command that index documents
try:
    print (output)
except:
    print ('Command:')
    print ('pysolr_client.add()')
    print ('Response:')
    print (output)

<?xml version="1.0" encoding="UTF-8"?>
<response>
<lst name="responseHeader"><int name="status">0</int><int name="QTime">15493</int></lst>
</response>



In [31]:
# training the retrival
curdir = os.getcwd()

#loading credentials
credFilePath = curdir+'/answer-retrieval-master/config/credentials.json'
credentials = ''
with open(credFilePath) as credFile:
    credentials = json.load(credFile)

BASEURL=credentials['url']
SOLRURL= BASEURL+"solr_clusters/"
USERNAME=credentials['username']
PASSWORD=credentials['password']
SOLR_CLUSTER_ID=credentials['cluster_id']
COLLECTION_NAME=credentials['collection_name']

retrieve_and_rank = RetrieveAndRankV1(
    username=USERNAME,
    password=PASSWORD)

#please provide the query to test
QUESTION="what is the best city to visit in Brazil"

#please provide the number of documents the query should return
MAX_DOCUMENTS=10

#Running command that queries Solr
QUESTION = QUESTION.replace(" ","%20")

#Invoke Solr Search using python Solr client
pysolr_client = retrieve_and_rank.get_pysolr_client(SOLR_CLUSTER_ID, COLLECTION_NAME)
output = pysolr_client.search('*',fq=QUESTION,rows=10)

try:
    print (output.docs)
except:
    print ('Command:')
    print ('pysolr_client.search()')
    print ('Response:')
    print (output)
 

[{'answer': ["There are a few ways to visit Antarctica. Remember that nothing is ever guaranteed strong and it's quite possible with weather that you might not make it there. Cruises strong Larger cruises often will get you close but not to the land. You'll have the comforts of cruising. These big ships may be regulated soon. Smaller ships will often let you get right onto land. Costs start at least 5k for the ultra cheap . Tailored Expeditions strong Many companies run specialty expeditions which let you visit Antarctica exactly how you want. Prices are extremely high for this. Airborne strong You get to fly over Antarctica. This is the more ecological way to see it but doesn't have the same experience as actually walking on it. This is also the cheapest option. "], 'subtitle': ['A year ago I was reading some magazine and found out that there is availability to get a trip to Antarctica. Unfortunately there was no info about how I could get there. Do you know anything about it? Best wa

In [None]:
#Generating the training data
curdir = os.getcwd()

#loading credentials
credFilePath = curdir+'/answer-retrieval-master/config/credentials.json'
with open(credFilePath) as credFile:
    credentials = json.load(credFile)

In [71]:
BASEURL=credentials['url']
SOLRURL= BASEURL+"solr_clusters/"
RANKER_URL=BASEURL+"rankers"
USERNAME=credentials['username']
PASSWORD=credentials['password']
SOLR_CLUSTER_ID=credentials['cluster_id']
COLLECTION_NAME=credentials['collection_name']
TRAIN_FILE_PATH='C:/Users/Wenting\ Li/answer-retrieval-master/bin/python' 
GROUND_TRUTH_FILE="C:/Users/Wenting\ Li/answer-retrieval-master/data/groundtruth/answerGT_train.csv"

#Running command that trains a ranker 
#os.chdir('answer-retrieval-master/bin/python')
cmd = 'python train.py -u %s:%s -i %s -c %s -x %s -n %s' %\
    (  USERNAME, PASSWORD, GROUND_TRUTH_FILE, SOLR_CLUSTER_ID, COLLECTION_NAME, "travel_ranker")
#os.chdir(curdir)
try:
    process = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
    output = process.communicate()[0]
    print (output)
except:
    print ('Command:')
    print (cmd)
    print ('Response:')
    print (output)

b''


In [69]:
os.chdir('../Users')
print (curdir)

C:\Users\Wenting Li\answer-retrieval-master\bin\python
