In [None]:
import requests
import json
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient


# Read the service name, API version and API key from the local settings
# file so that no secret is hard-coded in the notebook itself.
with open("api/local.settings.json") as read_file:
    config = json.load(read_file)

service_name = config["Values"]["SearchServiceName"]
api_version = config["Values"]["ApiVersion"]
api_key = config["Values"]["SearchApiKey"]

# Headers sent with every REST request; the API key authenticates the call.
headers = {
    'Content-Type': 'application/json',
    'api-key': api_key
}

index_name = "mosl-demo-index-meta2"

# Derive the endpoint from the configured service name so the SDK client and
# the hand-built REST URL below always target the same search service.
endpoint = "https://{}.search.windows.net".format(service_name)

# Query string for a simple full-text search on "correction":
#   $count=true            -> include the total number of matches
#   highlight=merged_text  -> return hit-highlighted snippets for that field
# Append '&$top=5' to cap the number of hits returned.
url = "{}/indexes/{}/docs".format(endpoint, index_name)
url += "?api-version={}".format(api_version)
url += "&search=correction"
url += "&$count=true"
url += '&highlight=merged_text'
print(url)


: 

In [32]:
# Build the SDK client for the same index, authenticating with the API key,
# and print it to confirm which endpoint and index it is bound to.
search_client = SearchClient(
    endpoint=endpoint,
    index_name=index_name,
    credential=AzureKeyCredential(api_key),
)
print(search_client)

<SearchClient [endpoint='https://search-test-123.search.windows.net', index='mosl-demo-index-meta2']>


In [3]:
# Issue the search request against the REST endpoint. requests applies no
# timeout by default, so one is set explicitly to keep an unresponsive
# service from blocking the kernel indefinitely.
resp = requests.get(url, headers = headers, timeout = 30)
print(resp.status_code)

200


In [4]:
# Decode the JSON body of the search response into a dict.
search_results = resp.json()

# Exploratory look at the response structure. Only the value of the final
# expression is echoed as the cell output; the lines before it are evaluated
# and discarded, so they only serve as a check that those keys exist.
search_results.keys()
search_results["@odata.context"]
search_results["@odata.count"]
len(search_results['value'])
# Field names available on each returned hit (shown as the cell output).
search_results['value'][0].keys()

dict_keys(['@search.score', '@search.highlights', 'id', 'metadata_storage_name', 'content', 'keyPhrases', 'merged_text', 'Test1', 'Test2', 'Test3'])

In [5]:
# One line per returned hit: the source file name and its relevance score.
row_template = "PDF Name: {}, Search Score {}"
for hit in search_results["value"]:
    pdf_name = hit["metadata_storage_name"]
    score = hit["@search.score"]
    print(row_template.format(pdf_name, score))

PDF Name: CSD 0105 V7.0 Clean.pdf, Search Score 11.810898
PDF Name: OSD 0704 V1.0 (Clean).pdf, Search Score 11.32995
PDF Name: CSD 0406 V2.0.pdf, Search Score 9.431417
PDF Name: CSD 0103 V2.0 (clean).pdf, Search Score 7.4143906
PDF Name: CSD 0102 V8.0 (clean).pdf, Search Score 4.6572857
PDF Name: CSD 0202 V7.0 (clean).pdf, Search Score 4.6572857
PDF Name: CSD 0301 V14.0 (Clean).pdf, Search Score 4.147939


In [5]:
# Preview the first 350 characters of the merged_text field of the first
# returned hit.
print(search_results["value"][0]["merged_text"][:350])


 

 

 

 

Code Subsidiary Document  
No. 0105: 

Error Rectification and 
Retrospective Amendments 
 

 



 

Document reference CSD 01015 Error Rectification & Retrospective Amendments 
Version 7.0  Page 2 of 53 
 
 

Change History 

Version 
Number 

Date of 
Issue 

Reason For 
Change 

Change Control 
Reference 

Sections 
Affected  

V2.0


In [6]:
# Summarise the response: total matches reported by the service versus the
# number of hits actually returned in this page.
hits = search_results['value']
print("Results Found: {}, Results Returned: {}".format(search_results['@odata.count'], len(hits)))

# Take the maximum score explicitly instead of assuming the first hit holds
# it, and handle an empty result set, where indexing hits[0] would raise
# IndexError.
top_score = max((hit['@search.score'] for hit in hits), default = None)
if top_score is None:
    print("Highest Search Score: n/a (no results returned)")
else:
    print("Highest Search Score: {}".format(top_score))


Results Found: 7, Results Returned: 7
Highest Search Score: 11.810898


In [7]:
# Highlight snippets returned for the merged_text field of the first hit;
# as the cell's final expression, the list is echoed as the cell output.
search_results['value'][0]['@search.highlights']['merged_text']

['Market Operator Data <em>Correction</em> .......................................................... 20 \n\n5.',
 'Document reference CSD 01015 Error Rectification & Retrospective Amendments \nVersion 7.0  Page 9 of 53 \n \n \n\n2.2.5 For unpaired Supply Points, the Data Owner for the Supply Point is also the \n\nData Owner for the Core Data. \n\n2.3 Types of Data <em>Correction</em> \n\n2.3.1 There are two types of Data <em>Correction</em>.',
 'Immediate Error Rectification \n\n2.4.2 Where an Error Rectification satisfies all of the following conditions: \n\n(a) The proposed Effective From Date for the Data Item is: \n\n(i) After the most recent Effective From Date for that Data Item; and \n\neither within the current Month or the previous two preceding \n\nMonths; and \n\n(b) The current Data Owner has been the Data Owner for the entire period of \n\nthe proposed Data <em>Correction</em>; and \n\n(c) The proposed Data <em>Correction</em> is a change to the Data Item for the entire \

In [9]:
from IPython.display import display, HTML

# Render each highlight snippet of the first hit as HTML so the highlight
# tags returned by the service are displayed as markup rather than raw text.
first_hit_snippets = search_results["value"][0]["@search.highlights"]["merged_text"]
for snippet in first_hit_snippets:
    display(HTML(snippet))

In [93]:
# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that `urllib.parse` has been loaded.
import urllib.parse

search_term = "occupied"

# Markup the service wraps around every highlighted match, replacing its
# default highlight tags with an inline-styled span.
highlight_pre_tag = '<span style = "background-color: #f5e8a3; color: #000000">'
highlight_post_tag = '</span>'

# Every user-supplied or markup value is percent-encoded before it is placed
# in the query string, so search terms containing spaces, '&' or '#' do not
# corrupt the URL.
url = "https://{}.search.windows.net/indexes/{}/docs".format(service_name, index_name)
url += "?api-version={}".format(api_version)
url += "&search=" + urllib.parse.quote(search_term, safe = "")
url += "&$count=true"
url += '&highlight=merged_text'
url += "&highlightPreTag=" + urllib.parse.quote(highlight_pre_tag, safe = "")
url += "&highlightPostTag=" + urllib.parse.quote(highlight_post_tag, safe = "")

# Explicit timeout: requests would otherwise wait indefinitely on a stalled
# connection.
resp = requests.get(url, headers = headers, timeout = 30)
print(url)
print(resp.status_code)

# Replace the earlier results with the response to this query.
search_results = resp.json()

https://search-test-123.search.windows.net/indexes/mosl-demo-index-meta2/docs?api-version=2020-06-30&search=occupied&$count=true&highlight=merged_text&highlightPreTag=%3Cspan%20style%3D%22background-color%3A%23f5e8a3%22%3E&highlightPostTag=%3C%2Fspan%3E
200


In [94]:
import html

# Render every hit as a small HTML report: a heading with the file name and
# score, up to ten key phrases, then the highlighted snippets.
for result in search_results["value"]:
    # File names and key phrases come from the indexed documents, so they are
    # escaped before being embedded in markup; characters such as '<' or '&'
    # would otherwise be interpreted as HTML.
    title = html.escape(result["metadata_storage_name"])
    score = str(result['@search.score'])
    display(HTML("<h3>" + title + " (search score: " + score + ")" + "</h3>"))

    # Tolerate hits whose keyPhrases field is missing or null.
    key_phrases = result.get("keyPhrases") or []
    top_phrases = ' | '.join(html.escape(phrase) for phrase in key_phrases[:10])
    display(HTML("<h4>" + "Key phrases: " + "<br>" + "</h4>" + "<h5>" + top_phrases + "</h5>"))

    # Presumably a hit that matched only in a field other than merged_text
    # carries no highlights for it - TODO confirm against the service docs.
    # Fall back to an empty list instead of raising KeyError.
    highlights = (result.get("@search.highlights") or {}).get("merged_text") or []
    for highlight in highlights:
        # Snippets are rendered unescaped on purpose: they carry the highlight
        # tags inserted by the service around each match.
        display(HTML(highlight))