## Import the physical store data.

There is one other file that has a different structure. This is the shop-locations file. As the structure is different, we made a special importer. The function generates an array of dictionaries containing the available information for a shop.

In [1]:
from unstructured.partition.md import partition_md
from unstructured.staging.base import convert_to_dict
import os

In [2]:
def extract_store_line(input_string):
    # Split the input string by newline and colon
    lines = input_string.split('\n')
    data_dict = {}

    for line in lines:
        if ':' in line:
            key, value = line.split(':', 1)
            key = key.strip()
            value = value.strip()

            # Replace spaces with underscores in keys and convert to lowercase
            key = key.lower().replace(' ', '_')

            data_dict[key] = value

    return data_dict


def extract_stores():
    filename = os.path.join('./data/', 'help-shop-locations.txt')
    with open(filename, "rb") as f:
        elements = partition_md(filename=filename)

    elements_dict = convert_to_dict(elements)

    available_stores = []
    current_store = None
    for el in elements_dict:
        line_dict = extract_store_line(el["text"])
        if "store_name" in line_dict:
            if current_store is not None:
                available_stores.append(current_store)
            line_dict["opening_hours"] = []
            current_store = line_dict
        else:
            if current_store.get("opening_hours"):
                current_store["opening_hours"].append(line_dict)
            else:
                current_store["opening_hours"] = [line_dict]
    return available_stores

In [3]:
found_stores = extract_stores()
print(found_stores[0])

{'store_name': 'Trendy Finds', 'city': 'Pijnacker', 'street': 'Kerkweg 1', 'telephone': '06-12345678', 'opening_hours': [{'monday': '11:00-18:00'}, {'tuesday': '09:00-18:00'}, {'wednesday': '09:00-18:00'}, {'thursday': '11:00-18:00'}, {'friday': '09:00-18:00'}, {'saturday': '10:00-17:00'}, {'sunday': 'Closed'}]}


In [4]:
from retriever import find_auth_opensearch, OpenSearchClient

config = find_auth_opensearch()
client = OpenSearchClient(config, alias_name="sg-stores")

if client.ping():
    print("We have a connection to the Amazon OpenSearch Cluster")
else:
    print("ERROR: no connection to the Amazon OpenSearch Cluster")

We have a connection to the Amazon OpenSearch Cluster


In [6]:
from retriever import OpenSearchTemplate

template = OpenSearchTemplate(
    client=client,
    index_template_name="sg_stores_index_template",
    component_name_settings="sg_stores_component_settings",
    component_name_dyn_mappings="sg_stores_component_dynamic_mappings",
    component_name_mappings="sg_stores_component_mappings"
)

for result in template.create_update_template():
    print(result)

The version 1 of the component template sg_stores_component_settings is up-to-date
The version 1 of the component template sg_stores_component_dynamic_mappings is up-to-date
The version 1 of the component template sg_stores_component_mappings is up-to-date
The version 1 of the index template is up-to-date


In [7]:
index_name = client.create_index()
print(f"Index created with the name {index_name}")

client.switch_alias_to(index_name=index_name)

Index created with the name sg-stores-20230904152355


In [8]:
import copy

found_stores_for_opensearch = copy.deepcopy(found_stores)
for found_store in found_stores_for_opensearch:
    current_opening_hours = found_store["opening_hours"]
    opening_hours = []
    for i in current_opening_hours:
        times = list(i.values())[0].split('-')
        if len(times) == 2:
            opening_hours.append({"week_day": list(i.keys())[0], "open_time": times[0], "closing_time": times[1]})
        else:
            opening_hours.append({"week_day": list(i.keys())[0], "open_time": "00:00", "closing_time": "00:00"})
    found_store["opening_hours"] = opening_hours

print(found_stores_for_opensearch[0])


{'store_name': 'Trendy Finds', 'city': 'Pijnacker', 'street': 'Kerkweg 1', 'telephone': '06-12345678', 'opening_hours': [{'week_day': 'monday', 'open_time': '11:00', 'closing_time': '18:00'}, {'week_day': 'tuesday', 'open_time': '09:00', 'closing_time': '18:00'}, {'week_day': 'wednesday', 'open_time': '09:00', 'closing_time': '18:00'}, {'week_day': 'thursday', 'open_time': '11:00', 'closing_time': '18:00'}, {'week_day': 'friday', 'open_time': '09:00', 'closing_time': '18:00'}, {'week_day': 'saturday', 'open_time': '10:00', 'closing_time': '17:00'}, {'week_day': 'sunday', 'open_time': '00:00', 'closing_time': '00:00'}]}


In [9]:
import hashlib


def generate_unique_id(input_string):
    sha256_hash = hashlib.sha256(input_string.encode()).hexdigest()
    return sha256_hash


In [13]:
for index_store in found_stores_for_opensearch:
    client.index_document(id=generate_unique_id(index_store["store_name"]), document=index_store, index_name=index_name)

num_shops = client.count_docs(index_name)["count"]

print(f"Found {num_shops} shops in the index")

Found 9 shops in the index


In [15]:
query = {
    "query": {
        "match": {
            "city": "pijnacker"
        }
    }
}

results = client.search(body=query)

print(results)

{'took': 19, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 1.89712, 'hits': [{'_index': 'sg-stores-20230904152355', '_id': '66881903d49c0a4975ff704875e1ef90837b30a4deb8a30acb2ca8ee7ea7a193', '_score': 1.89712, '_source': {'store_name': 'Trendy Finds', 'city': 'Pijnacker', 'street': 'Kerkweg 1', 'telephone': '06-12345678', 'opening_hours': [{'week_day': 'monday', 'open_time': '11:00', 'closing_time': '18:00'}, {'week_day': 'tuesday', 'open_time': '09:00', 'closing_time': '18:00'}, {'week_day': 'wednesday', 'open_time': '09:00', 'closing_time': '18:00'}, {'week_day': 'thursday', 'open_time': '11:00', 'closing_time': '18:00'}, {'week_day': 'friday', 'open_time': '09:00', 'closing_time': '18:00'}, {'week_day': 'saturday', 'open_time': '10:00', 'closing_time': '17:00'}, {'week_day': 'sunday', 'open_time': '00:00', 'closing_time': '00:00'}]}}]}}
