## 1. Indexing sample dataset

To filter search results on a metadata field, that field must be marked `filterable` in the index definition.

In [1]:
# Load environment variables from the .env file one directory above the notebook.
# NOTE(review): the recorded output of this cell is False — presumably
# load_dotenv's return value, meaning nothing was loaded from that path.
# Confirm the file location if the settings are expected to come from .env.
from dotenv import load_dotenv
load_dotenv("../.env")

False

In [2]:
from azure.core.credentials import AzureKeyCredential  
from azure.search.documents import SearchClient  
from azure.search.documents.indexes import SearchIndexClient  
from azure.search.documents.indexes.models import (  
    SearchIndex,  
    SearchField,  
    SearchFieldDataType,  
    SimpleField,  
    SearchableField,
    ComplexField,
    SearchIndex,  
)

In [3]:
import os

# Connection settings for the Azure AI Search service, read from the environment.
# Both values use os.environ[...] so that a missing variable fails right here
# with a KeyError naming it, instead of the endpoint silently becoming None and
# surfacing later as an unrelated client error.
service_endpoint = os.environ["AZSCH_ENDPOINT"]
credential = AzureKeyCredential(os.environ["AZSCH_KEY"])

In [4]:
# Create a search index
def create_search_index(index_name):
    """Create (or update) the expense index used by this notebook.

    Builds the field schema and submits it with
    ``SearchIndexClient.create_or_update_index``. Uses the notebook-level
    ``service_endpoint`` and ``credential`` variables for the connection.

    Args:
        index_name: Name of the index to create or update.

    Returns:
        None. The name of the resulting index is printed.
    """
    index_client = SearchIndexClient(
        endpoint=service_endpoint, credential=credential)
    fields = [
        SimpleField(name="id", type=SearchFieldDataType.String, key=True),
        # Full-text fields, analyzed with the Microsoft Korean analyzer.
        SearchableField(name="title", type=SearchFieldDataType.String,
                        searchable=True, retrievable=True,
                        analyzer_name="ko.microsoft"),
        SearchableField(name="content", type=SearchFieldDataType.String,
                        searchable=True, retrievable=True,
                        analyzer_name="ko.microsoft"),
        # NOTE(review): SearchableField appears to always build a string
        # field, so this `type=` may be ignored and `date` indexed as
        # Edm.String; the quoted date literals used by the filter queries in
        # this notebook are consistent with that. Confirm against the SDK
        # before switching to SimpleField(type=DateTimeOffset), which would
        # require unquoted date literals in those filters.
        SearchableField(name="date", type=SearchFieldDataType.DateTimeOffset,
                        searchable=False, retrievable=True,
                        filterable=True, sortable=True),
        # `amount` is used in filter expressions, so declare it filterable
        # explicitly like the other metadata fields instead of relying on
        # the service's default for unspecified attributes.
        SearchField(name="amount", type=SearchFieldDataType.Int32,
                    filterable=True),
        SearchableField(name="category", type=SearchFieldDataType.String,
                        searchable=False, retrievable=True,
                        facetable=True, filterable=True),
        # Nested object; sub-fields are addressed as user/name and user/dept.
        ComplexField(name="user", fields=[
            SearchableField(name="name", type=SearchFieldDataType.String,
                            filterable=True),
            SearchableField(name="dept", type=SearchFieldDataType.String,
                            filterable=True)
        ])
    ]

    # Create the search index
    index = SearchIndex(name=index_name, fields=fields)
    result = index_client.create_or_update_index(index)
    print(f' {result.name} created')

In [5]:
# Name of the index; later cells reuse this variable when building the client.
index_name = "gsexpense-index"
# Create the index on the service, or update it if it already exists.
create_search_index(index_name)

 gsexpense-index created


In [6]:
# Client bound to the index created above; used for both uploads and queries.
search_client = SearchClient(
    endpoint=service_endpoint,
    index_name=index_name,
    credential=credential,
)

In [7]:
import json

# The sample file contains Korean text. Read it explicitly as UTF-8 so the
# result does not depend on the platform's default encoding.
with open("./sample_expense.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# ensure_ascii=False keeps the Korean text readable in the printed output.
print(json.dumps(data, indent=4, ensure_ascii=False))

[
    {
        "id": "ID0001",
        "title": "식대비",
        "metadata": {
            "date": "2024-12-14T20:00:00Z",
            "amount": 15000,
            "category": "meal",
            "dept": "재무팀"
        },
        "content": "식대비 품의서입니다."
    },
    {
        "id": "ID0002",
        "title": "교육비 품위서",
        "metadata": {
            "date": "2025-01-10T18:00:00Z",
            "amount": 300000,
            "category": "tuition",
            "dept": "인사팀"
        },
        "content": "교육비 품의서입니다."
    },
    {
        "id": "ID0003",
        "title": "교육비(회계팀) 품위서",
        "metadata": {
            "date": "2025-01-11T18:00:00Z",
            "amount": 1100000,
            "category": "tuition",
            "dept": "회계팀"
        },
        "content": "교육비 품의서입니다."
    },
    {
        "id": "ID0004",
        "title": "사무용품 품위서",
        "metadata": {
            "date": "2025-01-21T18:00:00Z",
            "amount": 30000000,
            "category": "office",
           

In [8]:
# Flatten each sample record into the index schema: the nested "metadata"
# values become top-level fields, and "dept" moves under the complex "user"
# field. All documents are built first and sent in a single request instead
# of one request per document.
documents = [
    {
        'id': row['id'],
        'title': row['title'],
        'content': row['content'],
        'date': row['metadata']['date'],
        'amount': row['metadata']['amount'],
        'category': row['metadata']['category'],
        'user': {
            'name': '홍길동',  # placeholder user name shared by every sample row
            'dept': row['metadata']['dept'],
        },
    }
    for row in data
]

# Skip the request entirely when there is nothing to upload.
result = search_client.upload_documents(documents) if documents else []

# The upload returns one result per document; report any that were rejected
# rather than discarding the outcome.
failed_keys = [item.key for item in result if not item.succeeded]
if failed_keys:
    print(f"Failed to index {len(failed_keys)} document(s): {failed_keys}")

In [10]:
from azure.search.documents.models import (
    QueryType,
    SearchMode
)

def search_query(query, filter=None):
    """Run a text query against the index and print the ranked hits.

    Searches the "content" and "title" fields through the notebook-level
    ``search_client`` and prints one line per hit: rank, score, id, title,
    date, amount, category and the user's department.

    Args:
        query: Search text passed to the service.
        filter: Optional filter expression applied to the query.

    Returns:
        None. Results are printed.
    """
    search_options = {
        "search_text": query,
        "search_fields": ["content", "title"],
        "select": ["id", "content", "title", "date", "category", "amount", "user"],
        "filter": filter,
        "query_language": "ko-kr",
        "top": 10,  # cap on the number of returned hits
    }
    hits = search_client.search(**search_options)

    print("Search Results:")
    for rank, hit in enumerate(hits, start=1):
        score = hit['@search.score']
        columns = (
            hit['id'],
            hit['title'],
            hit['date'],
            hit['amount'],
            hit['category'],
            hit['user']['dept'],
        )
        print(f"{rank}) {score:.3f}: " + ", ".join(f"{value}" for value in columns))


In [11]:
# Match every document ("*") whose date is greater than the given timestamp.
# NOTE(review): the date literal is quoted, which OData treats as a string
# literal; a field of type Edm.DateTimeOffset would take it unquoted.
# Confirm the actual type of `date` in the created index.
search_query("*", filter="date gt '2025-01-01T09:00:00Z'")

Search Results:
1) 1.000: ID0006, 공사비 품위서 2, 2025-02-04T18:00:00Z, 10000000, construction, 개발2팀
2) 1.000: ID0003, 교육비(회계팀) 품위서, 2025-01-11T18:00:00Z, 1100000, tuition, 회계팀
3) 1.000: ID0002, 교육비 품위서, 2025-01-10T18:00:00Z, 300000, tuition, 인사팀
4) 1.000: ID0004, 사무용품 품위서, 2025-01-21T18:00:00Z, 30000000, office, R&D팀
5) 1.000: ID0005, 공사비 품위서, 2025-02-03T18:00:00Z, 30000000, construction, 개발1팀
6) 1.000: ID0007, 해외 출장비 품위서, 2025-02-10T20:00:00Z, 5000000, travel, 개발2팀


In [12]:
# Exact-match filter on the category field.
search_query("*", filter="category eq 'meal'")

Search Results:
1) 1.000: ID0001, 식대비, 2024-12-14T20:00:00Z, 15000, meal, 재무팀


In [13]:
# Filter on a sub-field of the complex "user" field, addressed as parent/child.
search_query("*", filter="user/dept eq '개발1팀'")

Search Results:
1) 1.000: ID0005, 공사비 품위서, 2025-02-03T18:00:00Z, 30000000, construction, 개발1팀


In [14]:
# Numeric comparison: amount greater than or equal to 100000.
search_query("*", filter="amount ge 100000")

Search Results:
1) 1.000: ID0006, 공사비 품위서 2, 2025-02-04T18:00:00Z, 10000000, construction, 개발2팀
2) 1.000: ID0003, 교육비(회계팀) 품위서, 2025-01-11T18:00:00Z, 1100000, tuition, 회계팀
3) 1.000: ID0002, 교육비 품위서, 2025-01-10T18:00:00Z, 300000, tuition, 인사팀
4) 1.000: ID0004, 사무용품 품위서, 2025-01-21T18:00:00Z, 30000000, office, R&D팀
5) 1.000: ID0005, 공사비 품위서, 2025-02-03T18:00:00Z, 30000000, construction, 개발1팀
6) 1.000: ID0007, 해외 출장비 품위서, 2025-02-10T20:00:00Z, 5000000, travel, 개발2팀


In [15]:
# Combine two conditions with `and`: a date lower bound and a minimum amount.
# NOTE(review): the date is a quoted, date-only literal; this presumably
# works only if `date` is compared as a string — confirm the field type.
search_query("*", filter="date gt '2024-01-01' and amount ge 1000000")

Search Results:
1) 1.000: ID0006, 공사비 품위서 2, 2025-02-04T18:00:00Z, 10000000, construction, 개발2팀
2) 1.000: ID0003, 교육비(회계팀) 품위서, 2025-01-11T18:00:00Z, 1100000, tuition, 회계팀
3) 1.000: ID0004, 사무용품 품위서, 2025-01-21T18:00:00Z, 30000000, office, R&D팀
4) 1.000: ID0005, 공사비 품위서, 2025-02-03T18:00:00Z, 30000000, construction, 개발1팀
5) 1.000: ID0007, 해외 출장비 품위서, 2025-02-10T20:00:00Z, 5000000, travel, 개발2팀


In [16]:
# Three conditions joined with `and`: date lower bound, minimum amount, and category.
search_query("*", filter="date gt '2024-01-01' and amount ge 1000000 and category eq 'office'")

Search Results:
1) 1.000: ID0004, 사무용품 품위서, 2025-01-21T18:00:00Z, 30000000, office, R&D팀


In [17]:
# Date range: after the lower bound and before the upper bound.
# NOTE(review): the two quoted literals use different formats (date-only vs.
# timestamp without an offset). If `date` is compared as a string, the
# ordering is lexicographic — confirm the field type and normalise the
# literal format accordingly.
search_query("*", filter="date gt '2025-01-01' and date lt '2025-02-01T00:00:00'")

Search Results:
1) 1.000: ID0003, 교육비(회계팀) 품위서, 2025-01-11T18:00:00Z, 1100000, tuition, 회계팀
2) 1.000: ID0002, 교육비 품위서, 2025-01-10T18:00:00Z, 300000, tuition, 인사팀
3) 1.000: ID0004, 사무용품 품위서, 2025-01-21T18:00:00Z, 30000000, office, R&D팀
