In [None]:
import openai
from azure.storage.blob import BlobServiceClient
import pickle
import faiss
import numpy as np
import os

In [12]:
from azure.storage.blob import BlobServiceClient

# Define Blob Storage details
account_name = "XXX"
account_key = "XXX"
AZURE_STORAGE_CONNECTION_STRING = f"DefaultEndpointsProtocol=https;AccountName={account_name};AccountKey={account_key};EndpointSuffix=core.windows.net"
CONTAINER_NAME = 'XXXX'

# Initialize Blob Storage client
blob_service_client = BlobServiceClient.from_connection_string(AZURE_STORAGE_CONNECTION_STRING)
container_client = blob_service_client.get_container_client(CONTAINER_NAME)

# Upload text files to Blob Storage
def upload_documents_to_blob(documents):
    try:
        for i, (file_name, content) in enumerate(documents.items()):
            blob_client = container_client.get_blob_client(file_name)
            blob_client.upload_blob(content, overwrite=True)
            print(f"Uploaded {file_name} to Blob Storage.")
    except Exception as e:
        print(f"Error uploading documents: {e}")
        raise

# Example documents
documents = {
    "ai_impact.txt": "Artificial intelligence is transforming various industries. Brand:Tahoe, Model:T21",
    "climate_change.txt": "Climate change is one of the most pressing global challenges. Brand:Tracker, Model:DXL TR21",
    "space_exploration.txt": "Recent advancements in space technology are remarkable. Brand:Regency, Model:RX 23",
    "quantum_computing.txt": "The impact of quantum computing on cryptography is profound. Brand:Mako, Model:M233",
    "sustainable_energy.txt": "Sustainable energy solutions are crucial for combating climate change.Brand:Nitro, Model:NN44"
}

upload_documents_to_blob(documents)


Uploaded ai_impact.txt to Blob Storage.
Uploaded climate_change.txt to Blob Storage.
Uploaded space_exploration.txt to Blob Storage.
Uploaded quantum_computing.txt to Blob Storage.
Uploaded sustainable_energy.txt to Blob Storage.


In [13]:
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import SearchIndex, SimpleField, SearchFieldDataType, SearchableField
from azure.core.credentials import AzureKeyCredential

# Azure Cognitive Search details
search_service_name = "XXXX"
admin_api_key = "XXXX"
index_name = "XXXX" #define new name

# Initialize Search Index client
endpoint = f"https://{search_service_name}.search.windows.net"
credential = AzureKeyCredential(admin_api_key)
index_client = SearchIndexClient(endpoint=endpoint, credential=credential)

# Define the index schema
def create_search_index():
    try:
        fields = [
            SimpleField(name="id", type=SearchFieldDataType.String, key=True),
            SearchableField(name="content", type=SearchFieldDataType.String),
            SearchableField(name="title", type=SearchFieldDataType.String),
            SearchableField(name="author", type=SearchFieldDataType.String),
            SimpleField(name="date", type=SearchFieldDataType.String),
            SearchableField(name="category", type=SearchFieldDataType.String),
            SearchableField(name="summary", type=SearchFieldDataType.String),
            SearchableField(name="brand", type=SearchFieldDataType.String),
            SearchableField(name="model", type=SearchFieldDataType.String)
        ]
        
        index = SearchIndex(name=index_name, fields=fields)
        index_client.create_index(index)
        print("Search index created successfully.")
    except Exception as e:
        print(f"Error creating search index: {e}")
        raise

create_search_index()


Search index created successfully.


In [14]:
from azure.search.documents import SearchClient
import re

def get_brand_model(text):

    # Regular expression to find Brand and Model
    brand_pattern = r"Brand:([A-Za-z0-9]+)"
    model_pattern = r"Model:([A-Za-z0-9]+)"

    # Finding Brand and Model using regular expressions
    brand = re.search(brand_pattern, text)
    model = re.search(model_pattern, text)

    # Extracting the values
    brand_value = brand.group(1) if brand else None
    model_value = model.group(1) if model else None

    print(f"Brand: {brand_value}")
    print(f"Model: {model_value}")

    return brand_value,model_value

def index_documents():
    try:
        search_client = SearchClient(
            endpoint=endpoint,
            index_name=index_name,
            credential=credential
        )

        documents = []
        container_client = blob_service_client.get_container_client(CONTAINER_NAME)
        
        # Iterate over blobs in the container and create documents
        blob_list = container_client.list_blobs()
        for blob in blob_list:
            blob_client = container_client.get_blob_client(blob)
            content = blob_client.download_blob().readall().decode('utf-8')

            brand, model = get_brand_model(content)
            # Assuming each document has metadata
            doc = {
                "id": blob.name.split('.')[0],  # Using file name as id
                "content": content,
                "title": "Sample Title",  # Replace with actual metadata
                "author": "Author Name",
                "date": "2024-07-25",
                "category": "Category",
                "summary": "Summary of the document",
                "brand":brand,
                "model":model
            }
            print("documents", doc)

            documents.append(doc)
        # Upload documents to the index
        search_client.upload_documents(documents)
        print("Documents indexed successfully.")
    except Exception as e:
        print(f"Error indexing documents: {e}")
        raise

index_documents()


Brand: Tahoe
Model: T21
documents {'id': 'ai_impact', 'content': 'Artificial intelligence is transforming various industries. Brand:Tahoe, Model:T21', 'title': 'Sample Title', 'author': 'Author Name', 'date': '2024-07-25', 'category': 'Category', 'summary': 'Summary of the document', 'brand': 'Tahoe', 'model': 'T21'}
Brand: Tracker
Model: DXL
documents {'id': 'climate_change', 'content': 'Climate change is one of the most pressing global challenges. Brand:Tracker, Model:DXL TR21', 'title': 'Sample Title', 'author': 'Author Name', 'date': '2024-07-25', 'category': 'Category', 'summary': 'Summary of the document', 'brand': 'Tracker', 'model': 'DXL'}
Brand: Mako
Model: M233
documents {'id': 'quantum_computing', 'content': 'The impact of quantum computing on cryptography is profound. Brand:Mako, Model:M233', 'title': 'Sample Title', 'author': 'Author Name', 'date': '2024-07-25', 'category': 'Category', 'summary': 'Summary of the document', 'brand': 'Mako', 'model': 'M233'}
Brand: Regency
M

In [16]:
def search(query_text, top_k=2):
    try:
        search_client = SearchClient(
            endpoint=endpoint,
            index_name=index_name,
            credential=credential
        )
        
        results = search_client.search(query_text, top=top_k)
        return [(result["content"], result, result["@search.score"]) for result in results]
    except Exception as e:
        print(f"Error performing search: {e}")
        raise

# Example usage
query_text = "tahoe"
results = search(query_text)

for result in results:
    text, metadata, score = result
    print(f"Text: {text}")
    print(f"Title: {metadata['title']}")
    print(f"Author: {metadata['author']}")
    print(f"Date: {metadata['date']}")
    print(f"Category: {metadata['category']}")
    print(f"Summary: {metadata['summary']}")
    print(f"Score: {score}\n")


Text: Artificial intelligence is transforming various industries. Brand:Tahoe, Model:T21
Title: Sample Title
Author: Author Name
Date: 2024-07-25
Category: Category
Summary: Summary of the document
Score: 0.9808292

