In [15]:
import os
import requests
import pandas as pd
import time
import json
from dotenv import load_dotenv

# Load environment variables
load_dotenv(override=True)

# Set up Azure Cognitive Search details (values come from the environment,
# typically populated from a .env file before this point).
AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE")  # e.g., "mysearchservice"
AZURE_SEARCH_INDEX = os.getenv("AZURE_SEARCH_INDEX")  # e.g., "customer_index"
AZURE_SEARCH_API_KEY = os.getenv("AZURE_SEARCH_API_KEY")
AZURE_SEARCH_ENDPOINT = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net/indexes/{AZURE_SEARCH_INDEX}/docs/index"
AZURE_SEARCH_HEADERS = {
    "Content-Type": "application/json",
    "api-key": AZURE_SEARCH_API_KEY
}

# Warn early about unset variables: os.getenv returns None for them, which
# would otherwise be silently formatted into the endpoint URL as "None".
_missing_settings = [
    name
    for name, value in (
        ("AZURE_SEARCH_SERVICE", AZURE_SEARCH_SERVICE),
        ("AZURE_SEARCH_INDEX", AZURE_SEARCH_INDEX),
        ("AZURE_SEARCH_API_KEY", AZURE_SEARCH_API_KEY),
    )
    if not value
]
if _missing_settings:
    print("Warning: missing environment variables: " + ", ".join(_missing_settings))

# Diagnostics. The API key is a secret and must never be echoed: printed
# output ends up in notebook cells, logs and version control. Only report
# whether a key is configured and show the headers with the key masked.
print(AZURE_SEARCH_ENDPOINT)
print({**AZURE_SEARCH_HEADERS, "api-key": "***" if AZURE_SEARCH_API_KEY else None})
print(f"api-key configured: {bool(AZURE_SEARCH_API_KEY)}")
print(AZURE_SEARCH_SERVICE)
print(AZURE_SEARCH_INDEX)

# Function to upload documents to Azure Search
def upload_documents_to_azure_search(documents, timeout=30):
    """Upload one batch of documents to the configured Azure Search index.

    Args:
        documents: List of document dicts; each is sent as one entry of the
            request body's "value" array.
        timeout: Seconds to wait for the service before giving up. Without a
            timeout, requests would block indefinitely on a stalled
            connection.

    Returns:
        True if the service answered with 200, 201 or 204 (or there was
        nothing to upload), False for any other status code.

    Raises:
        requests.RequestException: On connection errors or timeouts; these
            are left to the caller to handle.
    """
    # Nothing to send: skip the HTTP round trip entirely.
    if not documents:
        print("No documents to upload.")
        return True

    response = requests.post(
        AZURE_SEARCH_ENDPOINT,
        headers=AZURE_SEARCH_HEADERS,
        params={"api-version": "2021-04-30-Preview"},
        json={"value": documents},
        timeout=timeout,
    )
    if response.status_code in (200, 201, 204):
        print("Documents uploaded successfully.")
        return True

    print(f"Failed to upload documents: {response.status_code} - {response.text}")
    return False

# Function to create embeddings from CSV and upload to Azure Search
def create_embeddings_from_csv(csv_path='Synthetic_Cards_Customer.csv', batch_size=10, delay_seconds=0.5):
    """Read customer rows from a CSV and upload them to Azure Search in batches.

    Each row becomes one document whose "content" is a comma-separated
    "Label: value" description of the customer and whose "id" is the row's
    CustomerID. The "embedding" field is uploaded as an empty list
    (placeholder; no embedding is computed here).

    Args:
        csv_path: Path of the CSV file to read.
        batch_size: Number of rows per upload request; must be >= 1.
        delay_seconds: Pause after each uploaded batch, to stay under the
            service's rate limit.

    Returns:
        None. Progress and errors are reported on stdout; any exception is
        caught and printed rather than propagated.
    """
    # (label in the generated text, CSV column) in output order.
    text_fields = [
        ('Age', 'Age'),
        ('Gender', 'Gender'),
        ('Marital Status', 'MaritalStatus'),
        ('Occupation', 'Occupation'),
        ('Income Level', 'IncomeLevel'),
        ('Credit Limit', 'CreditLimit'),
        ('Credit Score', 'CreditScore'),
        ('Card Type', 'CardType'),
        ('Years With Bank', 'YearsWithBank'),
        ('Number of Credit Cards', 'NumberOfCreditCards'),
        ('Average Monthly Spending', 'AverageMonthlySpending'),
        ('Late Payments', 'LatePayments'),
        ('Credit Card Usage', 'CreditCardUsage'),
        ('Mobile Banking Usage', 'MobileBankingUsage'),
        ('Customer Satisfaction Rating', 'CustomerSatisfactionRating'),
    ]

    try:
        # A non-positive step would make range() raise or silently do nothing.
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        data = pd.read_csv(csv_path)

        # Ensure required columns are present
        required_columns = ['CustomerID'] + [column for _, column in text_fields]
        for column in required_columns:
            if column not in data.columns:
                raise ValueError(f"Missing required column: {column}")

        failed_batches = 0
        skipped_rows = 0

        # Process data in smaller batches to keep each request small
        for start_idx in range(0, len(data), batch_size):
            batch = data.iloc[start_idx:start_idx + batch_size]

            documents = []
            for _, row in batch.iterrows():
                # A missing CustomerID would become the key "nan", so every
                # such row would overwrite the same document. Skip instead.
                if pd.isna(row['CustomerID']):
                    skipped_rows += 1
                    continue

                # Format the row data for embedding
                text_for_embedding = ", ".join(
                    f"{label}: {row[column]}" for label, column in text_fields
                )

                documents.append({
                    "@search.action": "upload",
                    "id": str(row['CustomerID']),
                    "content": text_for_embedding,
                    "embedding": []  # Replace with actual embedding if needed
                })

            if not documents:
                continue

            # Only an explicit False counts as a failure, so this also works
            # with an uploader that reports nothing (returns None).
            if upload_documents_to_azure_search(documents) is False:
                failed_batches += 1

            # To avoid hitting the rate limit, add a delay if necessary
            time.sleep(delay_seconds)

        if skipped_rows:
            print(f"Skipped {skipped_rows} row(s) with a missing CustomerID.")
        if failed_batches:
            # Do not claim success when the uploader reported failures.
            print(f"Finished with {failed_batches} failed batch upload(s); see the messages above.")
        else:
            print("Embeddings for all customers have been created and stored in Azure Cognitive Search!")
    except Exception as e:
        # Top-level boundary of the script: report and stop instead of crashing.
        print(f"An error occurred: {str(e)}")

# Entry point: run the CSV-to-index upload only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    create_embeddings_from_csv()


https://creditsearch.search.windows.net/indexes/customer_index/docs/index
{'Content-Type': 'application/json', 'api-key': '<REDACTED>'}
<REDACTED>
creditsearch
customer_index
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploaded successfully.
Documents uploa