In [1]:
import requests
import os
import pandas as pd
from dotenv import load_dotenv
import time

# Load environment variables
load_dotenv()
api_key = os.getenv("EMBEDDING_KEY")
endpoint = os.getenv("EMBEDDING_END_POINT")

# Fail fast when a required setting is absent. Without this check a missing
# endpoint is interpolated into the URL as the literal text "None", and the
# problem only shows up later as a confusing failure on the first request.
missing_settings = [
    name
    for name, value in (
        ("EMBEDDING_KEY", api_key),
        ("EMBEDDING_END_POINT", endpoint),
    )
    if not value
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_settings)
    )

# Define the URL for the embeddings endpoint. A trailing slash on the
# configured endpoint is stripped so the path never contains "//".
url = f"{endpoint.rstrip('/')}/openai/deployments/text-embedding-ada-002/embeddings?api-version=2024-04-01-preview"

# Define the headers sent with every embedding request
headers = {
    "api-key": api_key,
    "Content-Type": "application/json"
}

# (label, column) pairs, in the exact order they appear in the text that is
# sent for embedding. The label is the human-readable name written into the
# text; the column is the key looked up on the row.
_ROW_FIELDS = (
    ("CustomerID", "CustomerID"),
    ("Age", "Age"),
    ("Gender", "Gender"),
    ("Marital Status", "MaritalStatus"),
    ("Occupation", "Occupation"),
    ("Income Level", "IncomeLevel"),
    ("Credit Limit", "CreditLimit"),
    ("Credit Score", "CreditScore"),
    ("Card Type", "CardType"),
    ("Years With Bank", "YearsWithBank"),
    ("Number of Credit Cards", "NumberOfCreditCards"),
    ("Average Monthly Spending", "AverageMonthlySpending"),
    ("Late Payments", "LatePayments"),
    ("Credit Card Usage", "CreditCardUsage"),
    ("Mobile Banking Usage", "MobileBankingUsage"),
    ("Customer Satisfaction Rating", "CustomerSatisfactionRating"),
)


def format_row(row) -> str:
    """Render one customer row as a single "Label: value, ..." string.

    Args:
        row: Any object supporting ``row[column]`` lookup for every column
            named in ``_ROW_FIELDS`` (the loop below passes the rows yielded
            by ``DataFrame.iterrows()``).

    Returns:
        The sixteen ``"Label: value"`` pairs joined with ``", "``, with no
        trailing separator.

    Raises:
        KeyError: If a mapping-style row lacks one of the expected columns.
    """
    rendered = [f"{label}: {row[column]}" for label, column in _ROW_FIELDS]
    return ", ".join(rendered)

# Load the CSV data
data = pd.read_csv('Synthetic_Cards_Customer.csv')

# Request tuning. Adjust these to match the rate limits of the deployment.
REQUEST_TIMEOUT_SECONDS = 30   # never wait indefinitely on a stalled connection
REQUEST_DELAY_SECONDS = 0.5    # pause between rows; also the base back-off delay
MAX_ATTEMPTS = 5               # total tries per row, including the first
RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504}


def _retry_delay(response, attempt):
    """Return the number of seconds to wait before the next attempt.

    A numeric ``Retry-After`` response header is honoured when present;
    otherwise the delay doubles with each attempt, starting from
    ``REQUEST_DELAY_SECONDS``.

    Args:
        response: The failed response, or ``None`` when the request raised
            before any response was received.
        attempt: Zero-based index of the attempt that just failed.
    """
    retry_after = None if response is None else response.headers.get("Retry-After")
    try:
        return max(float(retry_after), 0.0)
    except (TypeError, ValueError):
        # Header absent, or not a plain number of seconds: back off
        # exponentially instead.
        return REQUEST_DELAY_SECONDS * (2 ** attempt)


def _fetch_embedding(session, text):
    """Request the embedding for ``text``, retrying transient failures.

    Args:
        session: ``requests.Session`` used to send the request, so the
            connection is reused across rows.
        text: The string to embed.

    Returns:
        ``(embedding, None)`` on success, or ``(None, message)`` once the
        request has failed permanently or ``MAX_ATTEMPTS`` is exhausted.
    """
    payload = {
        "input": text,
        "encoding_format": "float"
    }
    last_error = "no attempt made"

    for attempt in range(MAX_ATTEMPTS):
        response = None
        try:
            response = session.post(
                url,
                headers=headers,
                json=payload,
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
        except requests.RequestException as exc:
            # Connection errors and timeouts are treated as transient.
            last_error = f"request failed - {exc}"
        else:
            if response.status_code == 200:
                try:
                    return response.json()['data'][0]['embedding'], None
                except (ValueError, KeyError, IndexError, TypeError) as exc:
                    return None, f"200 - unexpected response body ({exc!r})"
            last_error = f"{response.status_code} - {response.text}"
            if response.status_code not in RETRYABLE_STATUS_CODES:
                # Retrying cannot fix this kind of failure.
                break

        if attempt < MAX_ATTEMPTS - 1:
            time.sleep(_retry_delay(response, attempt))

    return None, last_error


# Initialize a list to store the embeddings
all_embeddings = []

# Loop through each row, format it, and send it to the embedding endpoint
with requests.Session() as session:
    for _, row in data.iterrows():
        embedding, error = _fetch_embedding(session, format_row(row))

        if embedding is None:
            # Report the failure and move on so one bad row does not discard
            # the embeddings already collected.
            print(f"Error for CustomerID {row['CustomerID']}: {error}")
        else:
            all_embeddings.append({
                "CustomerID": row['CustomerID'],
                "Embeddings": embedding
            })

        # Pause after every row, failed ones included, so an error response
        # is never followed immediately by another request.
        time.sleep(REQUEST_DELAY_SECONDS)

# Convert embeddings to a DataFrame. The columns are named explicitly so the
# CSV keeps its header even when no row could be embedded.
embeddings_df = pd.DataFrame(all_embeddings, columns=["CustomerID", "Embeddings"])

# Save embeddings to a new CSV file
embeddings_df.to_csv('Customer_Embeddings.csv', index=False)

# Display the first few rows of the embeddings
print(embeddings_df.head())


  CustomerID                                         Embeddings
0   CUST0001  [-0.011622304, -0.005285256, 0.008414338, -0.0...
1   CUST0002  [-0.01978636, -0.0051310165, 0.0049531767, -0....
2   CUST0003  [-0.016681174, -0.0058776606, 0.015242014, -0....
3   CUST0004  [-0.01601045, -0.007185017, 0.013766361, -0.03...
4   CUST0005  [-0.019965125, -0.0066177277, 0.004612655, -0....
