# Exploration of the superlinked package

In [1]:
import pandas as pd
import superlinked
import asyncio
import boto3
from botocore.exceptions import  ClientError
import instructor
from openai import OpenAI, AsyncOpenAI
import json


# from tenacity import retry, stop_after_attempt, wait_fixed
# import json
# import enum
# from tqdm.asyncio import tqdm_asyncio
# from pydantic import BaseModel, Field
# from .base_food_item import FoodItem

In [2]:
from datasets import load_dataset

# Load the "deccan-ai/insuranceQA-v2" dataset via the `datasets` library;
# later cells read its 'train' split (ds['train']).
ds = load_dataset("deccan-ai/insuranceQA-v2")


In [24]:
def get_secret(secret_name: str = "open_ai_api_key_shahar", region_name: str = "eu-west-1"):
    """
    Fetch a secret from AWS Secrets Manager and decode its JSON payload.

    Args:
        secret_name: Identifier passed as ``SecretId`` to Secrets Manager.
            Defaults to the secret this notebook was written against.
        region_name: AWS region used to build the Secrets Manager client.

    Returns:
        The object produced by ``json.loads`` on the response's
        ``SecretString`` (per the original notebook this is a dict holding
        the OpenAI API key; the exact key names are not shown here).

    Raises:
        KeyError: If the response has no ``SecretString`` entry.
        json.JSONDecodeError: If ``SecretString`` is not valid JSON.
        Any error raised by the boto3 client call propagates unchanged.
    """
    # Create a Secrets Manager client bound to the requested region
    session = boto3.session.Session()
    secrets_client = session.client(
        service_name='secretsmanager',
        region_name=region_name,
    )

    response = secrets_client.get_secret_value(SecretId=secret_name)

    # The secret is stored as a JSON document inside the SecretString field
    return json.loads(response['SecretString'])

## Define schema

In [3]:
from superlinked import framework as sl


# Define schema

class FAQ(sl.Schema):
    # Schema for one FAQ record: an identifier plus the question and answer
    # text. The field names match the record keys ("id", "question", "answer")
    # that are inserted with source.put(...) later in the notebook.
    id: sl.IdField
    question: sl.String
    answer: sl.String

# Create FAQ instance and spaces
faq = FAQ()
# One TextSimilaritySpace per text field, both using the "all-MiniLM-L6-v2" model.
question_space = sl.TextSimilaritySpace(text=faq.question, model="all-MiniLM-L6-v2")
# NOTE(review): answer_space is created but is not passed to the index below,
# so in this notebook only question_space is part of the index — confirm intended.
answer_space = sl.TextSimilaritySpace(text=faq.answer, model="all-MiniLM-L6-v2")
index = sl.Index([question_space])

# Load and prepare data
train_data = ds['train']
# Build a DataFrame from the first 1000 rows of the training split.
df = pd.DataFrame(train_data[:1000])
# NOTE(review): columns are renamed by position, which assumes the dataset has
# exactly two columns ordered (question, answer) — verify against the dataset.
df.columns = ['question', 'answer']
df.head()


Unnamed: 0,question,answer
0,What Does Medicare IME Stand For?,According to the Centers for Medicare and Medi...
1,Is Long Term Care Insurance Tax Free?,"As a rule , if you buy a tax qualified long te..."
2,Can Husband Drop Wife From Health Insurance?,Can a spouse drop another spouse from health i...
3,Is Medicare Run By The Government?,Medicare Part A and Part B is provided by the ...
4,Is Medicare Run By The Government?,Definitely . It is ran by the Center for Medic...


In [23]:
# Sample 100 random entries into a NEW frame. The fixed seed makes the sample
# (and therefore the query output below) reproducible across runs, and leaving
# `df` untouched keeps this cell idempotent when it is re-run.
df_sample = df.sample(100, random_state=42)

# Construct a query to find similar questions in the FAQ.
# `query_text` is a named parameter that is supplied when the query is executed.
query = (
    sl.Query(index)
    .find(faq)
    .similar(question_space, sl.Param("query_text"))
    .select_all()
)

# Create an in-memory data source for the FAQ
source = sl.InMemorySource(faq)

# Set up an in-memory executor with the source and index
executor = sl.InMemoryExecutor(sources=[source], indices=[index])

# Run the application using the executor
app = executor.run()

# Insert the sampled data into the source. reset_index(names="id") turns the
# original row label of each sampled row into the record's "id" field.
source.put(
    df_sample.reset_index(names="id")[["id", "question", "answer"]].to_dict(orient="records")
)

# Execute the query with a specific query text.
# NOTE(review): "annuitied" is misspelled in the query string; it is kept
# unchanged because it may be a deliberate typo-robustness probe — confirm.
result = app.query(query, query_text="what are annuitied invested in?")

sl.PandasConverter.to_pandas(result)

Unnamed: 0,question,answer,id,similarity_score
0,What Are Fixed Annuities Invested In?,The dominate position of insurance portfolios ...,529,0.856072
1,What Are Annuities?,An annuity is an insurance product . A life in...,469,0.821291
2,What Are Annuities?,Annuities are a stream of steady income paymen...,467,0.821291
3,Who Bears The Investment Risk In A Variable An...,"In Canada , Variable Annuities are called Segr...",301,0.687399
4,How Many Kinds Of Annuities Are There?,There are annuity policies that are designed f...,280,0.683913
...,...,...,...,...
95,What Is The Enrollment Period For Medicare Par...,"Generally , it is 3 months prior to turning 65...",243,0.066035
96,How Old Should I Be To Get Long Term Care Insu...,The majority of my clients who purchased long ...,506,0.065188
97,What Information Do I Need To Get Car Insurance?,To get car insurance you would need at minimum...,320,0.063996
98,Do I Need Proof Of Insurance To Register A Car...,As of today according to the Wisconsin Departm...,37,0.049803


## Adding an LLM

In [32]:
import os

import instructor
import openai
from pydantic import BaseModel

# SECURITY: API keys must never be written into a notebook. The key is read
# from the OPENAI_API_KEY environment variable instead. Any key that was ever
# hardcoded in this cell must be treated as leaked and revoked/rotated.
# (get_secret(), defined earlier in this notebook, is an alternative source;
# the key name inside its JSON payload is not shown in this notebook.)
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this cell "
        "instead of hardcoding the key in the notebook."
    )

# Patch the OpenAI client with instructor so that chat completions accept a
# `response_model` argument (used by is_relevant below in this cell).
client = instructor.patch(OpenAI(api_key=api_key))

class RelevanceCheck(BaseModel):
    # Structured response type passed as `response_model` in is_relevant().
    # NOTE(review): documented with `#` comments on purpose — a class docstring
    # would presumably be copied into the schema sent to the model; confirm
    # before adding one.
    relevant: bool  # the verdict; this is the value is_relevant() returns

def is_relevant(user_q: str, candidate_q: str, model: str = "gpt-4o-mini") -> bool:
    """
    Ask the LLM whether a matched FAQ question is relevant to a user question.

    Relies on the module-level `client` (instructor-patched OpenAI client) and
    on `RelevanceCheck` as the structured response model.

    Args:
        user_q: The question as typed by the user.
        candidate_q: The FAQ question returned by the similarity search.
        model: Chat model name to query. Defaults to "gpt-4o-mini", the model
            this notebook originally hard-coded.

    Returns:
        The `relevant` field of the parsed `RelevanceCheck` response.

    Raises:
        Whatever the client call raises is propagated unchanged (the recorded
        run of this notebook shows an InstructorRetryException wrapping a 429
        insufficient_quota error).
    """
    # The prompt text is sent verbatim, including its leading/trailing newline.
    prompt = f"""
User question: {user_q}

Matched FAQ: {candidate_q}

Is the matched FAQ question relevant to the user question? Only answer True or False.
"""
    verdict = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        response_model=RelevanceCheck,
    )
    return verdict.relevant




In [33]:
# Smoke test: the same text is passed as both the user question and the
# candidate FAQ question.
# NOTE(review): the recorded output of this cell is a 429 insufficient_quota
# error, so no verdict was actually observed in this run.
is_relevant("what are annuitied invested in?", "what are annuitied invested in?")

InstructorRetryException: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [19]:
# Re-display the query result as a DataFrame (same call as at the end of the
# query cell earlier in the notebook).
sl.PandasConverter.to_pandas(result)

Unnamed: 0,question,answer,id,similarity_score
0,What Are Fixed Annuities Invested In?,The dominate position of insurance portfolios ...,529,0.856072
1,What Are Annuities?,Annuities are a stream of steady income paymen...,467,0.821291
2,What Are Annuities?,An annuity is an insurance product . A life in...,469,0.821291
3,Who Bears The Investment Risk In A Variable An...,"In Canada , Variable Annuities are called Segr...",301,0.687399
4,How Many Kinds Of Annuities Are There?,"Simply put , there are just a few . Fixed Annu...",283,0.683913
...,...,...,...,...
95,What Is The Enrollment Period For Medicare Par...,"Generally , it is 3 months prior to turning 65...",243,0.066035
96,How Old Should I Be To Get Long Term Care Insu...,The majority of my clients who purchased long ...,506,0.065188
97,What Information Do I Need To Get Car Insurance?,To get car insurance you would need at minimum...,320,0.063996
98,Do I Need Proof Of Insurance To Register A Car...,As of today according to the Wisconsin Departm...,37,0.049803
