In [1]:
!pip install faiss_cpu
!pip install python-dotenv



In [2]:
import os
import pickle
import faiss
import numpy as np
from dotenv import load_dotenv
from openai import OpenAI

# Load environment variables from the .env file
load_dotenv()

# Get API key from the environment
openai_api_key = os.getenv("OPENAI_API_KEY")

# Initialize OpenAI client
client = OpenAI(api_key=openai_api_key)

# Load FAISS index
index = faiss.read_index("knowledge_index.faiss")

# Load chunks from pickle
with open("chunks_new.pkl", "rb") as f:
    chunks = pickle.load(f)

In [18]:
def retrieve_relevant_chunks(query, index, chunks, k=5, model="text-embedding-3-small", min_score=0.7):
    response = client.embeddings.create(
        input=[query],
        model=model
    )
    query_vector = np.array(response.data[0].embedding, dtype='float32').reshape(1, -1)

    distances, indices = index.search(query_vector, k)
    
    results = []
    for dist, idx in zip(distances[0], indices[0]):
        if dist >= min_score:
            results.append((dist, chunks[idx]))
    
    return results

In [22]:
appendix = "Google Stock, Apple Stock Rise 40 Million"
results = retrieve_relevant_chunks(appendix, index, chunks)
for score, chunk in results:
    print(f"Score: {score:.4f} | Chunk: {chunk}\n")

Score: 1.1647 | Chunk: === Abuse of rank ===

Score: 1.2457 | Chunk: Bullying is repeated acts over time that involves a real or perceived imbalance of power with the more powerful individual or group attacking those who are less powerful. Bullying may consist of three basic types of abuse – verbal, physical and emotional. It typically involves subtle methods of coercion such as intimidation. Bullying can be defined in many different ways. Although the UK currently has no legal definition of bullying, some US states have laws against it. Bullying is usually done to coerce others by fear or threat.

Score: 1.2638 | Chunk: A cause of action in tort arising from one party making a malicious and deliberate misuse or perversion of regularly issued court process (civil or criminal) not justified by the underlying legal action.

Score: 1.2813 | Chunk: === Child abuse ===

Score: 1.3210 | Chunk: === Pool F ===

