<a href="https://colab.research.google.com/github/alecarini/HLT-Project-Exam/blob/main/price_retrieval_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from fuzzywuzzy import process
from bs4 import BeautifulSoup
import requests
import re
import openai
from urllib.parse import urlparse

# Prefer the OPENAI_API_KEY environment variable so a real secret never has to
# be committed with the notebook; the "key" placeholder remains the fallback.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY", "key")

In [None]:
import pandas as pd
import openai
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DataFrameLoader

# Load dataset
def load_data(file_path):
    """Read the price dataset from an Excel workbook.

    Args:
        file_path: Location of the Excel file; forwarded unchanged to
            ``pandas.read_excel``.

    Returns:
        The ``DataFrame`` produced by ``pandas.read_excel``.
    """
    dataset = pd.read_excel(file_path)
    return dataset

# Create FAISS index
def create_faiss_index(df):
    """Build a FAISS vector store over the service/speciality text of ``df``.

    Every row becomes one document whose content is
    ``"<medical_service> <speciality>"`` and whose metadata holds the row's
    ``min_price`` and ``max_price``.

    Args:
        df: DataFrame with ``medical_service``, ``speciality``, ``min_price``
            and ``max_price`` columns. The frame is not modified.

    Returns:
        The vector store returned by ``FAISS.from_documents``.

    Raises:
        ValueError: If ``df`` contains no rows.
    """
    if df.empty:
        raise ValueError("Cannot build a FAISS index from an empty dataset.")

    # Work on a private frame so the caller's DataFrame does not silently
    # gain a 'text' column.
    indexed = df[['min_price', 'max_price']].copy()

    # A missing service or speciality would turn the concatenation into NaN,
    # which is not usable as document text; treat missing parts as empty.
    service_text = df['medical_service'].fillna('').astype(str)
    speciality_text = df['speciality'].fillna('').astype(str)
    indexed['text'] = service_text + ' ' + speciality_text

    loader = DataFrameLoader(
        indexed[['text', 'min_price', 'max_price']],
        page_content_column='text',
    )
    documents = loader.load()

    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_documents(documents, embeddings)
    return vectorstore

# Retrieve similar entries
def retrieve_similar(service, speciality, vectorstore):
    """Look up the closest dataset entry for a service/speciality pair.

    The query ``"<service> <speciality>"`` is run through the vector store's
    default retriever and only the first returned document is used.

    Args:
        service: Name of the medical service.
        speciality: Speciality the service belongs to.
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        ``(min_price, max_price, "dataset", 10.0)`` read from the first
        document's metadata, or ``(None, None, None, 0)`` when the retriever
        returns no documents.
    """
    search_text = f"{service} {speciality}"
    # NOTE(review): a nearest-neighbour retriever presumably returns hits no
    # matter how distant they are, so the empty branch is likely reached only
    # for an empty index, and the confidence is a fixed 10.0 - confirm.
    matches = vectorstore.as_retriever().get_relevant_documents(search_text)

    if not matches:
        return None, None, None, 0

    top_metadata = matches[0].metadata  # first hit is treated as the best one
    return top_metadata['min_price'], top_metadata['max_price'], "dataset", 10.0

# Generate price with OpenAI
def generate_price_with_openai(service, speciality, context=""):
    """Ask the OpenAI chat API for a price range for a medical service.

    The Italian prompt embeds ``context``, ``service`` and ``speciality`` and
    instructs the model to answer only with a string of the form
    ``{ min_price=X, max_price=Y, source=openai, confidence=C }``.

    Args:
        service: Name of the medical service.
        speciality: Speciality the service belongs to.
        context: Optional background text inserted into the prompt.

    Returns:
        The message content of the first choice, unparsed, or ``""`` when the
        request or the response handling fails.
    """
    import logging  # local import so the block does not depend on the file's import cell

    prompt = f"""Sei un esperto in tariffe mediche in Italia.
        Basandoti esclusivamente sul seguente contesto e sulla richiesta, fornisci il range di prezzo medio.

        Contesto:
        {context}

        Richiesta:
        Medical Service: {service}
        Speciality: {speciality}

        Rispondi SOLO con una stringa nel formato esatto:
        {{ min_price=X, max_price=Y, source=openai, confidence=C }}
        """

    try:
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[{"role": "system", "content": prompt}]
        )
        return response['choices'][0]['message']['content']
    except Exception:
        # Best-effort by design: callers still receive "" on failure, but the
        # cause is logged instead of being discarded.
        logging.getLogger(__name__).exception(
            "OpenAI price generation failed for service=%r, speciality=%r",
            service,
            speciality,
        )
        return ""

# Main function to get price
def get_price(service, speciality, vectorstore):
    """Return a formatted price estimate for a service/speciality pair.

    The dataset lookup (``retrieve_similar``) is tried first. When it reports
    a source, its values are formatted as
    ``{ min_price=..., max_price=..., source=..., confidence=... }``;
    otherwise the result of ``generate_price_with_openai`` is returned as is.

    Args:
        service: Name of the medical service.
        speciality: Speciality the service belongs to.
        vectorstore: Vector store handed to ``retrieve_similar``.

    Returns:
        The formatted dataset match, or whatever string
        ``generate_price_with_openai`` returns.
    """
    lookup = retrieve_similar(service, speciality, vectorstore)
    low, high, origin, score = lookup

    # No source means the lookup produced nothing usable: fall back to OpenAI.
    if not origin:
        return generate_price_with_openai(service, speciality)

    return f"{{ min_price={low}, max_price={high}, source={origin}, confidence={score} }}"

# Example usage: load the price sheet, index it, then look up one service.
df = load_data(file_path="medical_prices.xlsx")
vectorstore = create_faiss_index(df=df)
result = get_price(
    service="Visita dermatologica",
    speciality="Dermatologia",
    vectorstore=vectorstore,
)
print(result)
