In [44]:
import pandas as pd

# Load the campsite listing that every later cell searches over.
df = pd.read_csv('acsi_booking.csv')

# Text that gets embedded for each row: name, price and region joined by spaces.
# Missing names/regions are replaced with '' first, because `str + NaN` yields NaN
# and the row would then reach model.encode() as a float instead of text.
# (A missing price is still rendered as the string 'nan' by astype(str).)
df['combined_features'] = (
    df['campsite_name'].fillna('').astype(str)
    + ' '
    + df['price'].astype(str)
    + ' '
    + df['region'].fillna('').astype(str)
)

In [45]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model shared by the corpus and by every query.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode all rows' combined text in a single call.
corpus_texts = df['combined_features'].tolist()
embeddings = model.encode(corpus_texts)

In [46]:
import faiss

# Flat L2 index sized to the embedding width (second axis of `embeddings`).
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)

# Vectors are added in DataFrame row order, so the labels returned by
# index.search() are used as positions into `df` (see the df.iloc lookups).
index.add(embeddings)

In [47]:
import re

def parse_price_constraints(query):
    """Extract price limits from a free-text search query.

    Recognised phrases (matched case-insensitively, any amount of whitespace
    between the words):

    * ``less than N``       -> ``{'less_than': N}``
    * ``more than N``       -> ``{'more_than': N}``
    * ``between A and B``   -> ``{'between': (A, B)}``

    Amounts may be whole numbers or decimals. Whole numbers are returned as
    ``int``; amounts with a decimal part are returned as ``float``.

    Parameters
    ----------
    query : str
        The user's search text.

    Returns
    -------
    dict
        Only the keys whose phrase was found; empty when the query states no
        price limit. Several keys may be present at once.
    """
    # An integer with an optional fractional part; a trailing sentence period
    # ("less than 50.") is not consumed because a digit must follow the dot.
    number = r'(\d+(?:\.\d+)?)'

    def to_number(text):
        # Keep ints as ints so existing callers see the same values as before.
        return float(text) if '.' in text else int(text)

    price_constraints = {}
    less_than = re.search(r'less\s+than\s+' + number, query, re.IGNORECASE)
    more_than = re.search(r'more\s+than\s+' + number, query, re.IGNORECASE)
    between = re.search(
        r'between\s+' + number + r'\s+and\s+' + number, query, re.IGNORECASE
    )

    if less_than:
        price_constraints['less_than'] = to_number(less_than.group(1))
    if more_than:
        price_constraints['more_than'] = to_number(more_than.group(1))
    if between:
        price_constraints['between'] = (
            to_number(between.group(1)),
            to_number(between.group(2)),
        )

    return price_constraints

def filter_by_price(df, constraints):
    """Keep only the rows whose ``price`` satisfies every given constraint.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``price`` column.
    constraints : dict
        Any of ``'less_than'`` (strict upper bound), ``'more_than'`` (strict
        lower bound) and ``'between'`` (inclusive ``(low, high)`` pair).

    Returns
    -------
    pandas.DataFrame
        The filtered rows; ``df`` itself when no known constraint is present.
    """
    # (constraint key, mask builder) pairs, applied in this fixed order.
    rules = (
        ('less_than', lambda frame, limit: frame['price'] < limit),
        ('more_than', lambda frame, limit: frame['price'] > limit),
        (
            'between',
            lambda frame, bounds: (frame['price'] >= bounds[0])
            & (frame['price'] <= bounds[1]),
        ),
    )

    selected = df
    for key, build_mask in rules:
        if key in constraints:
            # Each rule narrows the result of the previous one.
            selected = selected[build_mask(selected, constraints[key])]
    return selected


In [48]:
def search_products(query, top_k=2):
    """Return the rows of ``df`` whose embeddings are nearest to ``query``.

    The query is encoded with the global ``model`` and looked up in the global
    FAISS ``index``.

    Parameters
    ----------
    query : str
        Free-text search query.
    top_k : int, default 2
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_k`` rows of ``df``, in the order returned by the index.
        Fewer rows are returned when the index holds fewer than ``top_k``
        vectors.
    """
    query_embedding = model.encode([query])
    _, labels = index.search(query_embedding, top_k)

    # FAISS pads the label array with -1 when it cannot find top_k neighbours;
    # df.iloc would read -1 as "last row" and return a bogus hit, so drop them.
    hits = labels[0]
    return df.iloc[hits[hits >= 0]]

In [None]:
def search_products_with_price(query, top_k=2):
    """Semantic search over ``df`` that honours price limits stated in the query.

    Price phrases are extracted with ``parse_price_constraints`` and applied
    with ``filter_by_price``; the nearest neighbours of the query are then
    searched among the remaining rows.

    Parameters
    ----------
    query : str
        Free-text search query, optionally containing a price phrase.
    top_k : int, default 2
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_k`` matching rows. Fewer rows are returned when fewer
        candidates exist.

    Notes
    -----
    If the price limits exclude every row, the search falls back to the full
    corpus, so the returned rows may not satisfy the stated limits.
    """
    constraints = parse_price_constraints(query)
    filtered_df = filter_by_price(df, constraints)

    if constraints and not filtered_df.empty:
        # Build a temporary index over just the rows that pass the price filter.
        candidates = filtered_df
        candidate_embeddings = model.encode(candidates['combined_features'].tolist())
        search_index = faiss.IndexFlatL2(candidate_embeddings.shape[1])
        search_index.add(candidate_embeddings)
    else:
        # No price limit in the query (or nothing survived the filter): reuse
        # the prebuilt global index instead of re-encoding the whole corpus.
        candidates = df
        search_index = index

    query_embedding = model.encode([query])
    _, labels = search_index.search(query_embedding, top_k)

    # FAISS pads the label array with -1 when there are fewer than top_k
    # candidates; iloc would read -1 as "last row", so drop the padding.
    hits = labels[0]
    return candidates.iloc[hits[hits >= 0]]

# Smoke test for search_products_with_price, using a query with no price phrase.
sample_query = 'show me some campsites from germany'
print(search_products_with_price(sample_query))