# Hybrid search demo

In [2]:
import os, pandas as pd
from sqlalchemy import create_engine, text

In [3]:
# Connection settings for the IRIS instance.
# Every value can be overridden through an IRIS_* environment variable so that
# credentials do not have to be edited into the notebook; the defaults are the
# demo values, so the notebook still runs unchanged when nothing is set.
username = os.getenv('IRIS_USERNAME', 'demo')
password = os.getenv('IRIS_PASSWORD', 'demo')
hostname = os.getenv('IRIS_HOSTNAME', 'localhost')
port = os.getenv('IRIS_PORT', '1972')
namespace = os.getenv('IRIS_NAMESPACE', 'USER')

# SQLAlchemy URL in the form iris://user:password@host:port/namespace
CONNECTION_STRING = f"iris://{username}:{password}@{hostname}:{port}/{namespace}"

In [4]:
# Build the SQLAlchemy engine from the connection URL assembled above and open
# a single connection; the later cells run their statements through `connection`.
engine = create_engine(CONNECTION_STRING)
connection = engine.connect()

In [None]:
## Creating the table

Run the `sql_demo.ipynb` notebook first.

```SQL
LOAD DATA FROM FILE '~/data/scotch_review.csv'
        COLUMNS (
            id INT,
            name VARCHAR(255),
            category VARCHAR(255),
            review_point INT,
            price DOUBLE,
            currency VARCHAR(10),
            description VARCHAR(2000)
        )
        INTO hybrid.scotch_reviews (name, category, review_point, price, description)
            VALUES (name, category, review_point, price, description)
        USING { "from" : { "file" : { "header" : 1 } } }
```

**TODO:** fix the issue with DB-API parsing and include the data file in the container.

In [5]:
# Define an index named `ifind` on the `description` column of scotch_reviews,
# using the %iFind.Index.Basic index class; the query further down refers to
# this index by name in search_index(ifind, ...).
create_ifind_index = text(
    "CREATE INDEX ifind ON scotch_reviews(description) AS %iFind.Index.Basic"
)
res = connection.execute(create_ifind_index)

In [27]:
from sentence_transformers import SentenceTransformer

# Load the 'all-MiniLM-L6-v2' sentence-transformers model.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode the search term with normalize_embeddings=True, then convert the
# result to a plain Python list so it can be stringified as a SQL parameter.
search_embedding = model.encode("vanilla", normalize_embeddings=True)
search_vector = search_embedding.tolist()

In [33]:
# Easier to read description
pd.set_option('display.max_colwidth', None)

# Hybrid query, built as a chain of CTEs:
#   filtered   - rows of scotch_reviews selected by the iFind search for 'vanilla'
#   scored     - per-row IFindScore (hybrid.scotchreviews_ifindrank) and
#                VectorScore (vector_cosine against the bound :search_vec)
#   with_rank  - RANK() over each score, highest score first
#   full_score - Score = 1/(IFindRank + 10) + 1/(VectorRank + 10),
#                i.e. a reciprocal-rank-fusion style combination with k = 10
# The final SELECT returns every column ordered by Score, best first.
sql = text("""
    WITH 
    
    filtered AS (
       SELECT %ID AS ID, * FROM scotch_reviews AS r
       WHERE %ID %FIND search_index(ifind, 'vanilla')
    ),
    
    scored AS (
       SELECT name, category, description,
          hybrid.scotchreviews_ifindrank(ID, 'vanilla') AS IFindScore,
          vector_cosine(description_vector, TO_VECTOR(:search_vec ,DOUBLE)) AS VectorScore
       FROM filtered
    ), 
    
    with_rank AS (
       SELECT *,
          RANK() OVER (ORDER BY IFindScore DESC) AS IFindRank,
          RANK() OVER (ORDER BY VectorScore DESC) AS VectorRank
       FROM scored
    ),

    -- using k = 10
    full_score AS (
        SELECT *, (1/(IFindRank + 10) + 1/(VectorRank + 10)) AS Score
        FROM with_rank
    )
    
    SELECT * 
    FROM full_score 
    ORDER BY Score desc""")

# The embedding is bound as its string representation, which TO_VECTOR parses.
query_params = {"search_vec": str(search_vector)}
result = connection.execute(sql, query_params).fetchall()

df = pd.DataFrame(result)
df.head()

Unnamed: 0,name,category,description,IFindScore,VectorScore,IFindRank,VectorRank,Score
0,"Glen Scotia 15 year old, 46%",Single Malt Scotch,"This 15 year old has been fully matured in bourbon casks. Relatively reticent on the nose, with light vanilla. Becoming more aromatic and spicy, with tinned apricots in syrup. Medium-bodied, relatively dry, with ginger, oak, and cloves, before a touch of milk chocolate appears. The chocolate darkens. Aniseed and perpetual spice.",0.019189351154854,0.449055,8,5,0.12222222222222222
1,"Mortlach, 1997 vintage, 57.1%",Single Malt Scotch,"Matured in a bourbon cask. Thick and creamy, with mouth-coating vanilla, ripe barley, toasted marshmallow, vanilla wafer, key lime pie, golden delicious apple, lemongrass, and hay. The vanilla sweetness lingers to the finish, mixing with dried herbs and hay. I was expecting more from a carefully chosen Mortlach, given its pedigree, but this is still nice. (240 bottles) £250",0.0081081765443045,0.527266,183,1,0.09609043805934996
2,"Lombard Jewels of Scotland (distilled at Springbank) 21 year old 1991 Cask No. 172, 49.7%",Single Malt Scotch,"Aged in a bourbon hogshead, allowing the distillery character to shine through. Fresh, lively and inviting (especially for its age), with a complex array of tropical and summer fruit, peppered with brine, vanilla, and a hint of baker’s chocolate. An oily texture adds weight. Dry, deliciously appetizing finish. An excellent aperitif whisky, but enjoyable anytime. (D & M Wines and Liquors exclusive.)",0.0302989755076642,0.273408,1,504,0.0928546162009197
3,"Dewar’s 18 year old The Vintage, 40%",Blended Scotch Whisky,"Vanilla laced with spice, fondant icing, grapefruit peel, and lime zest leave the vanilla and floral notes lower down the pecking order. In the mouth, grapefruit and orange dominate the vanilla, yet the mouthfeel is thinner and the acidity tips toward the taste of bitter orange seeds. A lingering bitter orange finish.",0.0,0.468414,292,2,0.08664459161147901
4,"Scotch Malt Whisky Society (28:23) 21 year old, 57.5%",Single Malt Scotch,"This bottling of Tullibardine was distilled in November 1989 and has been matured in a refill sherry butt. The U.S. allocation is 120 bottles. Initially, damp earth on the nose, sweetening to milk chocolate-covered fudge and vanilla, along with hazelnuts. Notes of plum and black pepper with time. The palate is fruity, with toffee, spice, and cedar wood notes. Long in the finish, with cereal and spicy resin.",0.0267758388207265,0.288469,2,435,0.08558052434456928
