In [None]:
!pip install lotus-ai botocore langchain

## 1. Import Required Libraries

In [None]:
import pandas as pd
from lotus.models import E5Model
import lotus
from ibm_watsonx_ai.foundation_models import Model
from ibm_watsonx_ai.credentials import Credentials
from langchain.llms.base import LLM
from typing import Optional, List, Dict, Any, Union
import ibm_boto3
from botocore.client import Config
from pydantic import Field, BaseModel

## 2. Granite LLM Implementation

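The cell below configures the IBM watsonx.ai Granite 3.0 model and defines a custom LangChain `LLM` class that wraps it. The wrapper forwards each prompt to the model unchanged; it does not apply any TAG formatting of its own.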

In [None]:
# --- watsonx.ai connection details -------------------------------------------
# Left blank on purpose: supply your own values before running this cell, and
# avoid committing real credentials with the notebook.
api_key = ""
project_id = ""
url = ""

# --- Model selection and text-generation parameters --------------------------
model_id = "ibm/granite-3-8b-instruct"
parameters = dict(
    decoding_method="sample",
    max_new_tokens=1000,
    min_new_tokens=1,
    temperature=0.7,
    top_k=50,
    top_p=1,
    repetition_penalty=1,
)

# --- Client objects ----------------------------------------------------------
credentials = Credentials(url=url, api_key=api_key)

# Shared model handle; the IBMWatsonLLM wrapper below sends prompts through it.
ibm_model = Model(
    model_id=model_id,
    params=parameters,
    credentials=credentials,
    project_id=project_id,
)

class IBMWatsonLLM(LLM):
    """LangChain ``LLM`` wrapper that sends prompts to the module-level ``ibm_model``.

    The wrapper holds no state of its own: every call is forwarded to
    ``ibm_model.generate_text`` with guardrails disabled.
    """

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Text sent to the model as-is.
            stop: Optional stop sequences. The returned text is cut at the
                earliest occurrence of any of them.
            run_manager: Accepted for compatibility with LangChain's ``_call``
                signature; not used here.
            **kwargs: Extra keyword arguments forwarded by LangChain; accepted
                so the call does not fail with ``TypeError``, and ignored.

        Returns:
            The generated text, truncated at the first stop sequence if any.
        """
        text = ibm_model.generate_text(prompt=prompt, guardrails=False)

        # Honour stop sequences client-side; only plain strings can be sliced.
        if stop and isinstance(text, str):
            positions = [text.find(token) for token in stop if token]
            positions = [pos for pos in positions if pos != -1]
            if positions:
                text = text[: min(positions)]
        return text

    @property
    def _llm_type(self) -> str:
        """Identifier string reported for this LLM implementation."""
        return "ibm_watson"

    def generate_text(self, prompt):
        """Convenience entry point: delegate to ``_call`` with no stop sequences."""
        return self._call(prompt)

## 3. LOTUS Configuration

In [None]:
# Language model: an instance of the IBMWatsonLLM wrapper class.
lm = IBMWatsonLLM()

# E5 model instance pinned to the CPU, passed to LOTUS as `rm`.
# NOTE(review): presumably this is the embedding model behind sem_index / sem_search — confirm against the LOTUS docs.
rm = E5Model(device="cpu")

# Hand both models to the LOTUS settings module.
lotus.settings.configure(lm=lm, rm=rm)

In [None]:
# Show the keys exposed by the LOTUS settings module.
# When needed, other configurations managed by the settings module can be altered as well
# (presumably through lotus.settings.configure — confirm against the LOTUS docs).
lotus.settings.keys()

## 4. Load Data

### Option A: Load a local CSV file with pandas

In [None]:
# Read the movie dataset from 'movies.csv', resolved relative to the current working directory.
df = pd.read_csv('movies.csv')
# Preview the first rows as the cell output.
df.head()

### Option B: Load the CSV from IBM Cloud Object Storage (COS)

In [None]:
import os, types
import pandas as pd
from botocore.client import Config
import ibm_boto3

# Placeholder __iter__ that is bound onto the COS response body further down.
# It only has to exist so that hasattr(body, "__iter__") is true; it is not a real iterator.
def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.

# COS client using the S3-compatible service name and an OAuth signature.
# The API key and both endpoints are blank here and must be filled in.
cos_client = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='',
    ibm_auth_endpoint="",
    config=Config(signature_version='oauth'),
    endpoint_url='')

# Bucket (blank here) and object key of the CSV file to fetch.
bucket = ''
object_key = 'movies.csv'

# Fetch the object and keep only its 'Body' entry.
body = cos_client.get_object(Bucket=bucket,Key=object_key)['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
# (applied only when the body does not already define __iter__)
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

# Parse the fetched body as CSV into a separate frame, df_1, and preview its first 10 rows.
df_1 = pd.read_csv(body)
df_1.head(10)

## 5. Preprocessing Data

In [7]:
import ast

def clean_text_field(field):
    """Normalise one cell value into plain text.

    Args:
        field: A single cell value (string, number, missing value, ...).

    Returns:
        str:
            * ``""`` when ``field`` is missing according to ``pd.isna``;
            * for a string that parses as a Python list literal, its items
              joined with single spaces;
            * for a string that parses as a Python dict literal, its values
              joined with single spaces;
            * any other string unchanged;
            * ``str(field)`` for non-string values.
    """
    if pd.isna(field):
        return ""
    if not isinstance(field, str):
        return str(field)

    try:
        evaluated = ast.literal_eval(field)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a Python literal (ordinary free text): keep the string as-is.
        # Only literal_eval's own failure modes are caught, so KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        return field

    if isinstance(evaluated, list):
        return " ".join(str(item) for item in evaluated)
    if isinstance(evaluated, dict):
        return " ".join(str(value) for value in evaluated.values())
    # Parsed to some other literal (number, tuple, quoted string, ...): keep the original text.
    return field

# Clean the dataframe
def prepare_df_for_semantic_search(df, text_columns=None):
    """Return a copy of ``df`` whose text columns have been cleaned.

    Args:
        df: Input DataFrame; it is copied, never modified.
        text_columns: Column name, or iterable of column names, to clean with
            ``clean_text_field``. Defaults to ``overview``, ``genres``,
            ``keywords``, ``cast``, ``director`` and ``title``. Names that are
            not present in ``df`` are skipped.

    Returns:
        A new DataFrame with the selected columns cleaned.
    """
    if text_columns is None:
        text_columns = ['overview', 'genres', 'keywords', 'cast', 'director', 'title']
    elif isinstance(text_columns, str):
        # A bare string would otherwise be iterated character by character.
        text_columns = [text_columns]

    clean_df = df.copy()
    for col in text_columns:
        if col in clean_df.columns:
            clean_df[col] = clean_df[col].apply(clean_text_field)

    return clean_df

# Prepare the data: build the cleaned frame from `df` (the frame read from the local CSV).
# Note that `df_1`, the frame loaded from IBM COS, is not used here.
clean_df = prepare_df_for_semantic_search(df)

## 6. Semantic Operators

Semantic operators are a key component in the LOTUS programming model. Semantic operators extend the relational model with AI-based operations that users can compose into powerful, reasoning-based query pipelines over structured and unstructured data.

### 6.1. Semantic Indexing and Semantic Search
Semantic indexing builds an index over a column so that the column can be searched by meaning. Semantic search then runs a top-K similarity search over that indexed column for a given query.

In [8]:

# Build the semantic indices one column at a time: overview first, then title.
# Each entry is (column to index, index name passed to sem_index, progress message).
# A failure is reported and the loop moves on to the next column.
for index_column, index_name, progress_message in (
    ("overview", "movie_overview_index", "Creating overview index..."),
    ("title", "movie_title_index", "\nCreating title index..."),
):
    try:
        print(progress_message)
        clean_df = clean_df.sem_index(index_column, index_name)
    except Exception as e:
        print(f"Error creating {index_column} index: {str(e)}")

# Example searches
def safe_semantic_search(df, column, query, k=2):
    """Run a semantic search and print the matches, reporting errors instead of raising.

    Args:
        df: Object exposing ``sem_search(column, query, K=...)``.
        column: Name of the column to search.
        query: Natural-language search text.
        k: Number of results requested (passed to ``sem_search`` as ``K``).

    Returns:
        The result of ``sem_search``, or ``None`` if any exception was raised
        (the error message is printed in that case).
    """
    try:
        results = df.sem_search(column, query, K=k)
        print(f"\nResults for searching '{query}' in {column}:")
        # Select 'title' only once when the searched column is itself 'title';
        # otherwise the same column would be printed twice.
        display_columns = ['title'] if column == 'title' else ['title', column]
        print(results[display_columns])
        return results
    except Exception as e:
        print(f"Error performing semantic search: {str(e)}")
        return None

# Example usage: both calls omit the k argument and pass the cleaned frame.
print("\nPerforming semantic searches...")

# Search by overview
safe_semantic_search(clean_df, "overview", "space exploration")

# Search by title
# This call is the last expression in the cell, so the DataFrame it returns is
# also rendered as the cell output, in addition to what the function prints.
safe_semantic_search(clean_df, "title", "adventure movies")

Creating overview index...


100%|██████████| 21/21 [01:02<00:00,  2.96s/it]



Creating title index...


100%|██████████| 21/21 [00:05<00:00,  3.73it/s]



Performing semantic searches...


100%|██████████| 1/1 [00:00<00:00, 16.98it/s]



Results for searching 'space exploration' in overview:
          title                                           overview
220  Prometheus  A team of explorers discover a clue to the ori...
549      Sphere  The OSSA discovers a spacecraft thought to be ...


100%|██████████| 1/1 [00:00<00:00, 17.99it/s]


Results for searching 'adventure movies' in title:
                title            title
1428  Superhero Movie  Superhero Movie
305   Treasure Planet  Treasure Planet





Unnamed: 0,index,budget,genres,id,keywords,original_title,overview,popularity,release_date,revenue,runtime,spoken_languages,tagline,title,vote_average,vote_count,cast,director
1428,1428,35000000,Action Comedy Science Fiction,11918,anti hero high school dragonfly superhero radi...,Superhero Movie,The team behind Scary Movie takes on the comic...,19.088655,27/03/08,25871834,85,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",The greatest Superhero movie of all time! (not...,Superhero Movie,4.9,379,Drake Bell Sara Paxton Leslie Nielsen Christop...,Craig Mazin
305,305,140000000,Adventure Animation Family Fantasy Science Fic...,9016,cyborg based on novel space marine mutiny loss...,Treasure Planet,When space galleon cabin boy Jim Hawkins disco...,38.924136,26/11/02,109578115,95,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Find your place in the universe.,Treasure Planet,7.2,948,Joseph Gordon-Levitt Brian Murray David Hyde P...,Ron Clements
