# LOTUS demo

## APIs

In [1]:
import bigframes.pandas as bpd
from bigframes.ml.llm import GeminiTextGenerator, _GEMINI_1P5_FLASH_001_ENDPOINT, _GEMINI_1P5_PRO_001_ENDPOINT

# Set the display progress-bar option to None for this session
# (presumably suppresses progress-bar output in the cells below — confirm
# against the bigframes display options documentation).
bpd.options.display.progress_bar = None

In [2]:
# Build the single-column course DataFrame that the semantic operators in
# this notebook are applied to.
course_names = [
    "Probability and Random Processes",
    "Optimization Methods in Engineering",
    "Digital Design and Integrated Circuits",
    "Computer Security",
    "Operating Systems and Systems Programming",
    "Compilers",
    "Computer Networks",
    "Deep Learning",
    "Graphics",
    "Databases",
    "Art History",
]
data = {"Course Name": course_names}
df = bpd.DataFrame(data)

# Text-generation model (Gemini 1.5 Flash endpoint) used by the API demos.
model = GeminiTextGenerator(model_name=_GEMINI_1P5_FLASH_001_ENDPOINT)

  return func(get_global_session(), *args, **kwargs)


### 1. `sem_filter`

In [3]:
# Natural-language predicate; `{Course Name}` presumably refers to the
# DataFrame column of the same name — confirm against the sem_filter docs.
math_filter_instruction = "{Course Name} requires a lot of math"

predict_df = df.sem_filter(
    math_filter_instruction,
    model,
    logprobs=True,
)
predict_df



Unnamed: 0,Course Name,primary_results,primary_confidence_scores
0,Probability and Random Processes,True,0.95
1,Optimization Methods in Engineering,True,0.95
2,Digital Design and Integrated Circuits,True,0.95
5,Compilers,True,0.8
7,Deep Learning,True,0.95
8,Graphics,True,0.8


### 2. `sem_join`

In [4]:
# Right-hand side of the semantic join: a one-column frame of skills.
skill_names = ["Art", "Cryptography", "Baking"]
skills_df = bpd.DataFrame({"Skill": skill_names})

# The instruction mentions one column from each frame (`Course Name` from
# `df`, `Skill` from `skills_df`).
join_instruction = "Taking {Course Name} will make me better at {Skill}"

join_df = df.sem_join(
    skills_df,
    join_instruction,
    primary_model=model,
    logprobs=True,
)
join_df



Unnamed: 0,Course Name,Skill,primary_results,primary_confidence_scores
10,Computer Security,Cryptography,True,0.8
19,Computer Networks,Cryptography,True,0.8
24,Graphics,Art,True,0.75


### 3. `sem_map`

In [5]:
# Per-row generation prompt; the displayed result carries the generated text
# in a `_map` column next to `Course Name`.
study_plan_instruction = "Generate a short study plan to succeed in {Course Name}"

map_df = df.sem_map(
    study_plan_instruction,
    primary_model=model,
)
map_df



Unnamed: 0,Course Name,_map
0,Probability and Random Processes,## Short Study Plan for Probability and Random...
1,Optimization Methods in Engineering,## Study Plan for Optimization Methods in Engi...
2,Digital Design and Integrated Circuits,## Short Study Plan for Digital Design and Int...
3,Computer Security,## Computer Security Study Plan: **Goal:** Ac...
4,Operating Systems and Systems Programming,## Study Plan for Operating Systems and System...
5,Compilers,## Short Study Plan for Compilers: **1. Unde...
6,Computer Networks,## Short Study Plan for Computer Networks: **...
7,Deep Learning,## Short Study Plan for Deep Learning: **1. F...
8,Graphics,## Graphics Course Study Plan: **Goal:** Ach...
9,Databases,## Short Study Plan for Databases: **1. Maste...


In [8]:
# Show the full text of one generated cell: first row, second column
# (the second column is `_map` in the table displayed for `map_df`).
first_row_pos, map_col_pos = 0, 1
map_df.iloc[first_row_pos, map_col_pos]

"## Short Study Plan for Probability and Random Processes\n\n**Understanding the Fundamentals:**\n\n* **Week 1-2:** Focus on solidifying the core concepts of probability theory: \n    * Probability spaces, events, and random variables. \n    * Probability distributions (discrete and continuous).\n    * Expected value, variance, and other key measures.\n* **Week 3-4:** Dive into the world of random processes: \n    * Understand different types of processes (e.g., Bernoulli, Poisson, Markov).\n    * Learn about their properties and how they are used to model real-world phenomena.\n\n**Practice and Application:**\n\n* **Throughout the course:**  \n    * Solve numerous practice problems and exercises. \n    * Emphasize understanding the underlying concepts behind each problem. \n    * Seek clarification on any difficulties you encounter.\n* **Week 5-6:** Begin working on project assignments or relevant research topics. \n    * This will test your understanding and application of the course

## Optimizations

### Cascade Models

In [6]:
# Primary model for the cascade: Gemini 1.5 Flash endpoint.
primary_model = GeminiTextGenerator(
    model_name=_GEMINI_1P5_FLASH_001_ENDPOINT,
)

# Backup model for the confidence cascade: Gemini 1.5 Pro endpoint.
backup_model = GeminiTextGenerator(
    model_name=_GEMINI_1P5_PRO_001_ENDPOINT,
)

In [7]:
# Same predicate as the plain `sem_filter` demo, now run with a backup model.
cascade_instruction = "{Course Name} requires a lot of math"

# Confidence cut-off passed to the cascade. Presumably rows whose primary
# confidence falls below it are handed to `backup_model` — confirm against
# the sem_filter documentation.
cascade_confidence_threshold = 0.9

# NOTE: this rebinds `predict_df`, replacing the result of the earlier
# non-cascaded filter.
predict_df = df.sem_filter(
    cascade_instruction,
    primary_model,
    backup_model,
    confidence_threshold=cascade_confidence_threshold,
    logprobs=True,
)
predict_df



Unnamed: 0,Course Name,primary_results,primary_confidence_scores,backup_results,backup_confidence_scores
0,Probability and Random Processes,True,0.95,,
1,Optimization Methods in Engineering,True,0.95,,
2,Digital Design and Integrated Circuits,True,0.95,,
7,Deep Learning,True,0.95,,
