# LOTUS demo

## APIs

In [1]:
import bigframes.pandas as bpd
from bigframes.ml.llm import GeminiTextGenerator, _GEMINI_1P5_FLASH_001_ENDPOINT, _GEMINI_1P5_PRO_001_ENDPOINT

# Set the BigQuery DataFrames progress-bar display option to None so that
# query-job progress output does not clutter the cell outputs in this demo.
bpd.options.display.progress_bar = None

In [2]:
# Build the demo table: a single "Course Name" column, which the semantic
# operators below reference through the `{Course Name}` placeholder.
course_names = [
    "Probability and Random Processes",
    "Optimization Methods in Engineering",
    "Digital Design and Integrated Circuits",
    "Computer Security",
    "Operating Systems and Systems Programming",
    "Compilers",
    "Computer Networks",
    "Deep Learning",
    "Graphics",
    "Databases",
    "Art History",
]
data = {"Course Name": course_names}
df = bpd.DataFrame(data)

# Gemini 1.5 Flash text generator; passed as `model` in the API examples.
model = GeminiTextGenerator(model_name=_GEMINI_1P5_FLASH_001_ENDPOINT)

  return func(get_global_session(), *args, **kwargs)


### 1. `sem_filter`

In [3]:
# Filter the courses with a natural-language predicate evaluated per row via
# the `{Course Name}` placeholder. With logprobs=True the displayed result
# also carries a `confidence_scores` column.
# The model is passed by keyword, matching the other operator calls in this
# notebook (sem_join, sem_map and the cascade sem_filter).
predict_df = df.sem_filter(
    "{Course Name} requires a lot of math",
    model=model,
    logprobs=True,
)
predict_df



Unnamed: 0,Course Name,confidence_scores
0,Probability and Random Processes,0.95
1,Optimization Methods in Engineering,0.95
2,Digital Design and Integrated Circuits,0.95
5,Compilers,0.8
7,Deep Learning,0.95
8,Graphics,0.8


### 2. `sem_join`

In [4]:
# Right-hand table for the join: one candidate skill per row.
skills_df = bpd.DataFrame({"Skill": ["Art", "Cryptography", "Baking"]})

# Join courses with skills using an instruction that references a column from
# each side: {Course Name} from `df` and {Skill} from `skills_df`.
join_df = df.sem_join(
    skills_df,
    "Taking {Course Name} will make me better at {Skill}",
    model=model,
    logprobs=True,
)
join_df



Unnamed: 0,Course Name,Skill,confidence_scores
10,Computer Security,Cryptography,0.85
19,Computer Networks,Cryptography,0.8
24,Graphics,Art,0.85


### 3. `sem_map`

In [5]:
# Generate free-form text for each course; the displayed result holds the
# generated text in a `_map` column next to `Course Name`.
map_df = df.sem_map(
    "Generate a short study plan to succeed in {Course Name}",
    model=model,
)
map_df



Unnamed: 0,Course Name,_map
0,Probability and Random Processes,## Study Plan for Probability and Random Proce...
1,Optimization Methods in Engineering,## Study Plan for Optimization Methods in Engi...
2,Digital Design and Integrated Circuits,## Study Plan for Digital Design and Integrate...
3,Computer Security,## Computer Security Study Plan **Goal:** Ac...
4,Operating Systems and Systems Programming,## Study Plan for Operating Systems and Syste...
5,Compilers,## Short Study Plan for Compilers: **1. Funda...
6,Computer Networks,## Short Study Plan for Computer Networks **G...
7,Deep Learning,## Short Study Plan for Deep Learning: **Focu...
8,Graphics,## Graphics Course Study Plan: **Goal:** Achi...
9,Databases,## Short Study Plan for Databases: **1. Acti...


In [6]:
# Show the full, untruncated study plan for the first course. The generated
# text column is selected by name (`_map`) rather than by position, so the
# cell does not depend on the column order of sem_map's output.
map_df["_map"].iloc[0]

"## Study Plan for Probability and Random Processes\n\n**Goal:**  Master the core concepts and build strong problem-solving skills in Probability and Random Processes.\n\n**Strategy:** \n\n1. **Understand the Fundamentals:**\n    * **Week 1-2:** Focus on probability basics: events, axioms, probability distributions, conditional probability, Bayes' Theorem.  \n    * **Week 3-4:** Dive deeper into random variables, expected value, variance, common distributions (Bernoulli, Binomial, Poisson, Normal). \n    * **Week 5-6:**  Explore fundamental concepts of random processes: stochastic processes, Markov Chains, Poisson process. \n\n2. **Practice Regularly:**\n    * **Daily:** Solve at least 5-10 problems from the textbook or previous exams. \n    * **Weekly:**  Review class notes, work on challenging problems, and try to explain concepts to yourself or a study partner.\n\n3. **Seek Help and Resources:**\n    * **Office Hours:**  Utilize your professor's office hours to clarify concepts and 

## Optimizations

### Cascade Models

In [7]:
# Cascade setup: the smaller model (Gemini 1.5 Flash, `helper_model`) runs
# first to save cost, and the larger model (Gemini 1.5 Pro, `large_model`)
# resolves the rows the helper does not.
large_model = GeminiTextGenerator(model_name=_GEMINI_1P5_PRO_001_ENDPOINT)
helper_model = GeminiTextGenerator(model_name=_GEMINI_1P5_FLASH_001_ENDPOINT)


In [8]:
# Same predicate as the basic sem_filter example, now run as a cascade.
# NOTE(review): presumably rows the helper model answers with confidence below
# `confidence_threshold` are escalated to the large model -- confirm against
# the sem_filter implementation.
predict_df = df.sem_filter(
    "{Course Name} requires a lot of math",
    model=large_model,          # larger model
    helper_model=helper_model,  # smaller, cheaper model
    confidence_threshold=0.9,
    logprobs=True,
)
predict_df



Debug:
5 rows resolved by helper model.
6 rows resolved by large model


Unnamed: 0,Course Name,helper_lm_results,helper_lm_confidence_scores,large_lm_results,large_lm_confidence_scores
0,Probability and Random Processes,True,0.95,,
1,Optimization Methods in Engineering,True,0.99,,
2,Digital Design and Integrated Circuits,True,0.95,,
7,Deep Learning,True,0.95,,
