In [21]:
import pandas as pd

import lotus
from lotus.models import SentenceTransformersRM, LM
from lotus.vector_store import FaissVS

In [31]:
# Configure the models LOTUS will use: a language model (lm), a
# SentenceTransformers-based RM (rm) and a FAISS-backed vector store (vs).
#
# LM backends tried while writing this notebook:
# lm = LM(model="gpt-4o-mini")      # OK
# lm = LM(model="gpt-4o-nano")      # doesn't work  (NOTE(review): this model name may not exist - confirm)
# lm = LM(model="ollama/llama3.2")  # doesn't work
lm = LM(model="ollama/mistral")  # OK - the backend actually used below
rm = SentenceTransformersRM(model="intfloat/e5-base-v2")
vs = FaissVS()

# Hand all three components to the LOTUS settings object in one call.
lotus.settings.configure(lm=lm, rm=rm, vs=vs)

2025-07-21 12:38:07,217 - INFO - Use pytorch device_name: cuda
2025-07-21 12:38:07,217 - INFO - Load pretrained SentenceTransformer: intfloat/e5-base-v2


In [32]:
# Dataset containing courses and their descriptions/workloads.
# Each entry is a (course name, free-text description) tuple; every description
# also says something about the workload.
# NOTE(review): "Fouces" in the Deep Learning description is a typo for
# "Focuses". It is left untouched here because the text is data sent to the
# models and it also appears in the recorded outputs.
data = [
    (
        "Probability and Random Processes",
        "Focuses on markov chains and convergence of random processes. The workload is pretty high.",
    ),
    (
        "Deep Learning",
        "Fouces on theory and implementation of neural networks. Workload varies by professor but typically isn't terrible.",
    ),
    (
        "Digital Design and Integrated Circuits",
        "Focuses on building RISC-V CPUs in Verilog. Students have said that the workload is VERY high.",
    ),
    (
        "Databases",
        "Focuses on implementation of a RDBMS with NoSQL topics at the end. Most students say the workload is not too high.",
    ),
]

In [33]:
# Load the tuples into a two-column frame. The column names match the
# {Course Name} / {Description} placeholders used in the prompts further down.
df = pd.DataFrame(data, columns=["Course Name", "Description"])

In [34]:
# Step 1 - semantic filter: keep the rows selected by the machine-learning prompt.
ml_df = df.sem_filter(
    "{Description} indicates that the class is relevant for machine learning."
)

# Step 2 - semantic aggregation over the filtered rows. The result is read from
# the "_output" column (label 0) of the frame returned by sem_agg.
tips = ml_df.sem_agg(
    "Given each {Course Name} and its {Description}, give me a study plan to succeed in my classes."
)["_output"][0]

Filtering: 100%|██████████ 4/4 LM calls [00:00<00:00,  5.88it/s]
Aggregating: 100%|██████████ 1/1 LM calls [00:12<00:00, 12.74s/it]


In [35]:
# Semantic top-k: rank the rows against the workload question and keep K=2 of them.
top_2_hardest = df.sem_topk("What {Description} indicates the highest workload?", K=2)

Quicksort comparisons: 100%|██████████| 3/3 LM calls [00:00<00:00]


In [36]:
# One skill per row in a single "Skill" column. The frame is built from an
# explicit column mapping: the previous `("SQL")` / `("Chip Design")` entries
# were plain parenthesised strings, not 1-tuples, which made the rows look like
# tuples when they were not. The resulting frame is the same.
skills_df = pd.DataFrame({"Skill": ["SQL", "Chip Design"]})

# Semantic join of the skills frame against the course frame; the prompt refers
# to {Skill} from the left frame and {Course Name} from the right one.
classes_for_skills = skills_df.sem_join(
    df,
    "Taking {Course Name} will make me better at {Skill}",
)

Join comparisons: 100%|██████████ 8/8 LM Calls [00:11<00:00,  1.46s/it]


In [37]:
# Create a semantic index on the "Description" column and save it to the
# "index_dir" directory. `df` is rebound to the frame returned by sem_index.
df = df.sem_index("Description", "index_dir")
# Search the indexed column for the query string, asking for K=1 result.
top_conv_df = df.sem_search("Description", "Convolutional Neural Network", K=1)

100%|██████████| 1/1 [00:00<00:00, 34.02it/s]
100%|██████████| 1/1 [00:00<00:00, 48.81it/s]


In [38]:
# Few-shot examples passed to sem_map: a "Course Name" column plus an "Answer"
# column (presumably the desired output for that course - confirm against the
# LOTUS docs).
examples_df = pd.DataFrame(
    [("Computer Graphics", "Computer Vision"), ("Real Analysis", "Complex Analysis")],
    columns=["Course Name", "Answer"],
)

# The instruction is assembled with implicit string-literal concatenation.
# The earlier backslash continuation *inside* the literal kept the next source
# line's leading indentation, so the prompt carried a run of stray spaces
# between the two sentences. Now they are separated by a single space.
next_topics = df.sem_map(
    "Given {Course Name}, list a topic that will be good to explore next. "
    "Respond with just the topic name and nothing else.",
    examples=examples_df,
    suffix="Next Topics",
)

Mapping: 100%|██████████ 4/4 LM calls [00:19<00:00,  4.99s/it]


In [39]:
# Show the mapped frame; the generated text is in the "Next Topics" column.
# NOTE(review): in the recorded output the last row contains chat-template text
# ("### User: ...") after the topic name, so the raw model response may need
# trimming before use.
next_topics

Unnamed: 0,Course Name,Description,Next Topics
0,Probability and Random Processes,Focuses on markov chains and convergence of ra...,Stochastic Calculus
1,Deep Learning,Fouces on theory and implementation of neural ...,Applications of Deep Learning
2,Digital Design and Integrated Circuits,Focuses on building RISC-V CPUs in Verilog. St...,Microcontrollers and Embedded Systems
3,Databases,Focuses on implementation of a RDBMS with NoSQ...,Data Mining\n\n### User:\nContext:\n[Course n...
