In [20]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
import duckdb
import pandas as pd
import seaborn as sns

tips = sns.load_dataset("tips")

tips.columns.tolist()

['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size']

In [22]:
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [23]:
from broinsight.data_quality.field_profile import field_profile

tips_metadata = field_profile(tips)
pd.DataFrame.from_dict(tips_metadata, orient="index")

Unnamed: 0,data_types,missing_values,missing_values_pct,unique_values,unique_values_pct,most_frequent,statistics
total_bill,float,0,0.0,229,0.94,"{13.42: 3, 21.01: 2, 20.69: 2, 10.33: 2, 10.34...","{'min': 3.07, 'max': 50.81, 'mean': 19.79, 'me..."
tip,float,0,0.0,123,0.5,"{2.0: 33, 3.0: 23, 4.0: 12, 2.5: 10, 5.0: 10}","{'min': 1.0, 'max': 10.0, 'mean': 3.0, 'median..."
sex,string,0,0.0,2,0.01,"{'Male': 157, 'Female': 87}","{'mode': 'Male', 'avg_length': 4.71, 'min_leng..."
smoker,string,0,0.0,2,0.01,"{'No': 151, 'Yes': 93}","{'mode': 'No', 'avg_length': 2.38, 'min_length..."
day,string,0,0.0,4,0.02,"{'Sat': 87, 'Sun': 76, 'Thur': 62, 'Fri': 19}","{'mode': 'Sat', 'avg_length': 3.25, 'min_lengt..."
time,string,0,0.0,2,0.01,"{'Dinner': 176, 'Lunch': 68}","{'mode': 'Dinner', 'avg_length': 5.72, 'min_le..."
size,integer,0,0.0,6,0.02,"{2: 156, 3: 38, 4: 37, 5: 5, 1: 4}","{'min': 1.0, 'max': 6.0, 'mean': 2.57, 'median..."


In [24]:
tips_metadata

{'total_bill': {'data_types': 'float',
  'missing_values': 0,
  'missing_values_pct': 0.0,
  'unique_values': 229,
  'unique_values_pct': 0.94,
  'most_frequent': {13.42: 3, 21.01: 2, 20.69: 2, 10.33: 2, 10.34: 2},
  'statistics': {'min': 3.07,
   'max': 50.81,
   'mean': 19.79,
   'median': 17.8,
   'std': 8.9,
   'var': 79.25,
   'skew': 1.13,
   'kurt': 1.22,
   'iqr': 10.78,
   'cv': 0.45,
   'lower_bound': -2.82,
   'upper_bound': 40.3}},
 'tip': {'data_types': 'float',
  'missing_values': 0,
  'missing_values_pct': 0.0,
  'unique_values': 123,
  'unique_values_pct': 0.5,
  'most_frequent': {2.0: 33, 3.0: 23, 4.0: 12, 2.5: 10, 5.0: 10},
  'statistics': {'min': 1.0,
   'max': 10.0,
   'mean': 3.0,
   'median': 2.9,
   'std': 1.38,
   'var': 1.91,
   'skew': 1.47,
   'kurt': 3.65,
   'iqr': 1.56,
   'cv': 0.46,
   'lower_bound': -0.34,
   'upper_bound': 5.91}},
 'sex': {'data_types': 'string',
  'missing_values': 0,
  'missing_values_pct': 0.0,
  'unique_values': 2,
  'unique_values

In [25]:
descriptions = dict(
    total_bill="the amount of paid bill of the meal",
    tip="the amount of tip that customers paid",
    sex="the gender of customers",
    smoker="it indicates that a customer is a smoker or not. if No means a customer is a non-smoker, Yes means a customer is a smoker.",
    day="this is a day of the week when a customer having a meal here. i.e. Mon, Tue, Wed, Thu, Fri, Sat, Sun",
    time="the time of the meal. it can be either Dinner or Lunch",
    size="the number of dishes that customers have",
)

In [26]:
[tips_metadata[feat].update(dict(description=descriptions[feat])) for feat in tips.columns.tolist()]

[None, None, None, None, None, None, None]

In [27]:
from broinsight.experiment.ollama import LocalOpenAI
model = LocalOpenAI()

In [28]:
from broprompt import Prompt

prompt = Prompt.from_markdown("broinsight/prompt_hub/guide_question.md")
metadata = "METADATA:\n\n{metadata}\n\n".format(metadata="\n".join(["{field}: {detail}".format(field=field, detail=detail) for field, detail in tips_metadata.items()]))
# user_input = "USER_INPUT:\n\nWhat data do we have?"
user_input = "USER_INPUT:\n\nI wanna know about customers"
# response = model.run(system_prompt=prompt.str, messages=[model.UserMessage(text=metadata+user_input)])
response = model.run(system_prompt=prompt.str, messages=[model.UserMessage(text=metadata)])

print(response['content'])

Here are some areas you might want to explore:

Customer demographics  
- What is the average tip for Male customers compared to Female customers?  
- How does the average total_bill differ between smokers and non‑smokers?  

Dining patterns  
- Which day of the week has the highest average total_bill?  
- Is there a difference in average tip between Lunch and Dinner times?  

Group size influence  
- What is the average total_bill for groups of size 1 compared to groups of size 2?  
- How does the average tip change for larger groups (size 4 or more) versus smaller groups?  

Just ask me any of these questions and I'll analyze your data to get the answers!


In [29]:
prompt = Prompt.from_markdown("broinsight/prompt_hub/table_descriptor.md")
metadata = "METADATA:\n\n{metadata}\n\n".format(metadata="\n".join(["{field}: {detail}".format(field=field, detail=detail) for field, detail in tips_metadata.items()]))
response = model.run(system_prompt=prompt.str, messages=[model.UserMessage(text=metadata)])

print(response["content"])

This data shows how much customers pay for meals, the tips they leave, and basic details about when and by whom they dine. It lets managers see which days and times bring the most revenue, how party size and customer habits affect spending, and where to focus promotions. With these insights, restaurants can boost sales, optimize staffing, and tailor marketing to the most profitable segments.


In [30]:
table_name = "tips"
table_description = response['content'].split("```text")[-1].split("```")[0]

table_metadata = dict(
    table_name=table_name,
    table_description=table_description,
)
table_metadata.update(tips_metadata)

In [31]:
# question = "I wanna know how many people visit my shop compare to global population and tell me the source of information."
# question = "I wanna see the relationship between average total bill and average tips of segments of sex and smoker."
# question = "Does smoker tip differently than non-smoker breaking down by time of day?"
# question = "Which sex gives the better tips?"
# question = "Which sex gives the better tips than one another?"
# question = "I wanna know that weekend have a size bigger than weekday or not? If so how many?"
# question = "อยากรู้ว่า คนกินข้าวเยอะ ในวันไหนมากกว่ากัน ระหว่าง วันจันทร์ และ วันศุกร์ เป็นจำนวนเท่าไหร่?"
# question = "I wanna know how many people are in between smoker and non-smoker groups. Also, do you know which brand of ciggaratte do the smoker prefer in our restaurant?"
# question = "Which sex has the most dinner?"
# question = "Based on sex, smoker type and meal of the day, which customer group paid most in tips?"
# question = "Based on sex, smoker type and meal of the day, which customer group paid most in tips? I wanna see all comparisons."
# question = "What is the average `tip` given the `size` of the party?"
# question = "Is there a difference in average tip between Lunch and Dinner using the time and tip fields?"
question = "What are the average tip amounts of lunch and dinner? Also which one is higher and by percentage?"

prompt = Prompt.from_markdown("broinsight/prompt_hub/generate_sql.md")
metadata = "METADATA:\n\n{metadata}\n\n".format(metadata="\n".join(["{field}: {detail}".format(field=field, detail=detail) for field, detail in table_metadata.items()]))
user_input = "USER_INPUT:\n\n{question}".format(question=question)
response = model.run(system_prompt=prompt.str, messages=[model.UserMessage(text=metadata+user_input)])

print(response["content"])

```sql
WITH avg_tips AS (
    SELECT
        time,
        ROUND(AVG(tip), 2) AS avg_tip
    FROM tips
    GROUP BY time
)
SELECT
    lunch.avg_tip          AS lunch_avg_tip,
    dinner.avg_tip         AS dinner_avg_tip,
    CASE
        WHEN dinner.avg_tip > lunch.avg_tip THEN 'Dinner'
        ELSE 'Lunch'
    END                     AS higher_tip,
    ROUND(
        (GREATEST(dinner.avg_tip, lunch.avg_tip) - LEAST(dinner.avg_tip, lunch.avg_tip))
        / NULLIF(LEAST(dinner.avg_tip, lunch.avg_tip), 0) * 100,
        2
    )                       AS percent_higher
FROM
    (SELECT avg_tip FROM avg_tips WHERE time = 'Lunch') lunch
CROSS JOIN
    (SELECT avg_tip FROM avg_tips WHERE time = 'Dinner') dinner;
```


In [32]:
sql_query = response['content'].split("```sql")[-1].split("```")[0]
print(sql_query)


WITH avg_tips AS (
    SELECT
        time,
        ROUND(AVG(tip), 2) AS avg_tip
    FROM tips
    GROUP BY time
)
SELECT
    lunch.avg_tip          AS lunch_avg_tip,
    dinner.avg_tip         AS dinner_avg_tip,
    CASE
        WHEN dinner.avg_tip > lunch.avg_tip THEN 'Dinner'
        ELSE 'Lunch'
    END                     AS higher_tip,
    ROUND(
        (GREATEST(dinner.avg_tip, lunch.avg_tip) - LEAST(dinner.avg_tip, lunch.avg_tip))
        / NULLIF(LEAST(dinner.avg_tip, lunch.avg_tip), 0) * 100,
        2
    )                       AS percent_higher
FROM
    (SELECT avg_tip FROM avg_tips WHERE time = 'Lunch') lunch
CROSS JOIN
    (SELECT avg_tip FROM avg_tips WHERE time = 'Dinner') dinner;



In [33]:
import duckdb
conn = duckdb.connect()
conn.register("tips", tips)
query_result = conn.execute(sql_query).df()
query_result

Unnamed: 0,lunch_avg_tip,dinner_avg_tip,higher_tip,percent_higher
0,2.73,3.1,Dinner,13.55


In [34]:
prompt = Prompt.from_markdown("broinsight/prompt_hub/chat.md")
context = "CONTEXT:\n\n{context}n\n".format(context=query_result.to_string())
user_input = "USER_INPUT:\n\n{question}".format(question=question)
response = model.run(system_prompt=prompt.str, messages=[model.UserMessage(text=context+user_input)])

print(response["content"])

From the data you’ve shared:

- **Lunch average tip:** **$2.73**  
- **Dinner average tip:** **$3.10**  

The dinner tip is the higher amount. According to the “percent_higher” column, dinner tips are about **13.55 %** higher than lunch tips. 

So, if you’re looking to boost overall tipping or compare service periods, dinner clearly pulls ahead—roughly one‑and‑a‑half dollars per meal, which translates to a noticeable 13.5 % lift over lunch. This could influence staffing, menu pricing, or promotional focus during dinner hours.


In [35]:
# 1. LLM generates function code
prompt = Prompt.from_markdown("broinsight/prompt_hub/chart_builder.md")
data = "DATA:\n\n{data}\n\n".format(data=query_result.to_string())
user_input = "USER_INPUT:\n\n{question}".format(question=question)
response = model.run(system_prompt=prompt.str, messages=[
    model.UserMessage(text=data+user_input)
])

In [36]:
print(response['content'])

```python
def create_chart(data):
    import plotly.graph_objects as go
    import pandas as pd

    # Ensure we have the expected columns
    expected_cols = {'lunch_avg_tip', 'dinner_avg_tip', 'higher_tip', 'percent_higher'}
    if not expected_cols.issubset(set(data.columns)):
        raise ValueError(f"Data must contain columns: {expected_cols}")

    # Extract values
    lunch_tip = data['lunch_avg_tip'].iloc[0]
    dinner_tip = data['dinner_avg_tip'].iloc[0]
    higher = data['higher_tip'].iloc[0]
    percent = data['percent_higher'].iloc[0]

    # Create bar chart
    fig = go.Figure(data=[
        go.Bar(name='Lunch', x=['Lunch'], y=[lunch_tip], marker_color='steelblue'),
        go.Bar(name='Dinner', x=['Dinner'], y=[dinner_tip], marker_color='indianred')
    ])

    # Add annotation for higher tip and percentage
    annotation_text = f"**{higher}** is higher by **{percent:.2f}%**"
    fig.add_annotation(
        x=0.5, y=1.15, xref='paper', yref='paper',
        text=annotati

In [37]:
function_code = response['content'].split("```python")[-1].split("```")[0]

# 2. Execute to create function
exec(function_code)  # Now create_chart function exists

# 3. Call with actual data
fig = create_chart(query_result)

In [38]:
fig.show()