In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import duckdb
import pandas as pd
import seaborn as sns

tips = sns.load_dataset("tips")

tips.columns.tolist()

['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size']

In [3]:
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [28]:
from broinsight.data_quality.field_profile import field_profile

tips_metadata = field_profile(tips)
pd.DataFrame.from_dict(tips_metadata, orient="index")

Unnamed: 0,data_types,missing_values,missing_values_pct,unique_values,unique_values_pct,most_frequent,statistics
total_bill,float,0,0.0,229,0.94,"{13.42: 3, 21.01: 2, 20.69: 2, 10.33: 2, 10.34...","{'min': 3.07, 'max': 50.81, 'mean': 19.79, 'me..."
tip,float,0,0.0,123,0.5,"{2.0: 33, 3.0: 23, 4.0: 12, 2.5: 10, 5.0: 10}","{'min': 1.0, 'max': 10.0, 'mean': 3.0, 'median..."
sex,string,0,0.0,2,0.01,"{'Male': 157, 'Female': 87}","{'mode': 'Male', 'avg_length': 4.71, 'min_leng..."
smoker,string,0,0.0,2,0.01,"{'No': 151, 'Yes': 93}","{'mode': 'No', 'avg_length': 2.38, 'min_length..."
day,string,0,0.0,4,0.02,"{'Sat': 87, 'Sun': 76, 'Thur': 62, 'Fri': 19}","{'mode': 'Sat', 'avg_length': 3.25, 'min_lengt..."
time,string,0,0.0,2,0.01,"{'Dinner': 176, 'Lunch': 68}","{'mode': 'Dinner', 'avg_length': 5.72, 'min_le..."
size,integer,0,0.0,6,0.02,"{2: 156, 3: 38, 4: 37, 5: 5, 1: 4}","{'min': 1.0, 'max': 6.0, 'mean': 2.57, 'median..."


In [29]:
descriptions = dict(
    total_bill="the amount of paid bill of the meal",
    tip="the amount of tip that customers paid",
    sex="the gender of customers",
    smoker="it indicates that a customer is a smoker or not. if No means a customer is a non-smoker, Yes means a customer is a smoker.",
    day="this is a day of the week when a customer having a meal here. i.e. Mon, Tue, Wed, Thu, Fri, Sat, Sun",
    time="the time of the meal. it can be either Dinner or Lunch",
    size="the number of dishes that customers have",
)

In [30]:
[tips_metadata[feat].update(dict(description=descriptions[feat])) for feat in tips.columns.tolist()]

[None, None, None, None, None, None, None]

In [31]:
import requests

class LocalOpenAI:
    def __init__(self):
        self.model_name = "gpt-oss:latest"
        self.base_url = "http://localhost:11434"
        self.temperature = 0.8        
    
    def UserMessage(self, text):
        return {"role": "user", "content": text}

    def AIMessage(self, text):
        return {"role": "assistant", "content": text}

    def SystemMessage(self, text):
        return {"role": "system", "content": text}

    def OutputMessage(self, response):
        return dict(
            content=response["message"]["content"],
            model_name=self.model_name,
            input_token=0,
            output_token=0
        )

    def run(self, system_prompt, messages):
        all_messages = [self.SystemMessage(system_prompt)] + messages
        response = requests.post(
            "{base_url}/api/chat".format(base_url=self.base_url), 
            json={
                "model": self.model_name,
                "messages": all_messages,
                "stream": False,
                "options": {"temperature": self.temperature}
            }
        )
        response = response.json()
        return self.OutputMessage(response)

In [32]:
model = LocalOpenAI()

In [58]:
from broprompt import Prompt

prompt = Prompt.from_markdown("broinsight/prompt_hub/guide_question.md")
metadata = "METADATA:\n\n{metadata}\n\n".format(metadata="\n".join(["{field}: {detail}".format(field=field, detail=detail) for field, detail in tips_metadata.items()]))
user_input = "USER_INPUT:\n\nWhat data do we have?"
response = model.run(system_prompt=prompt.str, messages=[model.UserMessage(text=metadata+user_input)])

print(response["content"])

Here’s a quick snapshot of what your dataset contains:

| Variable | Type | What it represents | Key stats (from the metadata) |
|----------|------|---------------------|------------------------------|
| **total_bill** | float | Amount paid for the meal (in dollars) | 3.07 – 50.81 $ (mean ≈ 19.8 $) |
| **tip** | float | Tip paid by the customer | 1.00 – 10.00 $ (mean ≈ 3.0 $) |
| **sex** | string | Customer’s gender | “Male” (157) vs. “Female” (87) |
| **smoker** | string | Whether the customer is a smoker | “No” (151) vs. “Yes” (93) |
| **day** | string | Day of the week the visit occurred | Sat (87), Sun (76), Thur (62), Fri (19) |
| **time** | string | Time of the meal | Dinner (176), Lunch (68) |
| **size** | integer | Number of people in the party | 1 – 6 (most common 2) |

**Key take‑aways**

- **No missing data** – every record has a value for each column.
- The dataset is fairly small (≈ 250 rows, typical of the classic “tips” example from seaborn), so you can do a lot of quick

In [59]:
prompt = Prompt.from_markdown("broinsight/prompt_hub/table_descriptor.md")
metadata = "METADATA:\n\n{metadata}\n\n".format(metadata="\n".join(["{field}: {detail}".format(field=field, detail=detail) for field, detail in tips_metadata.items()]))
response = model.run(system_prompt=prompt.str, messages=[model.UserMessage(text=metadata)])

print(response["content"])

```text
This table records individual restaurant visits. Each row captures the bill amount, the tip paid, the day of the week, the meal time (Lunch or Dinner), the party size, and simple customer details such as gender and whether the customer smokes. 

Business value:
- **Revenue insights**: See how bill size varies by day, time, and party size, and how tips contribute to overall earnings. 
- **Customer segmentation**: Identify patterns such as whether male or female diners, smokers vs. non‑smokers, or larger parties tend to tip more or less. 
- **Operational planning**: Use peak days and times to schedule staff, and adjust menu or pricing for high‑volume periods. 
- **Marketing & loyalty**: Target promotions to groups that spend more or tip generously, and tailor offers for lunch versus dinner crowds.

Typical users:
- Restaurant managers and front‑office staff for daily operations and staffing decisions.  
- Finance and accounting teams to forecast revenue and analyze tip trends.  


In [60]:
table_name = "tips"
table_description = response['content'].split("```text")[-1].split("```")[0]

table_metadata = dict(
    table_name=table_name,
    table_description=table_description,
)
table_metadata.update(tips_metadata)

In [81]:
# question = "Does smoker tip differently than non-smoker breaking down by time of day?"
# question = "Which sex gives the better tips?"
# question = "Which sex has the most dinner?"
# question = "Based on sex, smoker type and meal of the day, which customer group paid most in tips?"
question = "Based on sex, smoker type and meal of the day, which customer group paid most in tips? I wanna see all comparisons."

prompt = Prompt.from_markdown("broinsight/prompt_hub/generate_sql.md")
metadata = "METADATA:\n\n{metadata}\n\n".format(metadata="\n".join(["{field}: {detail}".format(field=field, detail=detail) for field, detail in table_metadata.items()]))
user_input = "USER_INPUT:\n\n{question}".format(question=question)
response = model.run(system_prompt=prompt.str, messages=[model.UserMessage(text=metadata+user_input)])

print(response["content"])

```sql
SELECT
    t.sex,
    t.smoker,
    t.time AS meal_time,
    COUNT(*)          AS visit_count,
    SUM(t.tip)        AS total_tip,
    AVG(t.tip)        AS avg_tip
FROM tips AS t
GROUP BY
    t.sex,
    t.smoker,
    t.time
ORDER BY
    total_tip DESC;
```


In [82]:
sql_query = response['content'].split("```sql")[-1].split("```")[0]
print(sql_query)


SELECT
    t.sex,
    t.smoker,
    t.time AS meal_time,
    COUNT(*)          AS visit_count,
    SUM(t.tip)        AS total_tip,
    AVG(t.tip)        AS avg_tip
FROM tips AS t
GROUP BY
    t.sex,
    t.smoker,
    t.time
ORDER BY
    total_tip DESC;



In [83]:
import duckdb
conn = duckdb.connect()
conn.register("tips", tips)
query_result = conn.execute(sql_query).df()
query_result

Unnamed: 0,sex,smoker,meal_time,visit_count,total_tip,avg_tip
0,Male,No,Dinner,77,243.17,3.158052
1,Male,Yes,Dinner,47,146.79,3.123191
2,Female,No,Dinner,29,88.28,3.044138
3,Female,Yes,Dinner,23,67.83,2.94913
4,Female,No,Lunch,25,61.49,2.4596
5,Male,No,Lunch,20,58.83,2.9415
6,Male,Yes,Lunch,13,36.28,2.790769
7,Female,Yes,Lunch,10,28.91,2.891


In [84]:
prompt = Prompt.from_markdown("broinsight/prompt_hub/chat.md")
context = "CONTEXT:\n\n{context}n\n".format(context=query_result.to_string())
user_input = "USER_INPUT:\n\n{question}".format(question=question)
response = model.run(system_prompt=prompt.str, messages=[model.UserMessage(text=context+user_input)])

print(response["content"])

**Which customer group tipped the most?**

| Sex   | Smoker | Meal time | Total Tips | Avg Tip per visit |
|-------|--------|-----------|------------|-------------------|
| Male  | No     | Dinner    | **$243.17** | $3.16 |
| Male  | Yes    | Dinner    | $146.79     | $3.12 |
| Female| No     | Dinner    | $88.28      | $3.04 |
| Female| Yes    | Dinner    | $67.83      | $2.95 |
| Female| No     | Lunch     | $61.49      | $2.46 |
| Male  | No     | Lunch     | $58.83      | $2.94 |
| Male  | Yes    | Lunch     | $36.28      | $2.79 |
| Female| Yes    | Lunch     | $28.91      | $2.89 |

**Result:**  
The *Male, non‑smoker, Dinner* group generated the highest total tip amount ($243.17).  

---

### Quick sanity check: average tip per visit

If you’re curious about who tipped the most on a *per‑visit* basis, the average tips are already in the table above (they’re essentially the “avg_tip” column from your data). The same group also leads here with $3.16 per visit.

---

### How to get