In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import duckdb
import pandas as pd
import seaborn as sns

tips = sns.load_dataset("tips")

tips.columns.tolist()

['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size']

In [3]:
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [4]:
from broinsight.data_quality.field_profile import field_profile

tips_metadata = field_profile(tips)
pd.DataFrame.from_dict(tips_metadata, orient="index")

Unnamed: 0,data_types,missing_values,missing_values_pct,unique_values,unique_values_pct,most_frequent,statistics
total_bill,float,0,0.0,229,0.94,"{13.42: 3, 13.81: 2, 15.98: 2, 17.92: 2, 10.07...","{'min': 3.07, 'max': 50.81, 'mean': 19.79, 'me..."
tip,float,0,0.0,123,0.5,"{2.0: 33, 3.0: 23, 4.0: 12, 5.0: 10, 2.5: 10}","{'min': 1.0, 'max': 10.0, 'mean': 3.0, 'median..."
sex,string,0,0.0,2,0.01,"{'Male': 157, 'Female': 87}","{'mode': 'Male', 'avg_length': 4.71, 'min_leng..."
smoker,string,0,0.0,2,0.01,"{'No': 151, 'Yes': 93}","{'mode': 'No', 'avg_length': 2.38, 'min_length..."
day,string,0,0.0,4,0.02,"{'Sat': 87, 'Sun': 76, 'Thur': 62, 'Fri': 19}","{'mode': 'Sat', 'avg_length': 3.25, 'min_lengt..."
time,string,0,0.0,2,0.01,"{'Dinner': 176, 'Lunch': 68}","{'mode': 'Dinner', 'avg_length': 5.72, 'min_le..."
size,integer,0,0.0,6,0.02,"{2: 156, 3: 38, 4: 37, 5: 5, 1: 4}","{'min': 1.0, 'max': 6.0, 'mean': 2.57, 'median..."


In [5]:
tips_metadata

{'total_bill': {'data_types': 'float',
  'missing_values': 0,
  'missing_values_pct': 0.0,
  'unique_values': 229,
  'unique_values_pct': 0.94,
  'most_frequent': {13.42: 3, 13.81: 2, 15.98: 2, 17.92: 2, 10.07: 2},
  'statistics': {'min': 3.07,
   'max': 50.81,
   'mean': 19.79,
   'median': 17.8,
   'std': 8.9,
   'var': 79.25,
   'skew': 1.13,
   'kurt': 1.22,
   'iqr': 10.78,
   'cv': 0.45,
   'lower_bound': -2.82,
   'upper_bound': 40.3}},
 'tip': {'data_types': 'float',
  'missing_values': 0,
  'missing_values_pct': 0.0,
  'unique_values': 123,
  'unique_values_pct': 0.5,
  'most_frequent': {2.0: 33, 3.0: 23, 4.0: 12, 5.0: 10, 2.5: 10},
  'statistics': {'min': 1.0,
   'max': 10.0,
   'mean': 3.0,
   'median': 2.9,
   'std': 1.38,
   'var': 1.91,
   'skew': 1.47,
   'kurt': 3.65,
   'iqr': 1.56,
   'cv': 0.46,
   'lower_bound': -0.34,
   'upper_bound': 5.91}},
 'sex': {'data_types': 'string',
  'missing_values': 0,
  'missing_values_pct': 0.0,
  'unique_values': 2,
  'unique_values

In [6]:
descriptions = dict(
    total_bill="the amount of paid bill of the meal",
    tip="the amount of tip that customers paid",
    sex="the gender of customers",
    smoker="it indicates that a customer is a smoker or not. if No means a customer is a non-smoker, Yes means a customer is a smoker.",
    day="this is a day of the week when a customer having a meal here. i.e. Mon, Tue, Wed, Thu, Fri, Sat, Sun",
    time="the time of the meal. it can be either Dinner or Lunch",
    size="the number of dishes that customers have",
)

In [7]:
[tips_metadata[feat].update(dict(description=descriptions[feat])) for feat in tips.columns.tolist()]

[None, None, None, None, None, None, None]

In [8]:
from broinsight.experiment.ollama import LocalOpenAI
model = LocalOpenAI()

In [9]:
from broprompt import Prompt

prompt = Prompt.from_markdown("broinsight/prompt_hub/guide_question.md")
metadata = "METADATA:\n\n{metadata}\n\n".format(metadata="\n".join(["{field}: {detail}".format(field=field, detail=detail) for field, detail in tips_metadata.items()]))
user_input = "USER_INPUT:\n\nWhat data do we have?"
response = model.run(system_prompt=prompt.str, messages=[model.UserMessage(text=metadata+user_input)])

print(response['content'])

Hi there!  
From the metadata you shared, your dataset captures a snapshot of restaurant visits. Here’s a quick rundown of what’s available:

| Variable | Type | What it represents | Key stats |
|----------|------|--------------------|-----------|
| **total_bill** | float | Amount customers paid for their meal | $3.07 – $50.81 (avg $19.79) |
| **tip** | float | Tip given by the customer | $1 – $10 (avg $3.00) |
| **sex** | string | Customer gender | Male / Female |
| **smoker** | string | Whether the customer smokes | Yes / No |
| **day** | string | Day of the week the visit occurred | Sat, Sun, Thu, Fri |
| **time** | string | Meal time | Dinner / Lunch |
| **size** | integer | Number of people in the party | 1 – 6 (avg 2.57) |

With these fields you can explore a variety of business questions—from how tips vary by day or time, to whether party size influences the total bill.

---

### 3‑5 actionable questions you might want to ask next

1. **What is the average tip amount for each da

In [10]:
prompt = Prompt.from_markdown("broinsight/prompt_hub/table_descriptor.md")
metadata = "METADATA:\n\n{metadata}\n\n".format(metadata="\n".join(["{field}: {detail}".format(field=field, detail=detail) for field, detail in tips_metadata.items()]))
response = model.run(system_prompt=prompt.str, messages=[model.UserMessage(text=metadata)])

print(response["content"])

```text
This table records every dining visit at the restaurant.  
Each row captures the total amount the customer paid, the tip they left, the
gender and smoking status of the diner, the day of the week and whether the
meal was lunch or dinner, and how many dishes were ordered.  

Key business uses  
• Track revenue and tip trends by day, meal time, or party size.  
• Identify patterns in spending or tipping that could inform pricing or
promotion decisions.  
• Assess how customer demographics (gender, smoker) or group size affect
spending and tip levels.  
• Plan staffing and inventory by knowing peak days and meal periods.  

Relationships to other processes  
• Finance: feeds daily revenue and tip reporting.  
• Marketing: helps target promotions to high‑spending or tip‑generous groups.  
• Operations: supports scheduling and supply‑chain decisions.  

Typical users  
• Restaurant managers and owners for day‑to‑day and strategic decisions.  
• Finance teams for revenue and tip anal

In [11]:
table_name = "tips"
table_description = response['content'].split("```text")[-1].split("```")[0]

table_metadata = dict(
    table_name=table_name,
    table_description=table_description,
)
table_metadata.update(tips_metadata)

In [67]:
question = "I wanna know how many people visit my shop compare to global population and tell me the source of information."
# question = "Does smoker tip differently than non-smoker breaking down by time of day?"
# question = "Which sex gives the better tips?"
# question = "Which sex gives the better tips than one another?"
# question = "I wanna know that weekend have a size bigger than weekday or not? If so how many?"
# question = "อยากรู้ว่า คนกินข้าวเยอะ ในวันไหนมากกว่ากัน ระหว่าง วันจันทร์ และ วันศุกร์ เป็นจำนวนเท่าไหร่?"
# question = "I wanna know how many people are in between smoker and non-smoker groups. Also, do you know which brand of ciggaratte do the smoker prefer in our restaurant?"
# question = "Which sex has the most dinner?"
# question = "Based on sex, smoker type and meal of the day, which customer group paid most in tips?"
# question = "Based on sex, smoker type and meal of the day, which customer group paid most in tips? I wanna see all comparisons."

prompt = Prompt.from_markdown("broinsight/prompt_hub/generate_sql.md")
metadata = "METADATA:\n\n{metadata}\n\n".format(metadata="\n".join(["{field}: {detail}".format(field=field, detail=detail) for field, detail in table_metadata.items()]))
user_input = "USER_INPUT:\n\n{question}".format(question=question)
response = model.run(system_prompt=prompt.str, messages=[model.UserMessage(text=metadata+user_input)])

print(response["content"])

```sql
-- Count of shop visits recorded in the tips table
SELECT COUNT(*) AS shop_visits
FROM tips;

-- Source for global population figure: United Nations World Population Prospects
```


In [68]:
sql_query = response['content'].split("```sql")[-1].split("```")[0]
print(sql_query)


-- Count of shop visits recorded in the tips table
SELECT COUNT(*) AS shop_visits
FROM tips;

-- Source for global population figure: United Nations World Population Prospects



In [69]:
import duckdb
conn = duckdb.connect()
conn.register("tips", tips)
query_result = conn.execute(sql_query).df()
query_result

Unnamed: 0,shop_visits
0,244


In [70]:
prompt = Prompt.from_markdown("broinsight/prompt_hub/chat.md")
context = "CONTEXT:\n\n{context}n\n".format(context=query_result.to_string())
user_input = "USER_INPUT:\n\n{question}".format(question=question)
response = model.run(system_prompt=prompt.str, messages=[model.UserMessage(text=context+user_input)])

print(response["content"])

### Quick Answer  
- **Shop visits**: 244 (the figure you provided)  
- **Global population**: ≈ 8 billion (World Bank/UN estimate)  
- **Proportion**:  
  \[
  \frac{244}{8{,}000{,}000{,}000}\;=\;3.05\times10^{-8}
  \]
  That’s about **0.000003 %** of the world’s people.  
- **Source for global population**:  
  *World Bank – World Development Indicators* (latest 2023 estimate) and the *United Nations World Population Prospects* (2022 revision).  
  (Both are freely available online and updated annually.)

---

## How the numbers line up

| Metric | Value | Notes |
|--------|-------|-------|
| **Shop visits** | 244 | From your data; not clear if daily, weekly, monthly, etc. |
| **Global population** | 8 billion | 2023 estimate (World Bank). |
| **Visits per person worldwide** | 3.05 × 10⁻⁸ | Very small fraction. |

> **Interpretation** – 244 visits is an incredibly tiny slice of the global population.  
> If you’re looking at *daily* visits, that would be roughly 0.000003 % of all peo

In [71]:
# 1. LLM generates function code
prompt = Prompt.from_markdown("broinsight/prompt_hub/chart_builder.md")
data = "DATA:\n\n{data}\n\n".format(data=query_result.to_string())
user_input = "USER_INPUT:\n\n{question}".format(question=question)
response = model.run(system_prompt=prompt.str, messages=[
    model.UserMessage(text=data+user_input)
])

In [72]:
print(response['content'])

```python
def create_chart(data):
    import plotly.express as px
    import plotly.graph_objects as go
    import pandas as pd
    
    # Extract shop visits count
    shop_visits = data['shop_visits'].iloc[0]
    
    # Global population estimate (World Bank, 2023)
    global_population = 7_900_000_000
    
    # Build comparison DataFrame
    comp_df = pd.DataFrame({
        'Metric': ['Shop Visits', 'Global Population'],
        'Count': [shop_visits, global_population]
    })
    
    # Create bar chart with logarithmic y-axis to handle scale difference
    fig = px.bar(
        comp_df,
        x='Metric',
        y='Count',
        title='Shop Visits vs Global Population',
        labels={'Metric': 'Metric', 'Count': 'Count'},
        text='Count'
    )
    
    # Update layout for log scale and better appearance
    fig.update_layout(
        yaxis=dict(type='log', title='Count (log scale)'),
        xaxis=dict(title='Metric'),
        title=dict(x=0.5),
        uniformtext_min

In [73]:
function_code = response['content'].split("```python")[-1].split("```")[0]

# 2. Execute to create function
exec(function_code)  # Now create_chart function exists

# 3. Call with actual data
fig = create_chart(query_result)

In [75]:
fig.show()

In [55]:
# 4. Display with fallback
try:
    fig.show()
except ValueError:
    fig.show(renderer="browser")  # Fallback to browser
