# PyDough Demo

In [1]:
from llm import LLMClient

In [2]:
# Which backend serves the model, and the identifier of the model to request.
provider = "aws"
model = "us.anthropic.claude-3-7-sonnet-20250219-v1:0"

# Single client instance reused by the `ask` / `correct` cells below.
client = LLMClient(provider, model)

In [6]:
# Send the natural-language question to the client; `result` is inspected
# again by the cells that follow.
result = client.ask(
    "For each of the 5 largest part sizes, find the part of that size with the largest retail price"
)

# Report the generated code and the resulting dataframe, separated by a
# 50-dash rule. One print with sep="\n" emits the same three lines as three
# consecutive print calls would.
print(
    f"Pydough code generated: \n {result.code}",
    "-" * 50,
    f"pydough dataframe: \n {result.df}",
    sep="\n",
)

Pydough code generated: 
 # First, calculate the maximum retail price for each part size
max_price_by_size = PARTITION(parts.CALCULATE(size=size, retail_price=retail_price), 
                            name="parts_group", 
                            by=size).CALCULATE(
    size=size,
    max_retail_price=MAX(parts_group.retail_price)
)

# Then join back to the parts collection to get the part details with the maximum retail price
parts_with_max_price = parts.CALCULATE(
    size=size,
    name=name,
    retail_price=retail_price,
    part_type=part_type
).WHERE(
    (size == max_price_by_size.size) & 
    (retail_price == max_price_by_size.max_retail_price)
)

# Finally, get the top 5 parts by size
largest_parts_by_size = parts_with_max_price.TOP_K(5, by=size.DESC())
--------------------------------------------------
pydough dataframe: 
 None


In [7]:
# print() writes the explanation with its newlines and code fence rendered as
# plain text, rather than as an escaped string repr.
print(result.full_explanation)

I'll create a PyDough code snippet to find the part with the largest retail price for each of the 5 largest part sizes.

First, I need to:
1. Group parts by their size
2. For each size, find the part with the largest retail price
3. Sort by size in descending order
4. Take the top 5 sizes

Here's the PyDough code:

```python
# First, calculate the maximum retail price for each part size
max_price_by_size = GROUP_BY(parts.CALCULATE(size=size, retail_price=retail_price), 
                            name="parts_group", 
                            by=size).CALCULATE(
    size=size,
    max_retail_price=MAX(parts_group.retail_price)
)

# Then join back to the parts collection to get the part details with the maximum retail price
parts_with_max_price = parts.CALCULATE(
    size=size,
    name=name,
    retail_price=retail_price,
    part_type=part_type
).WHERE(
    (size == max_price_by_size.size) & 
    (retail_price == max_price_by_size.max_retail_price)
)

# Finally, get the top 5 parts

In [8]:
# Bare final expression: the notebook echoes the error message stored on the
# first result (the dataframe printed for that result was None).
result.exception

"An error occurred while processing the code: Unrecognized term of simple table collection 'parts' in graph 'TPCH': 'parts'"

In [9]:
# Hand the failed result back to the client and keep what it returns under a
# separate name, so the original `result` stays available for comparison.
corrected_result = client.correct(result)

In [10]:
# No output is rendered for this cell, which is what a bare expression
# evaluating to None produces — presumably no error was recorded; verify.
corrected_result.exception

In [11]:
# Bare final expression: the notebook shows the string's repr, so newlines
# appear as literal \n and quotes are escaped (cell In [7] used print() instead).
corrected_result.full_explanation

'I see the issue with the code. The error message indicates that the collection \'parts\' is not recognized in the TPCH graph. Looking at the database structure reference file, I can see that the collection is indeed named \'parts\', but there might be an issue with how it\'s being accessed.\n\nLet me fix the code:\n\n```python\n# First, calculate the maximum retail price for each part size\nparts_with_size = parts.CALCULATE(\n    size=size,\n    retail_price=retail_price\n)\n\n# Group by size to find the maximum retail price for each size\nmax_price_by_size = GROUP_BY(parts_with_size, name="parts_group", by=size).CALCULATE(\n    size=size,\n    max_retail_price=MAX(parts_group.retail_price)\n)\n\n# Join back to the parts collection to get the part details with the maximum retail price\nparts_with_max_price = parts.CALCULATE(\n    size=size,\n    name=name,\n    retail_price=retail_price,\n    part_type=part_type\n).WHERE(\n    retail_price == max_price_by_size.WHERE(size == parts.size

In [14]:
# NOTE(review): the rendered frame runs to index 199998, far more rows than an
# answer about "the 5 largest part sizes" would need — confirm the corrected
# query actually answers the question before relying on this output.
corrected_result.df

Unnamed: 0,size,retail_price,name,part_type
0,7,901.00,goldenrod lavender spring chocolate lace,PROMO BURNISHED COPPER
1,1,902.00,blush thistle blue yellow saddle,LARGE BRUSHED BRASS
2,21,903.00,spring green yellow purple cornsilk,STANDARD POLISHED BRASS
3,14,904.00,cornflower chocolate smoke green pink,SMALL PLATED BRASS
4,15,905.00,forest brown coral puff cream,STANDARD POLISHED TIN
...,...,...,...,...
199995,11,2095.99,cream navajo saddle dodger navy,PROMO PLATED COPPER
199996,37,2096.99,peru maroon snow grey chartreuse,PROMO PLATED NICKEL
199997,49,2097.99,pink wheat powder burlywood snow,MEDIUM BURNISHED BRASS
199998,24,2098.99,goldenrod drab brown salmon mint,PROMO PLATED BRASS
