In [400]:
import pandas as pd
import duckdb

In [401]:
# Input the dataset 
input = 'Cocktails Dataset.xlsx'
excel_sheets = pd.ExcelFile(input).sheet_names
excel_sheets

['Cocktails', 'Sourcing', 'Conversion Rates']

In [402]:
cocktails = pd.read_excel(input, sheet_name='Cocktails')
cocktails.columns = cocktails.columns.str.lower()
cocktails.rename(columns={'price (£)':'price', 'recipe (ml)':'recipe'}, inplace=True)
cocktails

Unnamed: 0,cocktail,price,recipe
0,Raspberry Lemon Drop,8.5,Citroen Vodka:45ml; Chambord:20ml; Triple Sec:...
1,Bay Breeze,7.2,Plain Vodka:60ml; Cranberry Juice:90ml; Pineap...
2,Alabama Slammer,8.25,Southern Comfort:15ml; Sloe Gin:15ml; Amaretto...
3,Watermelon Man,7.0,Plain Vodka:60ml; Watermelon Schapps:30ml; Coi...
4,Orange Blossom,8.7,London Dry Gin:30ml; Cointreau:10ml; Orange Ju...


In [403]:
sourcing = pd.read_excel(input, sheet_name='Sourcing')
sourcing.columns = sourcing.columns.str.lower().str.replace(' ','_')

sourcing

Unnamed: 0,ingredient,price,ml_per_bottle,currency
0,Citroen Vodka,19.25,500.0,Euro
1,Chambord,22.85,450.0,Euro
2,Triple Sec,12.0,400.0,Dollar
3,Plain Vodka,15.24,500.0,Euro
4,Cranberry Juice,1.33,1000.0,Pound
5,Pineapple Juice,1.8,1000.0,Pound
6,Southern Comfort,20.99,750.0,Dollar
7,Sloe Gin,22.99,500.0,Euro
8,Amaretto,16.6,500.0,Euro
9,Orange Juice,1.42,1000.0,Pound


In [404]:
conversion_rates = pd.read_excel(input, sheet_name='Conversion Rates')
conversion_rates.columns = conversion_rates.columns.str.lower()
conversion_rates.rename(columns={'conversion rate £':'conversion_rate'}, inplace=True)
conversion_rates

Unnamed: 0,currency,conversion_rate
0,Euro,1.14
1,Dollar,1.38
2,Pound,1.0


In [405]:
# Solve the challenge
output = duckdb.sql("""
                    select 
                        cocktail, price, 
                        split_part(recipe, '; ', 1) as c1,
                        split_part(recipe, '; ', 2) as c2,
                        split_part(recipe, '; ', 3) as c3,
                        split_part(recipe, '; ', 4) as c4,
                    from cocktails
                    """).to_df()
output

Unnamed: 0,cocktail,price,c1,c2,c3,c4
0,Raspberry Lemon Drop,8.5,Citroen Vodka:45ml,Chambord:20ml,Triple Sec:20ml,
1,Bay Breeze,7.2,Plain Vodka:60ml,Cranberry Juice:90ml,Pineapple Juice:30ml,
2,Alabama Slammer,8.25,Southern Comfort:15ml,Sloe Gin:15ml,Amaretto:15ml,Orange Juice:120ml
3,Watermelon Man,7.0,Plain Vodka:60ml,Watermelon Schapps:30ml,Cointreau:30ml,Lime Soda:200ml
4,Orange Blossom,8.7,London Dry Gin:30ml,Cointreau:10ml,Orange Juice:30ml,


In [406]:
output = duckdb.sql("""
                    select cocktail, price,
                        unnest(array[c1,c2,c3,c4]) as pivot_value
                    from output
                    """).to_df()
output

Unnamed: 0,cocktail,price,pivot_value
0,Raspberry Lemon Drop,8.5,Citroen Vodka:45ml
1,Raspberry Lemon Drop,8.5,Chambord:20ml
2,Raspberry Lemon Drop,8.5,Triple Sec:20ml
3,Raspberry Lemon Drop,8.5,
4,Bay Breeze,7.2,Plain Vodka:60ml
5,Bay Breeze,7.2,Cranberry Juice:90ml
6,Bay Breeze,7.2,Pineapple Juice:30ml
7,Bay Breeze,7.2,
8,Alabama Slammer,8.25,Southern Comfort:15ml
9,Alabama Slammer,8.25,Sloe Gin:15ml


In [407]:
output = duckdb.sql("""
                    select cocktail, max(price) as price, round(sum(cost),2) as cost, round((max(price) - sum(cost)),2) as margin
                    from
                        (select cocktail, price, a.ingredient, (cast(replace(measure, 'ml', '') as integer) * price_per_ml) as cost, price_per_ml
                        from

                            (select cocktail, price, 
                                split_part(pivot_value, ':', 1) as ingredient,
                                split_part(pivot_value, ':', 2) as measure
                            from output) a

                        inner join

                            (select ingredient, (price / conversion_rate / ml_per_bottle) as price_per_ml
                            from sourcing s
                            inner join conversion_rates c
                            on s.currency = c.currency) b
                        on a.ingredient = b. ingredient) d
                    group by cocktail
                    """).to_df()
output.head()

Unnamed: 0,cocktail,price,cost,margin
0,Raspberry Lemon Drop,8.5,2.85,5.65
1,Bay Breeze,7.2,1.78,5.42
2,Alabama Slammer,8.25,1.52,6.73
3,Watermelon Man,7.0,3.58,3.42
4,Orange Blossom,8.7,0.88,7.82
