In [1]:
%run helper/setup_notebook.ipynb import display_table

Successfully connected to leetcode50 database.


In [2]:
tables = ['Product', 'Sales']
for table in tables:
    display_table(table)

+------------+--------------+
| product_id | product_name |
+------------+--------------+
|    100     |    Nokia     |
|    200     |    Apple     |
|    300     |   Samsung    |
+------------+--------------+
+---------+------------+------+----------+-------+
| sale_id | product_id | year | quantity | price |
+---------+------------+------+----------+-------+
|    1    |    100     | 2008 |    10    |  5000 |
|    2    |    100     | 2009 |    12    |  5000 |
|    7    |    200     | 2011 |    15    |  9000 |
+---------+------------+------+----------+-------+


### Write an SQL query that selects the product id, year, quantity, and price for the first year of every product sold.
```
+------------+------------+----------+-------+
| product_id | first_year | quantity | price |
+------------+------------+----------+-------+ 
| 100        | 2008       | 10       | 5000  |
| 200        | 2011       | 15       | 9000  |
+------------+------------+----------+-------+
```

In [3]:
%%sql 

SELECT 
    p.product_id,
    s.year AS first_year,
    s.quantity,
    s.price 
FROM Product p
RIGHT JOIN Sales s ON p.product_id = s.product_id
WHERE (p.product_id,s.year) IN (
    SELECT product_id,MIN(year) 
    FROM Sales 
    GROUP BY product_id)

product_id,first_year,quantity,price
100,2008,10,5000
200,2011,15,9000


# Using Pandas

In [4]:
import pandas as pd 

In [5]:
product_query = %sql SELECT * FROM Product #type: ignore
sales_query = %sql SELECT * FROM Sales # type: ignore 

product_df = product_query.DataFrame()
sales_df = sales_query.DataFrame()

display(product_df, sales_df)

Unnamed: 0,product_id,product_name
0,100,Nokia
1,200,Apple
2,300,Samsung


Unnamed: 0,sale_id,product_id,year,quantity,price
0,1,100,2008,10,5000
1,2,100,2009,12,5000
2,7,200,2011,15,9000


In [6]:
product_df.merge(sales_df, on='product_id', how='right')

Unnamed: 0,product_id,product_name,sale_id,year,quantity,price
0,100,Nokia,1,2008,10,5000
1,100,Nokia,2,2009,12,5000
2,200,Apple,7,2011,15,9000


In [7]:
joined_df = product_df.merge(sales_df, on='product_id', how='right')
joined_df

Unnamed: 0,product_id,product_name,sale_id,year,quantity,price
0,100,Nokia,1,2008,10,5000
1,100,Nokia,2,2009,12,5000
2,200,Apple,7,2011,15,9000


In [8]:
# Things can get pretty messy quickly
''' 
x.name[0] represents the current product ID group being processed by the lambda function. 
We use this value to filter the corresponding product ID category in the sales_df
and obtain its minimum year using min(). 
Then, we compare this minimum year with x['year'].min() to determine 
if the group should be kept or discarded.
'''
joined_df.groupby(['product_id', 'year']) \
    .filter(lambda x: x['year'].min() == sales_df[sales_df['product_id'] == x.name[0]]['year'].min())

Unnamed: 0,product_id,product_name,sale_id,year,quantity,price
0,100,Nokia,1,2008,10,5000
2,200,Apple,7,2011,15,9000


### More readable appraoch: 
- ##### *The groupby operation is performed on the 'product_id' column, and the transform method with the 'min' aggregation function is applied to the 'year' column. This creates a new column 'first_year' that holds the minimum year value for each product ID category.*

In [9]:
joined_df['first_year'] = joined_df.groupby('product_id')['year'].transform('min')
joined_df

Unnamed: 0,product_id,product_name,sale_id,year,quantity,price,first_year
0,100,Nokia,1,2008,10,5000,2008
1,100,Nokia,2,2009,12,5000,2008
2,200,Apple,7,2011,15,9000,2011


In [10]:
joined_df[joined_df['year'] == joined_df['first_year']]

Unnamed: 0,product_id,product_name,sale_id,year,quantity,price,first_year
0,100,Nokia,1,2008,10,5000,2008
2,200,Apple,7,2011,15,9000,2011


In [11]:
# better readable option is to use query
joined_df = joined_df.query("year == first_year")

In [12]:
joined_df = joined_df.drop('year', axis=1)
joined_df

Unnamed: 0,product_id,product_name,sale_id,quantity,price,first_year
0,100,Nokia,1,10,5000,2008
2,200,Apple,7,15,9000,2011
