In [1]:
# Load the `sql` IPython extension; the %sql / %%sql magics in the cells below rely on it.
%load_ext sql

In [2]:
from sqlalchemy import create_engine

In [3]:
# Point the %sql magic at the PostgreSQL database "Practice" on localhost, user "postgres".
# NOTE(review): the password is hardcoded in this URL. Consider supplying the connection
# string from outside the notebook (e.g. an environment variable) before sharing it.
%sql postgresql://postgres:root@localhost/Practice

# Question

- For pairs of brands in the same year (e.g. apple/samsung/2020 and samsung/apple/2020)
    - if custom1 = custom3 AND custom2 = custom4 : keep only one row of the pair
- For pairs of brands in the same year
    - if custom1 != custom3 OR custom2 != custom4 : keep both rows of the pair
- For brands that do not have a pair in the same year : keep those rows as well

In [4]:
%%sql

DROP TABLE IF EXISTS brands;

-- Fixture table for the exercise. Each row names two brands, a year and four custom values.
CREATE TABLE brands (
    brand1  VARCHAR(20),
    brand2  VARCHAR(20),
    year    INT,
    custom1 INT,
    custom2 INT,
    custom3 INT,
    custom4 INT
);

-- 2020 apple/samsung: mirrored rows whose custom values match (custom1 = custom3, custom2 = custom4).
INSERT INTO brands (brand1, brand2, year, custom1, custom2, custom3, custom4)
VALUES ('apple', 'samsung', 2020, 1, 2, 1, 2);
INSERT INTO brands (brand1, brand2, year, custom1, custom2, custom3, custom4)
VALUES ('samsung', 'apple', 2020, 1, 2, 1, 2);

-- 2021 apple/samsung: mirrored rows whose custom values differ.
INSERT INTO brands (brand1, brand2, year, custom1, custom2, custom3, custom4)
VALUES ('apple', 'samsung', 2021, 1, 2, 5, 3);
INSERT INTO brands (brand1, brand2, year, custom1, custom2, custom3, custom4)
VALUES ('samsung', 'apple', 2021, 5, 3, 1, 2);

-- Rows without a mirrored counterpart (google has no second brand at all).
INSERT INTO brands (brand1, brand2, year, custom1, custom2, custom3, custom4)
VALUES ('google', NULL, 2020, 5, 9, NULL, NULL);
INSERT INTO brands (brand1, brand2, year, custom1, custom2, custom3, custom4)
VALUES ('oneplus', 'nothing', 2020, 5, 9, 6, 3);

 * postgresql://postgres:***@localhost/Practice
Done.
Done.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.


[]

In [5]:
%%sql

SELECT
    brand1,
    brand2,
    year,
    custom1,
    custom2,
    custom3,
    custom4
-- no filter here, every fixture row is listed for reference
FROM brands;

 * postgresql://postgres:***@localhost/Practice
6 rows affected.


brand1,brand2,year,custom1,custom2,custom3,custom4
apple,samsung,2020,1,2,1.0,2.0
samsung,apple,2020,1,2,1.0,2.0
apple,samsung,2021,1,2,5.0,3.0
samsung,apple,2021,5,3,1.0,2.0
google,,2020,5,9,,
oneplus,nothing,2020,5,9,6.0,3.0


## Solution 

In [19]:
%%sql

WITH keyed AS (
    -- Build an order-insensitive key for each row so that apple/samsung and
    -- samsung/apple land in the same group. Keeping the two halves as separate
    -- columns avoids the collisions a delimiter-less concatenation allows
    -- (for example 'ab' + 'c' versus 'a' + 'bc').
    -- LEAST and GREATEST skip NULL arguments in PostgreSQL, so a row with no
    -- second brand is keyed on its single brand and forms its own group.
    SELECT
        b.*,
        LEAST(brand1, brand2)    AS brand_lo,
        GREATEST(brand1, brand2) AS brand_hi
    FROM brands AS b
),
ranked AS (
    -- Number the rows inside each (pair, year) group. Ordering by the brand
    -- columns makes the surviving row deterministic instead of arbitrary.
    SELECT
        k.*,
        ROW_NUMBER() OVER (
            PARTITION BY brand_lo, brand_hi, year
            ORDER BY brand1, brand2
        ) AS rnk
    FROM keyed AS k
)
SELECT brand1, brand2, year, custom1, custom2, custom3, custom4
FROM ranked
-- Always keep the first row of every group (this also covers unpaired brands).
-- Keep further rows only when the custom values differ in at least one
-- position, which is the OR condition stated in the question.
WHERE rnk = 1
   OR custom1 <> custom3
   OR custom2 <> custom4;
    

 * postgresql://postgres:***@localhost/Practice
5 rows affected.


brand1,brand2,year,custom1,custom2,custom3,custom4
apple,samsung,2020,1,2,1.0,2.0
apple,samsung,2021,1,2,5.0,3.0
samsung,apple,2021,5,3,1.0,2.0
google,,2020,5,9,,
oneplus,nothing,2020,5,9,6.0,3.0
