In [3]:
import getpass

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import text
from sqlalchemy.engine import URL


In [None]:
# Ask for the database password interactively so that it is never stored
# in the notebook source.
password = getpass.getpass(prompt="Введите пароль от PostgreSQL: ")



In [9]:
# Connection settings for the PostgreSQL instance.
user = "postgres"        # PostgreSQL user name
host = "localhost"       # or the server's IP address
port = "5432"            # default PostgreSQL port
database = "postgres"    # name of the target database

# Build the URL from its components instead of formatting a string by hand:
# URL.create() escapes special characters, so a password that contains
# "@", "/", ":" or "%" no longer produces a malformed connection URL.
connection_url = URL.create(
    drivername="postgresql+psycopg2",
    username=user,
    password=password,
    host=host,
    port=int(port),
    database=database,
)

# Kept for backward compatibility with code that expects the string form;
# unlike the previous f-string, this rendering is correctly URL-escaped.
connection_string = connection_url.render_as_string(hide_password=False)

engine = create_engine(connection_url)
engine


Engine(postgresql+psycopg2://postgres:***@localhost:5432/postgres)

In [10]:
# Smoke test: open a connection, run a trivial query and print the single
# value it returns.
with engine.connect() as conn:
    print(conn.execute(text("SELECT 1")).scalar())
    

1


Вывести все уникальные бренды, у которых есть хотя бы один продукт со стандартной стоимостью выше 1500 долларов, и суммарными продажами не менее 1000 единиц.


In [11]:
# Brands that have at least one sold product with standard_cost above 1500
# and whose total sales amount to at least 1000 units.
# Units are measured as SUM(oi.quantity); counting distinct order ids would
# measure the number of orders, not the number of units sold.
# NOTE(review): the threshold is applied per brand, as in the original
# query -- confirm that this is the intended reading of the task.
query = text("""
    SELECT p.brand
    FROM product p
    JOIN order_items oi
        ON p.product_id = oi.product_id
    GROUP BY p.brand
    HAVING MAX(p.standard_cost) > 1500
       AND SUM(oi.quantity) >= 1000
""")

df = pd.read_sql(query, engine)
df

Unnamed: 0,brand
0,Giant Bicycles
1,OHM Cycles
2,Solex
3,Trek Bicycles


Для каждого дня в диапазоне с 2017-04-01 по 2017-04-09 включительно вывести количество подтвержденных онлайн-заказов и количество уникальных клиентов, совершивших эти заказы.


In [None]:


    select order_date::date , count(distinct order_id), count(distinct customer_id)
    from orders
    where order_date::date BETWEEN '2017-04-01' AND '2017-04-09'
    and online_order is true
    and order_status ='Approved'
    group by order_date::date
    order by order_date::date desc
""")

df_2 = pd.read_sql(query, engine)
df_2

Unnamed: 0,order_date,count,count.1
0,2017-04-09,30,30
1,2017-04-08,33,33
2,2017-04-07,24,24
3,2017-04-06,36,36
4,2017-04-05,33,32
5,2017-04-04,32,32
6,2017-04-03,27,27
7,2017-04-02,29,29
8,2017-04-01,37,37


Вывести профессии клиентов:

из сферы IT, чья профессия начинается с Senior;

из сферы Financial Services, чья профессия начинается с Lead.

Для обеих групп учитывать только клиентов старше 35 лет. Объединить выборки с помощью UNION ALL.

In [18]:
# Job titles of customers whose age is at least 35 years:
#   * IT industry, title starting with "Senior";
#   * Financial Services industry, title starting with "Lead".
# The task asks for the two selections to be combined with UNION ALL, so the
# branches are concatenated without an extra de-duplication pass
# (each branch is already DISTINCT on its own).
query = text("""
    select distinct job_title
    from customer
    where job_industry_category='IT'
    and lower(job_title) like 'senior%'
    and AGE(DOB) >= INTERVAL '35 years'
    union all
    select distinct job_title
    from customer
    where job_industry_category='Financial Services'
    and lower(job_title) like 'lead%'
    and AGE(DOB) >= INTERVAL '35 years'
""")

df_3 = pd.read_sql(query, engine)
df_3

Unnamed: 0,job_title
0,Senior Developer
1,Senior Sales Associate


Вывести бренды, которые были куплены клиентами из сферы Financial Services, но не были куплены клиентами из сферы IT.

In [19]:
# Brands bought by Financial Services customers but never by IT customers.
# Every order item is joined to its product, order and customer; rows are
# grouped by brand and two conditional sums count the joined rows per
# industry. A brand is kept when it has at least one 'Financial Services'
# row and zero 'IT' rows.
# NOTE(review): orders are not filtered by order_status here -- confirm
# whether non-approved orders should count as purchases.
query = text("""
    SELECT p.brand
    FROM product p
    JOIN order_items oi ON p.product_id = oi.product_id
    JOIN orders o ON oi.order_id = o.order_id
    JOIN customer c ON o.customer_id = c.customer_id
    GROUP BY p.brand
    HAVING 
        SUM(CASE WHEN c.job_industry_category = 'Financial Services' THEN 1 ELSE 0 END) > 0
        AND
        SUM(CASE WHEN c.job_industry_category = 'IT' THEN 1 ELSE 0 END) = 0
""")

# Run the statement and show the resulting frame as the cell output.
df_4 = pd.read_sql(query, engine)
df_4

Unnamed: 0,brand


Вывести 10 клиентов (ID, имя, фамилия), которые совершили наибольшее количество онлайн-заказов (в штуках) брендов Giant Bicycles, Norco Bicycles, Trek Bicycles, при условии, что они активны и имеют оценку имущества (property_valuation) выше среднего среди клиентов из того же штата.

In [None]:
# Top customers by number of online orders that contain Giant Bicycles,
# Norco Bicycles or Trek Bicycles products.
#   avg_prop     -- average property_valuation per state.
#   brand_orders -- per customer, the number of distinct online orders with
#                   at least one item of the three brands, restricted to
#                   customers whose property_valuation is above their
#                   state's average.
#   ranked       -- RANK() over that count, highest first.
# The final SELECT keeps rank <= 10; because RANK() assigns equal ranks to
# ties, more than 10 rows can be returned.
# NOTE(review): the task also requires the customers to be "active", but no
# such condition appears in this query -- confirm which column encodes it.
# NOTE(review): orders are not filtered by order_status -- confirm intent.
query = text("""
WITH avg_prop AS (
    SELECT 
        state,
        AVG(property_valuation) AS avg_prop
    FROM customer
    GROUP BY state
    ),
brand_orders AS (
    SELECT
        c.customer_id,
        c.first_name,
        c.last_name,
        COUNT(DISTINCT o.order_id) AS online_orders_cnt
    FROM customer c
    JOIN avg_prop ap
        ON c.state = ap.state
    JOIN orders o
        ON o.customer_id = c.customer_id
       AND o.online_order = TRUE            
    JOIN order_items oi
        ON oi.order_id = o.order_id
    JOIN product p
        ON p.product_id = oi.product_id
    WHERE c.property_valuation > ap.avg_prop
      AND p.brand IN (
            'Giant Bicycles',
            'Norco Bicycles',
            'Trek Bicycles')
    GROUP BY 
        c.customer_id,
        c.first_name,
        c.last_name
    ),
ranked AS (
    SELECT
        customer_id,
        first_name,
        last_name,
        online_orders_cnt,
        RANK() OVER (ORDER BY online_orders_cnt DESC) AS rnk
    FROM brand_orders
    )   
SELECT
    customer_id,
    first_name,
    last_name,
    online_orders_cnt
FROM ranked
WHERE rnk <= 10
ORDER BY online_orders_cnt DESC, customer_id
""")

# Run the statement and show the resulting frame as the cell output.
df_5 = pd.read_sql(query, engine)
df_5

Unnamed: 0,customer_id,first_name,last_name,online_orders_cnt
0,353,Antonia,Cardis,7
1,3221,Brigid,Quigley,7
2,1,Laraine,Medendorp,6
3,25,Geoff,Assaf,6
4,478,Darcey,Harberer,6
5,534,Madel,Palffrey,6
6,714,Burtie,Scintsbury,6
7,787,Norma,Batrim,6
8,1117,Georgena,Guilaem,6
9,1302,Ericka,Eggers,6


Вывести всех клиентов (ID, имя, фамилия), у которых нет подтвержденных онлайн-заказов за последний год, но при этом они владеют автомобилем и их сегмент благосостояния не Mass Customer.

In [21]:
# Customers who own a car, are not in the 'Mass Customer' wealth segment and
# have no approved online order in the most recent calendar year present in
# the orders table.
#   pret -- customer ids that do have such an order.
# The anti-join uses NOT EXISTS rather than NOT IN: with NOT IN a single
# NULL customer_id in the subquery would make the predicate unknown for
# every customer and silently return an empty result.
query = text("""
with pret as (
    select customer_id
    from orders
    where online_order is true
    and order_status ='Approved'
    and date_trunc('year', order_date::date)=(select max(date_trunc('year', order_date::date)) from orders)
)
select c.customer_id,
    c.first_name,
    c.last_name
from customer c
where not exists (select 1 from pret p where p.customer_id = c.customer_id)
    and c.wealth_segment<> 'Mass Customer'
    and c.owns_car='Yes'
""")

df_6 = pd.read_sql(query, engine)
df_6

Unnamed: 0,customer_id,first_name,last_name
0,65,Yale,Tanser
1,71,Hoyt,Glavias
2,163,Kessia,Helder
3,184,Nappie,Paolo
4,191,Christopher,Heining
...,...,...,...
168,3976,Gretel,Chrystal
169,3989,Nicolas,Burdass
170,3992,Germain,Tireman
171,3997,Blanch,Nisuis


Вывести всех клиентов из сферы 'IT' (ID, имя, фамилия), которые купили 2 из 5 продуктов с самой высокой list_price в продуктовой линейке Road.

In [22]:
# IT customers who bought at least 2 distinct products out of the top 5
# Road-line products by list_price.
#   pret -- Road products ranked by list_price, highest first.
# The main query joins order items to the products with rank <= 5, to
# approved orders and to IT customers, then keeps customers with two or
# more distinct matching product ids.
# NOTE(review): RANK() gives tied prices the same rank, so "rnk <= 5" can
# admit more than five products -- confirm this is acceptable.
# NOTE(review): the ORDER BY inside the CTE looks redundant for the final
# result -- confirm before removing.
query = text("""
with pret as (
select 
RANK() OVER (ORDER BY list_price DESC) AS rnk, 
product_id
from product
where product_line='Road'
order by list_price desc
)
select
	c.customer_id,
    c.first_name,
    c.last_name
from order_items oi
join pret p on oi.product_id=p.product_id and p.rnk<=5
join orders o on  oi.order_id=o.order_id and o.order_status='Approved'
join customer c on o.customer_id =c.customer_id and job_industry_category='IT'
group by c.customer_id, c.first_name, c.last_name
having count (distinct p.product_id)>=2
""")

# Run the statement and show the resulting frame as the cell output.
df_7 = pd.read_sql(query, engine)
df_7

Unnamed: 0,customer_id,first_name,last_name
0,604,Mella,Petrovsky
1,799,Harland,Spilisy
2,983,Shaylyn,Riggs
3,1683,Brenn,Bacon
4,1791,Ninon,Van Der Hoog
5,1820,Yard,Teeney
6,1887,Kynthia,Purcer
7,2469,Kermie,Hedger
8,3406,Lucy,Lackmann


Вывести клиентов (ID, имя, фамилия, сфера деятельности) из сфер IT или Health, которые совершили не менее 3 подтвержденных заказов в период 2017-01-01 по 2017-03-01, и при этом их общий доход от этих заказов превышает 10 000 долларов.
Разделить вывод на две группы (IT и Health) с помощью UNION.

In [23]:
# IT and Health customers with at least 3 approved orders placed between
# 2017-01-01 and 2017-03-01 whose total revenue from those orders exceeds
# 10 000. The two industries are selected separately and combined with
# UNION, as the task requires; the trailing ORDER BY sorts the combined set.
# Orders are counted with COUNT(DISTINCT o.order_id): after the join to
# order_items every order appears once per item, so a plain COUNT would
# count item rows and let a single multi-item order pass the threshold.
# Revenue must strictly exceed the limit, hence ">" rather than ">=".
query = text("""
select c.customer_id,
    c.first_name,
    c.last_name,
    c.job_industry_category
from customer c
join orders o on c.customer_id = o.customer_id
              and o.order_status = 'Approved'
              and o.order_date::date BETWEEN '2017-01-01' AND '2017-03-01'
join order_items oi on o.order_id = oi.order_id
where c.job_industry_category = 'IT'
group by c.customer_id, c.first_name, c.last_name, c.job_industry_category
having count(distinct o.order_id) >= 3
   and sum(oi.quantity * oi.item_list_price_at_sale) > 10000
union
select c.customer_id,
    c.first_name,
    c.last_name,
    c.job_industry_category
from customer c
join orders o on c.customer_id = o.customer_id
              and o.order_status = 'Approved'
              and o.order_date::date BETWEEN '2017-01-01' AND '2017-03-01'
join order_items oi on o.order_id = oi.order_id
where c.job_industry_category = 'Health'
group by c.customer_id, c.first_name, c.last_name, c.job_industry_category
having count(distinct o.order_id) >= 3
   and sum(oi.quantity * oi.item_list_price_at_sale) > 10000
order by job_industry_category
""")

df_8 = pd.read_sql(query, engine)
df_8

Unnamed: 0,customer_id,first_name,last_name,job_industry_category
0,173,Ebba,Hanselmann,Health
1,2172,Emmery,Angrock,Health
2,590,Ddene,Burleton,Health
3,2292,Christie,MacClure,Health
4,2353,Lyn,Luquet,Health
5,2788,Melantha,Pickburn,Health
6,2815,Emery,Carlett,Health
7,607,Adelaida,Redmond,Health
8,2992,Crystal,Assur,Health
9,3166,Bronson,Plowman,Health
