In [1]:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
from sqlalchemy.sql import text

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


### Create schema and test

In [2]:
# run ```docker-compose --profile hw_dbs up``` to raise the db
engine = create_engine('postgresql://postgres:admin@master_ds_dbs_postgre:5432/hw2')
if not database_exists(engine.url):
    create_database(engine.url)

database_exists(engine.url)

True

In [3]:
tables = {
    "customer": """CREATE TABLE customer (
      customer_id serial primary key,
      first_name varchar(32) not null,
      last_name varchar(32),
      gender varchar(6) not null,
      dob date,
      job_title varchar(64),
      job_industry_category varchar(64),
      wealth_segment varchar(32) not null,
      deceased_indicator char(1) not null,
      owns_car boolean not null,
      address text not null,
      postcode char(4) not null,
      state varchar(32) not null,
      country varchar(32) not null,
      property_valuation int not null
    )
    """,

    "transaction": """CREATE TABLE transaction (
      transaction_id serial primary key,
      customer_id serial,
      product_id serial not null,
      transaction_date date,
      online_order bool,
      order_status varchar(16),
      brand varchar(32),
      product_line varchar(16),
      product_class varchar(16),
      product_size varchar(16),
      list_price numeric,
      standard_cost numeric
    )
    """
}
with engine.connect() as con:
    for table, create in tables.items():
        con.execute(text(f"DROP TABLE IF EXISTS {table} CASCADE"))
        con.execute(text(create))
    con.commit()

In [4]:
%load_ext sql

In [5]:
customer = pd.read_csv('./datasets/customer.csv', sep=';')
transaction = pd.read_csv('./datasets/transaction.csv', sep=';')
customer.rename(columns={'DOB': 'dob'}, inplace=True)

customer.head(2)

Unnamed: 0,customer_id,first_name,last_name,gender,dob,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,address,postcode,state,country,property_valuation
0,1,Laraine,Medendorp,F,1953-10-12,Executive Secretary,Health,Mass Customer,N,Yes,060 Morning Avenue,2016,New South Wales,Australia,10
1,2,Eli,Bockman,Male,1980-12-16,Administrative Officer,Financial Services,Mass Customer,N,Yes,6 Meadow Vale Court,2153,New South Wales,Australia,10


In [6]:
transaction.head(2)

Unnamed: 0,transaction_id,product_id,customer_id,transaction_date,online_order,order_status,brand,product_line,product_class,product_size,list_price,standard_cost
0,1,2,2950,25.02.2017,False,Approved,Solex,Standard,medium,medium,7149,5362
1,2,3,3120,21.05.2017,True,Approved,Trek Bicycles,Standard,medium,large,209147,38892


In [7]:
transaction['transaction_date'] = pd.to_datetime(transaction['transaction_date'])
transaction['list_price'] = pd.to_numeric(transaction['list_price'].str.replace(',', '.'))
transaction['standard_cost'] = pd.to_numeric(transaction['standard_cost'].str.replace(',', '.'))

  transaction['transaction_date'] = pd.to_datetime(transaction['transaction_date'])


In [8]:
customer.to_sql('customer', con=engine, index=False, if_exists='append')
transaction.to_sql('transaction', con=engine, index=False, if_exists='append', )

1000

In [9]:
%sql postgresql://postgres:admin@master_ds_dbs_postgre:5432/hw2

In [10]:
%%sql
SELECT * FROM customer LIMIT 10;

 * postgresql://postgres:***@master_ds_dbs_postgre:5432/hw2
10 rows affected.


customer_id,first_name,last_name,gender,dob,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,address,postcode,state,country,property_valuation
1,Laraine,Medendorp,F,1953-10-12,Executive Secretary,Health,Mass Customer,N,True,060 Morning Avenue,2016,New South Wales,Australia,10
2,Eli,Bockman,Male,1980-12-16,Administrative Officer,Financial Services,Mass Customer,N,True,6 Meadow Vale Court,2153,New South Wales,Australia,10
3,Arlin,Dearle,Male,1954-01-20,Recruiting Manager,Property,Mass Customer,N,True,0 Holy Cross Court,4211,QLD,Australia,9
4,Talbot,,Male,1961-10-03,,IT,Mass Customer,N,False,17979 Del Mar Point,2448,New South Wales,Australia,4
5,Sheila-kathryn,Calton,Female,1977-05-13,Senior Editor,,Affluent Customer,N,True,9 Oakridge Court,3216,VIC,Australia,9
6,Curr,Duckhouse,Male,1966-09-16,,Retail,High Net Worth,N,True,4 Delaware Trail,2210,New South Wales,Australia,9
7,Fina,Merali,Female,1976-02-23,,Financial Services,Affluent Customer,N,True,49 Londonderry Lane,2650,New South Wales,Australia,4
8,Rod,Inder,Male,1962-03-30,Media Manager I,,Mass Customer,N,False,97736 7th Trail,2023,New South Wales,Australia,12
9,Mala,Lind,Female,1973-03-10,Business Systems Development Analyst,Argiculture,Affluent Customer,N,True,93405 Ludington Park,3044,VIC,Australia,8
10,Fiorenze,Birdall,Female,1988-10-11,Senior Quality Engineer,Financial Services,Mass Customer,N,True,44339 Golden Leaf Alley,4557,QLD,Australia,4


In [11]:
%%sql
SELECT * FROM transaction LIMIT 10;

 * postgresql://postgres:***@master_ds_dbs_postgre:5432/hw2
10 rows affected.


transaction_id,customer_id,product_id,transaction_date,online_order,order_status,brand,product_line,product_class,product_size,list_price,standard_cost
1,2950,2,2017-02-25,False,Approved,Solex,Standard,medium,medium,71.49,53.62
2,3120,3,2017-05-21,True,Approved,Trek Bicycles,Standard,medium,large,2091.47,388.92
3,402,37,2017-10-16,False,Approved,OHM Cycles,Standard,low,medium,1793.43,248.82
4,3135,88,2017-08-31,False,Approved,Norco Bicycles,Standard,medium,medium,1198.46,381.1
5,787,78,2017-10-01,True,Approved,Giant Bicycles,Standard,medium,large,1765.3,709.48
6,2339,25,2017-03-08,True,Approved,Giant Bicycles,Road,medium,medium,1538.99,829.65
7,1542,22,2017-04-21,True,Approved,WeareA2B,Standard,medium,medium,60.34,45.26
8,2459,15,2017-07-15,False,Approved,WeareA2B,Standard,medium,medium,1292.84,13.44
9,1305,67,2017-08-10,False,Approved,Solex,Standard,medium,large,1071.23,380.74
10,3262,12,2017-08-30,True,Approved,WeareA2B,Standard,medium,medium,1231.15,161.6


### Tasks

#### Task 1

(1 балл) Вывести все уникальные бренды, у которых стандартная стоимость выше 1500 долларов.

In [32]:
%%sql

SELECT DISTINCT brand
FROM transaction
WHERE standard_cost > 1500;

 * postgresql://postgres:***@master_ds_dbs_postgre:5432/hw2
4 rows affected.


brand
OHM Cycles
Trek Bicycles
Solex
Giant Bicycles


#### Task 2

(1 балл) Вывести все подтвержденные транзакции за период '2017-04-01' по '2017-04-09' включительно.

In [21]:
%%sql

SELECT *
FROM transaction
WHERE transaction_date BETWEEN '2017-04-01' AND '2017-04-09'
    AND order_status = 'Approved'
LIMIT 10; --добавил, т.к. ipython-sql отправляет все 551 наблюдений в output; не нашел как ограничить на уровне kernel.   

 * postgresql://postgres:***@master_ds_dbs_postgre:5432/hw2
10 rows affected.


transaction_id,customer_id,product_id,transaction_date,online_order,order_status,brand,product_line,product_class,product_size,list_price,standard_cost
17,2426,79,2017-04-03,False,Approved,Norco Bicycles,Standard,medium,medium,1555.58,818.01
19,2268,54,2017-04-06,True,Approved,WeareA2B,Standard,medium,medium,1292.84,13.44
23,2001,37,2017-04-08,True,Approved,OHM Cycles,Standard,low,medium,1793.43,248.82
83,3398,0,2017-04-01,True,Approved,OHM Cycles,Standard,medium,medium,235.63,125.07
89,2682,0,2017-04-04,True,Approved,OHM Cycles,Road,high,large,12.01,7.21
126,773,53,2017-04-01,False,Approved,OHM Cycles,Standard,medium,medium,795.34,101.58
146,3261,41,2017-04-06,True,Approved,Solex,Road,medium,medium,416.98,312.74
154,3383,3,2017-04-06,True,Approved,Trek Bicycles,Standard,medium,large,2091.47,388.92
220,1502,10,2017-04-09,True,Approved,WeareA2B,Touring,medium,medium,1466.68,363.25
228,821,13,2017-04-04,True,Approved,Solex,Standard,medium,medium,1163.89,589.27


#### Task 3

(1 балл) Вывести все профессии у клиентов из сферы IT или Financial Services, которые начинаются с фразы 'Senior'.

In [16]:
%%sql

SELECT DISTINCT c.job_title
FROM customer c
INNER JOIN transaction t ON c.customer_id = t.customer_id
WHERE c.job_industry_category IN ('IT', 'Financial Services')
AND c.job_title LIKE 'Senior%';

 * postgresql://postgres:***@master_ds_dbs_postgre:5432/hw2
6 rows affected.


job_title
Senior Sales Associate
Senior Financial Analyst
Senior Editor
Senior Quality Engineer
Senior Developer
Senior Cost Accountant


#### Task 4

(1 балл) Вы|вести все бренды, которые закупают клиенты, работающие в сфере Financial Services

In [17]:
%%sql

SELECT DISTINCT t.brand
FROM transaction t
INNER JOIN customer c ON t.customer_id = c.customer_id
WHERE c.job_industry_category = 'Financial Services';

 * postgresql://postgres:***@master_ds_dbs_postgre:5432/hw2
7 rows affected.


brand
""
Trek Bicycles
WeareA2B
Solex
Giant Bicycles
OHM Cycles
Norco Bicycles


#### Task 5

(1 балл) Вывести 10 клиентов, которые оформили онлайн-заказ продукции из брендов 'Giant Bicycles', 'Norco Bicycles', 'Trek Bicycles'.

In [33]:
%%sql

SELECT DISTINCT c.customer_id, c.first_name, c.last_name
FROM transaction t
INNER JOIN customer c ON t.customer_id = c.customer_id
WHERE t.online_order = 'TRUE'
    AND t.brand IN ('Giant Bicycles', 'Norco Bicycles', 'Trek Bicycles')
LIMIT 10;

 * postgresql://postgres:***@master_ds_dbs_postgre:5432/hw2
10 rows affected.


customer_id,first_name,last_name
742,Dexter,Robelin
3123,Tina,Riggulsford
20,Basile,Firth
1894,Patten,Laytham
3426,Ron,Dilon
1179,Kerry,Pashenkov
3491,Leanna,Cromb
2201,Trisha,Basset
3095,Joachim,
1521,Pernell,Duffett


#### Task 6

(1 балл) Вывести всех клиентов, у которых нет транзакций.

In [36]:
%%sql

SELECT c.customer_id, c.first_name, c.last_name
FROM customer c
LEFT JOIN transaction t ON c.customer_id = t.customer_id
WHERE t.transaction_id IS NULL
LIMIT 10; -- аналогично task 2; получаем 507 строк без ограничения

 * postgresql://postgres:***@master_ds_dbs_postgre:5432/hw2
10 rows affected.


customer_id,first_name,last_name
3565,Charyl,Pottiphar
3535,Bren,Dabbes
3647,Carlyle,Frape
3903,Dayna,Cawthera
3519,Aldus,Kenningley
3612,Normand,Matous
3652,Aldrich,Camble
3949,Costa,Sleightholm
3704,Haslett,Ropars
3989,Nicolas,Burdass


#### Task 7

(2 балла) Вывести всех клиентов из IT, у которых транзакции с максимальной стандартной стоимостью.

In [37]:
%%sql

SELECT c.customer_id, c.first_name, c.last_name, t.standard_cost
FROM customer c
INNER JOIN transaction t ON c.customer_id = t.customer_id
WHERE c.job_industry_category = 'IT'
    AND t.standard_cost = (
        SELECT max(standard_cost)
        FROM transaction
    );

 * postgresql://postgres:***@master_ds_dbs_postgre:5432/hw2
9 rows affected.


customer_id,first_name,last_name,standard_cost
3473,Sanderson,Alloway,1759.85
893,Gibby,Fearnley,1759.85
3151,Thorn,Choffin,1759.85
34,Jephthah,Bachmann,1759.85
2913,Padraic,Bonnar,1759.85
1918,Devin,Sandeson,1759.85
1672,Sharla,Creebo,1759.85
975,Goldarina,Rzehorz,1759.85
1773,Nickolas,Guittet,1759.85


#### Task 8

(2 балла) Вывести всех клиентов из сферы IT и Health, у которых есть подтвержденные транзакции за период '2017-07-07' по '2017-07-17'.

In [38]:
%%sql

SELECT DISTINCT c.customer_id, c.first_name, c.last_name, c.job_industry_category
FROM customer c
INNER JOIN transaction t ON c.customer_id = t.customer_id
WHERE c.job_industry_category in ('IT', 'Health')
    AND t.transaction_date BETWEEN '2017-07-07' AND '2017-07-17'
    AND t.order_status = 'Approved';


 * postgresql://postgres:***@master_ds_dbs_postgre:5432/hw2
115 rows affected.


customer_id,first_name,last_name,job_industry_category
22,Deeanne,Durtnell,IT
28,Fee,Zellmer,Health
41,Basilius,Coupe,Health
47,Matthew,Jeaycock,Health
104,Odille,Panketh,Health
235,Leona,Phateplace,Health
239,Wells,Pressman,Health
249,D'arcy,Slay,IT
289,Modestia,Lithgow,Health
290,Giorgio,Kevane,IT
