In [173]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker

In [174]:
# Параметры подключения к базе данных
db_connection_string = "postgresql://postgres:1234@localhost:5432/hw_2_db"
engine = create_engine(db_connection_string)

Session = sessionmaker(bind=engine)
session = Session()

In [175]:
# Звгрузим датафреймы для переодической самопроверки через pandas
customers = session.execute(text("SELECT * FROM customer"))
customers = customers.fetchall()
customers = pd.DataFrame(customers)

transactions = session.execute(text("SELECT * FROM transaction"))
transactions = transactions.fetchall()
transactions = pd.DataFrame(transactions)

display(customers.head(), transactions.head())

Unnamed: 0,customer_id,first_name,last_name,gender,dob,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,address,postcode,state,country,property_valuation
0,1,Laraine,Medendorp,F,1953-10-12,Executive Secretary,Health,Mass Customer,N,Yes,060 Morning Avenue,2016,New South Wales,Australia,10
1,2,Eli,Bockman,Male,1980-12-16,Administrative Officer,Financial Services,Mass Customer,N,Yes,6 Meadow Vale Court,2153,New South Wales,Australia,10
2,3,Arlin,Dearle,Male,1954-01-20,Recruiting Manager,Property,Mass Customer,N,Yes,0 Holy Cross Court,4211,QLD,Australia,9
3,4,Talbot,,Male,1961-10-03,,IT,Mass Customer,N,No,17979 Del Mar Point,2448,New South Wales,Australia,4
4,5,Sheila-kathryn,Calton,Female,1977-05-13,Senior Editor,,Affluent Customer,N,Yes,9 Oakridge Court,3216,VIC,Australia,9


Unnamed: 0,transaction_id,product_id,customer_id,transaction_date,online_order,order_status,brand,product_line,product_class,product_size,list_price,standard_cost
0,1,2,2950,2017-02-25,False,Approved,Solex,Standard,medium,medium,71.49,53.62
1,2,3,3120,2017-05-21,True,Approved,Trek Bicycles,Standard,medium,large,2091.47,388.92
2,3,37,402,2017-10-16,False,Approved,OHM Cycles,Standard,low,medium,1793.43,248.82
3,4,88,3135,2017-08-31,False,Approved,Norco Bicycles,Standard,medium,medium,1198.46,381.1
4,5,78,787,2017-10-01,True,Approved,Giant Bicycles,Standard,medium,large,1765.3,709.48


> Будем реализовывать параметрические query-запросы для борьбы с sql-инъекциями и параллельного оттачивания наывыка работы с sqlalchemy :)
> 

---


### (1 балл) Вывести все уникальные бренды, у которых стандартная стоимость выше 1500 долларов.

In [176]:
query = text(
    """
    SELECT DISTINCT(brand) 
    FROM transaction as t
    WHERE t.standard_cost > :price;
    """
)

result_1 = session.execute(
    query,
    {
        "price": 1500,
    },
)


result_1 = result_1.fetchall()

result_1 = pd.DataFrame(result_1)

display(result_1)

Unnamed: 0,brand
0,OHM Cycles
1,Trek Bicycles
2,Solex
3,Giant Bicycles


In [177]:
# Проверочка
transactions[transactions["standard_cost"] > 1500]["brand"].unique()

array(['Trek Bicycles', 'Giant Bicycles', 'OHM Cycles', 'Solex'],
      dtype=object)

---

### (1 балл) Вывести все подтвержденные транзакции за период '2017-04-01' по '2017-04-09' включительно.

In [178]:

query = text(
    """
    SELECT * 
    FROM transaction AS t
    WHERE t.order_status = :status 
      AND t.transaction_date BETWEEN :start_date AND :end_date;
    """
)

result_2 = session.execute(
    query, {"status": "Approved", 
            "start_date": "2017-04-01", 
            "end_date": "2017-04-09"}
)

result_2 = result_2.fetchall()

# Преобразование результата в DataFrame
result_2 = pd.DataFrame(result_2)

# Вывод DataFrame
display(result_2)

Unnamed: 0,transaction_id,product_id,customer_id,transaction_date,online_order,order_status,brand,product_line,product_class,product_size,list_price,standard_cost
0,17,79,2426,2017-04-03,False,Approved,Norco Bicycles,Standard,medium,medium,1555.58,818.01
1,19,54,2268,2017-04-06,True,Approved,WeareA2B,Standard,medium,medium,1292.84,13.44
2,23,37,2001,2017-04-08,True,Approved,OHM Cycles,Standard,low,medium,1793.43,248.82
3,83,0,3398,2017-04-01,True,Approved,OHM Cycles,Standard,medium,medium,235.63,125.07
4,89,0,2682,2017-04-04,True,Approved,OHM Cycles,Road,high,large,12.01,7.21
...,...,...,...,...,...,...,...,...,...,...,...,...
526,19655,0,336,2017-04-09,True,Approved,Norco Bicycles,Standard,medium,medium,360.40,270.30
527,19853,7,3072,2017-04-02,False,Approved,Trek Bicycles,Road,low,medium,980.37,234.43
528,19899,57,325,2017-04-06,False,Approved,WeareA2B,Touring,medium,large,1890.39,260.14
529,19968,0,2751,2017-04-06,False,Approved,WeareA2B,Standard,medium,medium,60.34,45.26


In [179]:
first = transactions["order_status"] == "Approved"
second = transactions["transaction_date"] >= pd.to_datetime("2017-04-01").date()
third = transactions["transaction_date"] <= pd.to_datetime("2017-04-09").date()
check_2 = transactions[first & second & third].reset_index(drop=True)

check_2

Unnamed: 0,transaction_id,product_id,customer_id,transaction_date,online_order,order_status,brand,product_line,product_class,product_size,list_price,standard_cost
0,17,79,2426,2017-04-03,False,Approved,Norco Bicycles,Standard,medium,medium,1555.58,818.01
1,19,54,2268,2017-04-06,True,Approved,WeareA2B,Standard,medium,medium,1292.84,13.44
2,23,37,2001,2017-04-08,True,Approved,OHM Cycles,Standard,low,medium,1793.43,248.82
3,83,0,3398,2017-04-01,True,Approved,OHM Cycles,Standard,medium,medium,235.63,125.07
4,89,0,2682,2017-04-04,True,Approved,OHM Cycles,Road,high,large,12.01,7.21
...,...,...,...,...,...,...,...,...,...,...,...,...
526,19655,0,336,2017-04-09,True,Approved,Norco Bicycles,Standard,medium,medium,360.40,270.30
527,19853,7,3072,2017-04-02,False,Approved,Trek Bicycles,Road,low,medium,980.37,234.43
528,19899,57,325,2017-04-06,False,Approved,WeareA2B,Touring,medium,large,1890.39,260.14
529,19968,0,2751,2017-04-06,False,Approved,WeareA2B,Standard,medium,medium,60.34,45.26


---
### (1 балл) Вывести все профессии у клиентов из сферы IT или Financial Services, которые начинаются с фразы 'Senior'.

In [180]:

query = text(
    """
    SELECT * 
    FROM customer AS c
    WHERE c.job_title LIKE :prof_pref AND
    (c.job_industry_category = :status_1 
    OR c.job_industry_category = :status_2);
    """
)

result_3 = session.execute(
    query, {"status_1": "IT", 
            "status_2": "Financial Services", 
            "prof_pref": "Senior%"}
)

result_3 = result_3.fetchall()

# Преобразование результата в DataFrame
result_3 = pd.DataFrame(result_3)

# Вывод DataFrame
display(result_3)

Unnamed: 0,customer_id,first_name,last_name,gender,dob,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,address,postcode,state,country,property_valuation
0,10,Fiorenze,Birdall,Female,1988-10-11,Senior Quality Engineer,Financial Services,Mass Customer,N,Yes,44339 Golden Leaf Alley,4557,QLD,Australia,4
1,38,Cordi,Merman,Female,1955-10-29,Senior Cost Accountant,Financial Services,Affluent Customer,N,No,1 Claremont Park,3931,VIC,Australia,10
2,45,Trace,Woodhead,Male,1975-09-23,Senior Financial Analyst,Financial Services,Mass Customer,N,No,3107 Calypso Terrace,2210,New South Wales,Australia,9
3,91,Dylan,Meaker,Male,1955-02-24,Senior Financial Analyst,Financial Services,Affluent Customer,N,Yes,72 Dorton Crossing,3133,VIC,Australia,10
4,374,Valdemar,MacCoughen,Male,1992-01-22,Senior Financial Analyst,Financial Services,Mass Customer,N,Yes,0 Brown Trail,2203,NSW,Australia,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,599,Ernestus,Cruden,U,,Senior Financial Analyst,Financial Services,Mass Customer,N,Yes,846 Clove Junction,4300,QLD,Australia,5
77,685,Booth,Birkin,U,,Senior Developer,IT,Mass Customer,N,No,75826 Bellgrove Trail,3095,Victoria,Australia,10
78,1990,Mira,Askham,U,,Senior Financial Analyst,Financial Services,High Net Worth,N,No,35 Debra Way,3201,VIC,Australia,6
79,3151,Thorn,Choffin,U,,Senior Developer,IT,Affluent Customer,N,Yes,5323 Chive Avenue,2486,NSW,Australia,7


In [181]:
display(
    result_3["job_title"].value_counts(),
    result_3["job_industry_category"].value_counts(),
)

job_title
Senior Financial Analyst    37
Senior Cost Accountant      26
Senior Developer             6
Senior Editor                6
Senior Quality Engineer      4
Senior Sales Associate       2
Name: count, dtype: int64

job_industry_category
Financial Services    75
IT                     6
Name: count, dtype: int64

___
### (1 балл) Вывести все бренды, которые закупают клиенты, работающие в сфере Financial Services

In [182]:
query = text(
    """
    SELECT t.brand
    FROM transaction AS t
    WHERE t.customer_id in (
        SELECT c.customer_id
        FROM customer AS c
        WHERE c.job_industry_category = :status
    );
    """
)

result_4 = session.execute(
    query,
    {
        "status": "Financial Services",
    },
)

result_4 = result_4.fetchall()

# Преобразование результата в DataFrame
result_4 = pd.DataFrame(result_4)

# Вывод DataFrame
display(result_4)

Unnamed: 0,brand
0,Solex
1,Norco Bicycles
2,Solex
3,Giant Bicycles
4,Giant Bicycles
...,...
3881,Trek Bicycles
3882,Solex
3883,Norco Bicycles
3884,Solex


___
### (1 балл) Вывести 10 клиентов, которые оформили онлайн-заказ продукции из брендов 'Giant Bicycles', 'Norco Bicycles', 'Trek Bicycles'.

In [183]:
query = text(
    """
    SELECT *
    FROM customer AS c
    WHERE c.customer_id in (
        SELECT t.customer_id
        FROM transaction AS t
        WHERE t.brand in :brand
    )
    LIMIT 10;
    """
)

result_4 = session.execute(
    query,
    {
        "brand": ("Giant Bicycles", 
                  "Norco Bicycles", 
                  "Trek Bicycles"),
    },
)

result_4 = result_4.fetchall()

# Преобразование результата в DataFrame
result_4 = pd.DataFrame(result_4)

# Вывод DataFrame
display(result_4)

Unnamed: 0,customer_id,first_name,last_name,gender,dob,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,address,postcode,state,country,property_valuation
0,1489,Mead,Wollard,Male,1993-05-13,Teacher,Health,Mass Customer,N,No,9 Sycamore Pass,2287,NSW,Australia,5
1,273,Nevile,Abraham,Male,1963-04-22,Computer Systems Analyst IV,,Mass Customer,N,No,19795 Bultman Circle,4214,QLD,Australia,8
2,2574,Humfrey,Boyse,Male,1979-12-21,,Manufacturing,Mass Customer,N,No,71725 Goodland Lane,4509,QLD,Australia,7
3,951,Meryl,Dumbar,Male,1999-09-29,Assistant Media Planner,Entertainment,Mass Customer,N,No,20 Carpenter Parkway,2192,NSW,Australia,9
4,2614,Anatollo,Stendell,Male,1957-10-16,,Retail,High Net Worth,N,No,2271 Fordem Plaza,2780,NSW,Australia,8
5,2520,Gabie,Skett,Male,1974-05-27,Accountant III,Retail,Affluent Customer,N,Yes,82 Lukken Lane,2190,NSW,Australia,10
6,2466,Araldo,Klamp,Male,1980-10-04,Software Test Engineer IV,Entertainment,High Net Worth,N,No,6247 Crest Line Terrace,2250,NSW,Australia,8
7,2196,Skylar,Brise,Male,1976-06-08,Media Manager I,Retail,High Net Worth,N,Yes,3 Bashford Plaza,2267,NSW,Australia,5
8,1750,Filbert,Evreux,Male,1959-05-13,Internal Auditor,Entertainment,Mass Customer,N,Yes,84707 Harper Circle,3114,VIC,Australia,10
9,176,Angelo,Clayal,Male,1992-04-13,Accountant I,Property,Mass Customer,N,No,216 Barnett Junction,4120,QLD,Australia,8


---
### (1 балл) Вывести всех клиентов, у которых нет транзакций.

In [184]:
query = text(
    """
    SELECT *
    FROM customer AS c
    WHERE c.customer_id NOT IN (
        SELECT t.customer_id
        FROM transaction AS t
    );
    """
)

result_5 = session.execute(query)

result_5 = result_5.fetchall()

# Преобразование результата в DataFrame
result_5 = pd.DataFrame(result_5)

# Вывод DataFrame
display(result_5)

Unnamed: 0,customer_id,first_name,last_name,gender,dob,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,address,postcode,state,country,property_valuation
0,3756,Barry,,Male,1977-07-08,,,Affluent Customer,N,No,011 Johnson Park,3121,VIC,Australia,11
1,852,Andie,Bonney,Female,2000-11-04,Compensation Analyst,Financial Services,Affluent Customer,N,Yes,94 Anhalt Way,3139,VIC,Australia,7
2,869,Addia,Abels,Female,1984-03-11,Account Representative I,Financial Services,High Net Worth,N,Yes,02377 Maywood Trail,2287,NSW,Australia,7
3,1373,Shaylynn,Epsley,Female,1958-09-23,Director of Sales,Financial Services,Mass Customer,N,Yes,0 Grasskamp Pass,3170,VIC,Australia,10
4,2074,Roslyn,Rawdall,Female,1997-06-11,,Financial Services,Mass Customer,N,No,95483 Washington Junction,2505,NSW,Australia,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
502,3727,Eba,Youle,U,,Assistant Professor,IT,Mass Customer,N,No,1940 Morning Circle,2765,NSW,Australia,9
503,3779,Ulick,Daspar,U,,,IT,Affluent Customer,N,No,27784 Hansons Trail,3078,VIC,Australia,11
504,3883,Nissa,Conrad,U,,Legal Assistant,IT,Mass Customer,N,No,1728 Springview Lane,4509,QLD,Australia,6
505,3935,Teodor,Alfonsini,U,,,IT,High Net Worth,N,Yes,0900 Northport Point,2085,NSW,Australia,11


In [185]:
# Проверка рандомного id-шника
print("Проверка клиента без транзакций:")
test_client = np.random.choice(result_5["customer_id"].unique())
try:
    assert len(transactions[transactions["customer_id"] == test_client]) == 0
    print(f"Проверка пройдена клиент {test_client} отсутстует в транзакциях")
except AssertionError as e:
    print("Ошибка клиент имеется")

Проверка клиента без транзакций:
Проверка пройдена клиент 3986 отсутстует в транзакциях


---
### (2 балла) Вывести всех клиентов из IT, у которых транзакции с максимальной стандартной стоимостью.

In [186]:
query = text(
    """
    SELECT *
    FROM customer AS c
    LEFT JOIN transaction AS t
    ON c.customer_id = t.customer_id
    where c.job_industry_category= 'IT'
    and t.standard_cost = (
    select max(t.standard_cost)
    from transaction as t);

    """
)

result_6 = session.execute(query)

result_6 = result_6.fetchall()

# Преобразование результата в DataFrame
result_6 = pd.DataFrame(result_6)

# Вывод DataFrame
display(result_6)
print(result_6['customer_id'])

Unnamed: 0,customer_id,first_name,last_name,gender,dob,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,...,customer_id.1,transaction_date,online_order,order_status,brand,product_line,product_class,product_size,list_price,standard_cost
0,3473,Sanderson,Alloway,U,,Analog Circuit Design manager,IT,Mass Customer,N,No,...,3473,2017-12-18,False,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
1,893,Gibby,Fearnley,Male,1983-09-11,Geologist I,IT,Mass Customer,N,No,...,893,2017-02-19,False,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
2,3151,Thorn,Choffin,U,,Senior Developer,IT,Affluent Customer,N,Yes,...,3151,2017-10-01,True,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
3,34,Jephthah,Bachmann,U,1843-12-21,Legal Assistant,IT,Affluent Customer,N,No,...,34,2017-06-25,True,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
4,2913,Padraic,Bonnar,Male,1955-07-11,VP Quality Control,IT,Affluent Customer,N,Yes,...,2913,2017-07-25,True,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
5,1918,Devin,Sandeson,U,,Staff Accountant II,IT,Affluent Customer,N,No,...,1918,2017-06-26,True,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
6,1672,Sharla,Creebo,Female,1963-04-27,Design Engineer,IT,Affluent Customer,N,Yes,...,1672,2017-03-14,,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
7,975,Goldarina,Rzehorz,U,,Automation Specialist IV,IT,Mass Customer,N,No,...,975,2017-06-22,True,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
8,1773,Nickolas,Guittet,U,,,IT,Mass Customer,N,Yes,...,1773,2017-04-05,True,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85


   customer_id  customer_id
0         3473         3473
1          893          893
2         3151         3151
3           34           34
4         2913         2913
5         1918         1918
6         1672         1672
7          975          975
8         1773         1773


In [187]:
first_filter = transactions["standard_cost"].max()

check_6 = pd.merge(
    left=customers, right=transactions, left_on="customer_id", right_on="customer_id"
)
check_6 = check_6[check_6["job_industry_category"] == "IT"]
check_6 = check_6[check_6["standard_cost"] == first_filter]
check_6

Unnamed: 0,customer_id,first_name,last_name,gender,dob,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,...,product_id,transaction_date,online_order,order_status,brand,product_line,product_class,product_size,list_price,standard_cost
204,34,Jephthah,Bachmann,U,1843-12-21,Legal Assistant,IT,Affluent Customer,N,No,...,60,2017-06-25,True,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
5068,893,Gibby,Fearnley,Male,1983-09-11,Geologist I,IT,Mass Customer,N,No,...,60,2017-02-19,False,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
5807,975,Goldarina,Rzehorz,U,,Automation Specialist IV,IT,Mass Customer,N,No,...,60,2017-06-22,True,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
9455,1672,Sharla,Creebo,Female,1963-04-27,Design Engineer,IT,Affluent Customer,N,Yes,...,60,2017-03-14,,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
16391,2913,Padraic,Bonnar,Male,1955-07-11,VP Quality Control,IT,Affluent Customer,N,Yes,...,60,2017-07-25,True,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
19784,1773,Nickolas,Guittet,U,,,IT,Mass Customer,N,Yes,...,60,2017-04-05,True,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
19799,1918,Devin,Sandeson,U,,Staff Accountant II,IT,Affluent Customer,N,No,...,60,2017-06-26,True,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
19939,3151,Thorn,Choffin,U,,Senior Developer,IT,Affluent Customer,N,Yes,...,60,2017-10-01,True,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85
19993,3473,Sanderson,Alloway,U,,Analog Circuit Design manager,IT,Mass Customer,N,No,...,60,2017-12-18,False,Approved,Giant Bicycles,Standard,high,small,1977.36,1759.85


---
### (2 балла) Вывести всех клиентов из сферы IT и Health, у которых есть подтвержденные транзакции за период '2017-07-07' по '2017-07-17'.

In [190]:
query = text(
    """
    SELECT *
    FROM customer c
    WHERE c.customer_id 
    IN
        (
        SELECT c.customer_id 
        FROM customer AS c
        INNER JOIN transaction as t
        ON c.customer_id = t.customer_id 
        WHERE c.job_industry_category IN :job_industry
        AND (t.transaction_date BETWEEN :date_from and :date_to)
        AND t.order_status = :order_status
        );
    """
)

result_7 = session.execute(
    query,
    {
        "job_industry": ("IT", "Health"),
        "date_from": "2017-07-07",
        "date_to": "2017-07-17",
        "order_status": "Approved",
        
    },
)

result_7 = result_7.fetchall()

# Преобразование результата в DataFrame
result_7 = pd.DataFrame(result_7)

# Вывод DataFrame
display(result_7)

Unnamed: 0,customer_id,first_name,last_name,gender,dob,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,address,postcode,state,country,property_valuation
0,2341,Caterina,Scedall,U,,Web Designer II,IT,Mass Customer,N,Yes,4 Huxley Alley,4070,QLD,Australia,8
1,2701,Ileana,Timms,Female,1960-07-28,Dental Hygienist,Health,Affluent Customer,N,Yes,7107 Manitowish Road,4815,QLD,Australia,2
2,1489,Mead,Wollard,Male,1993-05-13,Teacher,Health,Mass Customer,N,No,9 Sycamore Pass,2287,NSW,Australia,5
3,753,Josy,St. Quentin,Female,1970-03-07,Food Chemist,Health,Affluent Customer,Y,Yes,195 Knutson Place,4019,QLD,Australia,8
4,708,Idalina,Wilcot,Female,1987-02-15,Safety Technician II,IT,Mass Customer,N,Yes,6 Morning Drive,2232,NSW,Australia,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,779,Maddy,Larrat,Female,1992-02-10,Staff Accountant III,Health,High Net Worth,N,No,68 Chinook Street,4814,QLD,Australia,2
111,1231,Aloisia,Shawel,Female,1973-07-28,Social Worker,Health,Affluent Customer,N,No,67348 Lakewood Gardens Road,2852,NSW,Australia,2
112,1907,Lind,Dredge,Male,1974-11-04,VP Sales,Health,High Net Worth,N,No,2 Maryland Drive,4227,QLD,Australia,7
113,612,Grange,Skillington,Male,1962-01-12,GIS Technical Architect,Health,Mass Customer,N,No,608 Basil Alley,2306,NSW,Australia,4


---
Закрываем сессию

In [191]:
# Закрытие сессии
session.close()