# Подсчет частотности пар товаров в продуктовых чеках

В файле содержится информация о покупках людей.

* id – идентификатор покупки (в одну покупку входят все товары, купленные пользователем за один поход в магазин)
* Товар – наименование товара
* Количество – число единиц купленного товара

Воспользуйтесь этими данными и выясните, какие пары товаров пользователи чаще всего покупают вместе. По сути, вам необходимо найти паттерны покупок: это позволит оптимизировать размещение продуктов в магазине для удобства пользователей и увеличения выручки.

In [1]:
%%capture
# Install the PostgreSQL server package and start the service
# (the %%capture magic above suppresses the output of this cell).
!sudo apt-get -y -qq update
!sudo apt-get -y -qq install postgresql
!sudo service postgresql start

# Set the password of the `postgres` database user to `postgres`
!sudo -u postgres psql -U postgres -c "ALTER USER postgres PASSWORD 'postgres';"

# Recreate an empty database named `db`: drop it if it exists, then create it
!sudo -u postgres psql -U postgres -c 'DROP DATABASE IF EXISTS db;'
!sudo -u postgres psql -U postgres -c 'CREATE DATABASE db;'

In [2]:
import psycopg2
from psycopg2 import Error
import pandas as pd
import os
from sqlalchemy import create_engine
import numpy as np
import random

  """)


In [3]:
# Connection settings for the PostgreSQL server.
user, password = 'postgres', 'postgres'
# NOTE: `post` holds the port number; the name is kept because the
# connection-URL cell refers to it.
host, post = 'localhost', 5432
db = 'db'

In [4]:
# Connection URL (also passed to psycopg2.connect elsewhere in the notebook)
# and the SQLAlchemy engine built from it.
point = f'postgresql://{user}:{password}@{host}:{post}/{db}'
con = create_engine(point)

In [5]:
def create_table() -> None:
    """Create the ``sales`` table in the database if it does not exist yet.

    Connects with psycopg2 using the module-level ``point`` connection URL,
    executes a ``CREATE TABLE IF NOT EXISTS`` statement and commits it.
    Errors are printed rather than raised; the cursor and the connection
    are always closed.
    """
    # Pre-bind both handles so the ``finally`` clause stays valid even when
    # ``psycopg2.connect`` itself fails; otherwise the cleanup would raise
    # UnboundLocalError and hide the real connection error.
    conn = None
    cursor = None
    try:
        conn = psycopg2.connect(point)
        cursor = conn.cursor()
        # NOTE(review): `check_number` and `quantity` are declared SERIAL
        # (auto-incrementing); plain INTEGER looks more appropriate for
        # these columns - confirm before changing the schema.
        cursor.execute(
            """CREATE TABLE IF NOT EXISTS sales (check_number SERIAL NOT NULL,
                                                 product_name VARCHAR (25) NOT NULL,
                                                 quantity SERIAL NOT NULL);"""
        )
        conn.commit()
    except (Exception, Error) as error:
        print("Ошибка при работе с PostgreSQL", error)
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
            print("Соединение с PostgreSQL закрыто")

In [6]:
# Create the `sales` table (no-op if it already exists).
create_table()

Соединение с PostgreSQL закрыто


In [21]:
# Generate 10000 synthetic receipt lines. Each row draws, in this order:
# a receipt number in 1..1000, a product name "pr_<1..100>" and a
# quantity in 1..10.
rows = [
    (
        random.randint(1, 1000),
        f"pr_{random.randint(1, 100)}",
        random.randint(1, 10),
    )
    for _ in range(10000)
]

# Split the rows into the three parallel column lists.
list_check_number, list_product_name, list_quantity = (
    list(column) for column in zip(*rows)
)

In [22]:
# Map each column name to its list of generated values.
d = dict(
    check_number=list_check_number,
    product_name=list_product_name,
    quantity=list_quantity,
)

In [23]:
# Build the purchases DataFrame from the column dictionary.
df = pd.DataFrame(d)

In [24]:
# Preview the first rows of the DataFrame.
df.head()

Unnamed: 0,check_number,product_name,quantity
0,2,pr_2,9
1,3,pr_2,5
2,3,pr_3,6
3,1,pr_1,9
4,1,pr_3,6


In [95]:
# df = pd.read_csv('df_sales.csv')

In [96]:
# Write the DataFrame to the `sales` table through the SQLAlchemy engine.
# if_exists='replace' drops an existing `sales` table before inserting,
# index=False leaves the DataFrame index out, and method='multi' passes
# multiple rows in a single INSERT statement.
df.to_sql('sales', con, index=False, if_exists='replace', method='multi')

In [97]:
def select_postgresql(sql: str) -> pd.DataFrame:
    """Run a SQL query against the database and return the result.

    Args:
        sql: The SQL statement to execute.

    Returns:
        The query result as a pandas DataFrame.
    """
    conn = psycopg2.connect(point)
    try:
        return pd.read_sql(sql, conn)
    finally:
        # Close explicitly: without this every call leaves an open
        # connection to the server behind.
        conn.close()

In [98]:
# Query: every unordered pair of distinct product names.
#   tbl              - distinct product names from `sales`, numbered with
#                      row_number() in product-name order (column `id`);
#   tbl_two_products - `tbl` joined with itself, keeping rows where
#                      t1.id < t2.id, so each pair appears once and a
#                      product is never paired with itself (the extra
#                      `<>` predicate is implied by `<`).
sql = """with tbl as (select s.*, row_number() over (order by s.product_name) as id
                      from (select distinct s.product_name
                          from sales as s) as s),
              tbl_two_products as (select t1.product_name as product_1, t2.product_name as product_2
                                   from tbl as t1, tbl as t2
                                   where (t1.id<>t2.id) and (t1.id<t2.id))
          select *
          from tbl_two_products"""

In [99]:
# Execute the query stored in `sql` and display the resulting DataFrame.
select_postgresql(sql)

Unnamed: 0,product_1,product_2
0,pr_1,pr_10
1,pr_1,pr_100
2,pr_1,pr_11
3,pr_1,pr_12
4,pr_1,pr_13
...,...,...
4945,pr_96,pr_98
4946,pr_96,pr_99
4947,pr_97,pr_98
4948,pr_97,pr_99


In [100]:
# Query: one row per receipt (`check_number`) with all of its product
# names collected into an array by array_agg (`list_product_name`).
sql = """with tbl_check_group_product_name as (select s.check_number, array_agg(s.product_name) as list_product_name
                                              from sales as s
                                              group by s.check_number)
              select *
              from tbl_check_group_product_name"""

In [101]:
# Execute the query stored in `sql` and display the resulting DataFrame.
select_postgresql(sql)

Unnamed: 0,check_number,list_product_name
0,652,"[pr_10, pr_16, pr_23, pr_24, pr_35, pr_45, pr_..."
1,273,"[pr_14, pr_18, pr_29, pr_3, pr_47, pr_48, pr_4..."
2,51,"[pr_42, pr_47, pr_53, pr_59, pr_95]"
3,951,"[pr_12, pr_18, pr_18, pr_36, pr_50, pr_51, pr_..."
4,839,"[pr_14, pr_21, pr_28, pr_32, pr_37, pr_51, pr_..."
...,...,...
995,64,"[pr_12, pr_17, pr_28, pr_48, pr_50, pr_57, pr_..."
996,55,"[pr_100, pr_12, pr_19, pr_2, pr_25, pr_34, pr_..."
997,148,"[pr_13, pr_35, pr_43, pr_48, pr_54, pr_55, pr_..."
998,790,"[pr_12, pr_14, pr_15, pr_21, pr_24, pr_28, pr_..."


In [102]:
# Query: the 5 product pairs that occur together in the most receipts.
#   tbl                          - distinct product names numbered in name order;
#   tbl_two_products             - every unordered pair of distinct products
#                                  (t1.id < t2.id keeps each pair once);
#   tbl_check_group_product_name - per receipt, the array of its product names.
# A pair is matched to a receipt when both names are in the receipt's array,
# so `amount` is the number of receipts containing both products.
# The ORDER BY ends with the pair names as a tie-breaker: without it, pairs
# with equal counts could be returned in any order and LIMIT 5 could pick a
# different subset on each run.
sql = """with tbl as (select s.*, row_number() over (order by s.product_name) as id
                      from (select distinct s.product_name
                            from sales as s) as s),
              tbl_two_products as (select t1.product_name as product_1, t2.product_name as product_2
                                   from tbl as t1, tbl as t2
                                   where t1.id < t2.id),
              tbl_check_group_product_name as (select s.check_number, array_agg(s.product_name) as list_product_name
                                               from sales as s
                                               group by s.check_number)
              select gr.product_1, gr.product_2, count(gr.list_product_name) as amount
              from (select pr.product_1, pr.product_2, ch.list_product_name
                    from tbl_two_products as pr join tbl_check_group_product_name as ch
                                                on cast(pr.product_1 as text) = any(ch.list_product_name)
                                                  and cast(pr.product_2 as text) = any(ch.list_product_name)) as gr
              group by gr.product_1, gr.product_2
              order by count(gr.list_product_name) desc, gr.product_1, gr.product_2
              limit 5"""

In [103]:
# Execute the query stored in `sql` and display the resulting DataFrame.
select_postgresql(sql)

Unnamed: 0,product_1,product_2,amount
0,pr_20,pr_49,22
1,pr_39,pr_71,21
2,pr_19,pr_2,20
3,pr_29,pr_86,20
4,pr_33,pr_57,20
