In [37]:
import pandas as pd
import mysql.connector
import sqlalchemy

In [45]:
cnx = mysql.connector.connect(user='root', password='password', host='127.0.0.1', database='otus_db', port='3306')

In [46]:
cur = cnx.cursor()

In [5]:
cur.execute('''
            DROP TABLE IF EXISTS customers;
            ''')

cur.execute('''
            CREATE TABLE customers (
            customer_id SERIAL PRIMARY KEY,
            email VARCHAR(255) UNIQUE NOT NULL,
            gender CHAR(1) NOT NULL,
            city VARCHAR(255)
            );
            ''')

In [6]:
cur.execute('''
           INSERT  INTO customers
           (email, gender, city)
            VALUES
            ('dmitry@example.com', 'М', 'Зеленоград'),
            ('vasya@example.com', 'М', 'Москва'),
            ('olga@example.com', 'Ж', 'Волгоград');
            ''')

query = f'''
            SELECT * from customers;
'''
df = pd.read_sql_query(query, cnx)
df



Unnamed: 0,customer_id,email,gender,city
0,1,dmitry@example.com,М,Зеленоград
1,2,vasya@example.com,М,Москва
2,3,olga@example.com,Ж,Волгоград


In [7]:
cur.execute('''
            DROP TABLE IF EXISTS sessions;
            ''')

cur.execute('''
            CREATE TABLE sessions (
                session_id SERIAL PRIMARY KEY,
                customer_id  BIGINT UNSIGNED NOT NULL,
                visit_dttm DATETIME NOT null,
                Purchase_flg TINYINT(1) NOT null,
            CONSTRAINT fk__sessions__customers
                FOREIGN KEY (customer_id)
                REFERENCES customers (customer_id)
            );
            ''')

In [8]:
cur.execute('''
           INSERT  INTO sessions
           (customer_id, visit_dttm, Purchase_flg)
            VALUES
            ('1', now() - INTERVAL 1 DAY , 1),
            ('2', now() - INTERVAL 1 DAY, 0),
            ('3', now() - INTERVAL 1 DAY, 1),
            ('1', now(), 1),
            ('2', now(), 0),
            ('3', now(), 1);;
            ''')

query = f'''
            SELECT * from sessions;
'''
df = pd.read_sql_query(query, cnx)
df



Unnamed: 0,session_id,customer_id,visit_dttm,Purchase_flg
0,1,1,2023-09-06 16:54:20,1
1,2,2,2023-09-06 16:54:20,0
2,3,3,2023-09-06 16:54:20,1
3,4,1,2023-09-07 16:54:20,1
4,5,2,2023-09-07 16:54:20,0
5,6,3,2023-09-07 16:54:20,1


In [13]:
cur.execute('''
            DROP TABLE IF EXISTS customers_purchases;
            ''')

cur.execute('''
            CREATE TABLE customers_purchases (
            date DATETIME NOT null,
            customer_id VARCHAR(255)NOT NULL,
            count_purchases BIGINT UNSIGNED NOT NULL
            );
            ''')

#### Процедура по заливке агрегированных данных по кол-ву покупок каждого клиента на текущую дату 

In [11]:
cur.execute('''
                    drop procedure if exists customers_purchases_insert;
            ''')

cur.execute('''
                    CREATE procedure  customers_purchases_insert()
                    BEGIN
                    INSERT  INTO customers_purchases
                   (date, customer_id, count_purchases)
                      SELECT 
                       now() as date,
                        IF(GROUPING(l.customer_id), 'ИТОГО', l.customer_id) AS customer_id,
                        count(*) as count_purchases
                        from customers l
                        join sessions r
                         on l.customer_id = r.customer_id
                         where Purchase_flg = 1
                         group by l.customer_id WITH ROLLUP;
                    END;
            ''')


#### Отключим автокоммит и сделаем ручной коммит в конце

In [14]:
cur.execute('''
             set autocommit = 0;
            ''')

cur.execute('''
             call customers_purchases_insert();
            ''')

cur.execute('''
             select * from customers_purchases;
            ''')

results = cur.fetchall()
pd.DataFrame(results, columns=['date', 'customer_id', 'count_purchases'])

cur.execute('''
             commit(); 
            ''')

Unnamed: 0,date,customer_id,count_purchases
0,2023-09-07 16:59:05,1,2
1,2023-09-07 16:59:05,3,2
2,2023-09-07 16:59:05,ИТОГО,4


#### Если не делать ручной коммит и закрыть соединение

In [None]:
cnx.close()

In [22]:
query = f'''
select * from customers_purchases;
'''
df = pd.read_sql_query(query, cnx)
df



Unnamed: 0,date,customer_id,count_purchases


#### Загрузить данные используя LOAD DATA 

In [48]:
cur.execute('''
            drop TABLE if exists test_load;
            ''')

cur.execute('''
            CREATE TABLE if not exists test_load (
                hz_1 VARCHAR(255), 
                hz_2 VARCHAR(255), 
                hz_3 VARCHAR(255), 
                hz_4 VARCHAR(255));
            ''')

cur.execute('''
           LOAD DATA INFILE '/var/lib/mysql-files/Apparel.csv'
            IGNORE INTO TABLE test_load
            FIELDS TERMINATED BY ','
            LINES TERMINATED BY '\n';
            ''')

In [49]:
query = f'''
select * from test_load;
'''
df = pd.read_sql_query(query, cnx)
df



Unnamed: 0,hz_1,hz_2,hz_3,hz_4
0,Handle,Title,Body (HTML),Vendor
1,the-scout-skincare-kit,The Scout Skincare Kit,"""<p><em>This is a demonstration store. You can...",
2,<p><span>A collection of the best Ursa Major h...,,,
3,<ul>,,,
4,"<li><span style=""""line-height: 1.4;"""">Face Was...",,,
...,...,...,...,...
252,</ul>,,,
253,"<ul class=""""tabs-content""""></ul>""",United By Blue,Bags,Bags
254,hudderton-backpack,,,
255,hudderton-backpack,,,
