In [70]:
import sqlite3 
from sqlite3 import Error 
import pandas as pd

### Объявляем вспомогательные функции

In [71]:
def create_connection(path):
    connection = None
    try:
        connection = sqlite3.connect(path)
        print("Connection to SQLite DB successful")
    except Error as e:
        print(f"The error '{e}' occurred")

    return connection

In [72]:
def execute_query(connection, query):
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        connection.commit()
        print("Query executed successfully")
    except Error as e:
        print(f"The error '{e}' occurred")

## Шаг 1
### Устанавливаем соединение с базой, создаем таблицы и загружаем csv-файлы в таблицы

In [73]:
connection = create_connection("transaction.db")

Connection to SQLite DB successful


In [74]:
create_transation_table = """
    CREATE TABLE IF NOT EXISTS transaction_bd (
        TRANSACTION_ID INTEGER,
        TX_DATETIME NUMERIC,
        CUSTOMER_ID INTEGER,
        TERMINAL_ID INTEGER,
        TX_AMOUNT REAL DEFAULT 0
    );
"""

In [75]:
execute_query(connection, create_transation_table)  

Query executed successfully


In [76]:
df = pd.read_csv("transactions_for_dz2.csv")
df.to_sql("transaction_bd", connection, if_exists="append", index=False)

1048575

In [77]:
pd.set_option('display.float_format', '{:.2f}'.format)

In [78]:
pd.read_sql(f"SELECT * FROM transaction_bd", connection)

Unnamed: 0,TRANSACTION_ID,TX_DATETIME,CUSTOMER_ID,TERMINAL_ID,TX_AMOUNT
0,0,2023-01-01 00:00:31,596,3156,533.07
1,1,2023-01-01 00:02:10,4961,3412,808.56
2,2,2023-01-01 00:07:56,2,1365,1442.94
3,3,2023-01-01 00:09:29,4128,8737,620.65
4,4,2023-01-01 00:10:34,927,9906,490.66
...,...,...,...,...,...
1048570,1048570,2023-04-20 10:07:13,2380,3780,325.64
1048571,1048571,2023-04-20 10:07:28,738,5151,20.38
1048572,1048572,2023-04-20 10:07:33,1000,5417,182.79
1048573,1048573,2023-04-20 10:07:39,3028,6439,455.44


In [79]:
create_customer_table = """
    CREATE TABLE IF NOT EXISTS customer_bd (
        CLIENT_ID INTEGER,
        START_DT NUMERIC,
        END_DT NUMERIC,
        CLIENT_NAME TEXT,
        YEAR_BIRTH TEXT
    );
"""

In [80]:
execute_query(connection, create_customer_table)  

Query executed successfully


In [81]:
df = pd.read_csv("client_info.csv", sep=";")
df.to_sql("customer_bd", connection, if_exists="append", index=False)

4988

In [82]:
pd.read_sql(f"SELECT * FROM customer_bd", connection)

Unnamed: 0,CLIENT_ID,START_DT,END_DT,CLIENT_NAME,YEAR_BIRTH
0,2213,2015-07-16,2021-01-01,Olivia,1979
1,1148,2015-07-17,2021-01-01,Emma,1979
2,2293,2015-07-18,2021-01-01,Charlotte,1979
3,1867,2015-07-19,2021-01-01,Amelia,1979
4,1767,2015-07-20,2021-01-01,Ava,1979
...,...,...,...,...,...
4983,4737,2023-04-20,2999-12-31,Havilah,2000
4984,2301,2023-04-20,2999-12-31,Hazelyn,2000
4985,3238,2023-04-20,2999-12-31,Helaina,2000
4986,3946,2023-04-20,2999-12-31,Helene,2000


## Шаг 2

### A. Клиенты с максимальной суммой транзакций за весь период
(клиент должен быть действующим, то есть дата закрытия записи о клиенте не должна быть меньше дня относительно которого мы считаемся)

In [83]:
df = pd.read_sql(
    f"""
        WITH gt AS
        (SELECT
             CUSTOMER_ID,
             SUM(TX_AMOUNT) AS SUM_TX_AMOUNT
         FROM
             transaction_bd
         GROUP BY
             CUSTOMER_ID
        )
        SELECT 
            c.CLIENT_NAME,
            gt.SUM_TX_AMOUNT,
            c.END_DT
        FROM
            gt
        LEFT JOIN
            customer_bd c
                ON gt.CUSTOMER_ID = c.CLIENT_ID
        WHERE
                gt.SUM_TX_AMOUNT = (SELECT MAX(SUM_TX_AMOUNT) FROM gt)
            AND DATE(c.END_DT) >= DATE('2023-05-01')
        ;
    """, connection)
df

Unnamed: 0,CLIENT_NAME,SUM_TX_AMOUNT,END_DT
0,Kylee,786115.87,2999-12-31


### B. Имя клиента/клиентов с минимальной суммой транзакций за весь период

In [84]:
df = pd.read_sql(
    f"""
        WITH gt AS
        (SELECT
             CUSTOMER_ID,
             SUM(TX_AMOUNT) AS SUM_TX_AMOUNT
         FROM
             transaction_bd
         GROUP BY
             CUSTOMER_ID
        )
        SELECT 
            c.CLIENT_NAME,
            gt.SUM_TX_AMOUNT,
            c.END_DT
        FROM
            gt
        LEFT JOIN
            customer_bd c
                ON gt.CUSTOMER_ID = c.CLIENT_ID
        WHERE
                gt.SUM_TX_AMOUNT = (SELECT MIN(SUM_TX_AMOUNT) FROM gt)
            AND DATE(c.END_DT) >= DATE('2023-05-01')
        ;
    """, connection)
df

Unnamed: 0,CLIENT_NAME,SUM_TX_AMOUNT,END_DT
0,Kloe,30.48,2999-12-31


### C. Найти сумму транзакций относительно даты 2023-04-01 для клиентов, у которых id начинается с 111
Транзакции учитываются только после завершении дня.

In [85]:
df = pd.read_sql(
    f"""
        SELECT
            t.CUSTOMER_ID,
            c.CLIENT_NAME,
            SUM(t.TX_AMOUNT) AS SUM_TX_AMOUNT
        FROM
            transaction_bd t
        LEFT JOIN
            customer_bd c
                ON t.CUSTOMER_ID = c.CLIENT_ID
        WHERE
                DATE(t.TX_DATETIME) < DATE('2023-04-01')
            AND DATE(c.END_DT) >= DATE('2023-04-01')
            AND CAST(t.CUSTOMER_ID AS VARCHAR) LIKE '111%'
        GROUP BY
            t.CUSTOMER_ID
        ORDER BY
            c.CLIENT_ID
        ;
    """, connection)
df

Unnamed: 0,CUSTOMER_ID,CLIENT_NAME,SUM_TX_AMOUNT
0,1112,Tristan,35273.16
1,1113,Destiny,229558.85
2,1114,Daylani,108622.52
3,1115,Ishanvi,38690.76
4,1116,Romina,191203.59
5,1117,Alta,52751.58
6,1118,Gisselle,353003.39
7,1119,Maelani,221192.71


### D. Найти сумму транзакций относительно года рождения клиентов
Сортировать по убыванию года рождения.

In [86]:
df = pd.read_sql(
    f"""
        SELECT
            c.YEAR_BIRTH,
            SUM(t.TX_AMOUNT) AS SUM_TX_AMOUNT
        FROM
            transaction_bd t
        LEFT JOIN
            customer_bd c
                ON t.CUSTOMER_ID = c.CLIENT_ID
        WHERE
            DATE(c.END_DT) >= DATE('2023-05-01')
        GROUP BY
            c.YEAR_BIRTH
        ORDER BY
            c.YEAR_BIRTH DESC
        ;
    """, connection)
df

Unnamed: 0,YEAR_BIRTH,SUM_TX_AMOUNT
0,2000,86327196.3
1,1999,1041506.56
2,1998,601675.01
3,1997,8479942.93
4,1996,1483252.51
5,1995,6321199.64
6,1994,6244280.8
7,1993,5732058.13
8,1992,5251433.3
9,1991,7141073.57


### F. Количество транзакций для каждого клиента.
Сортировать по убыванию количества транзакций.

In [87]:
df = pd.read_sql(
    f"""
        SELECT
            t.CUSTOMER_ID,
            c.CLIENT_NAME,
            COUNT(t.TRANSACTION_ID) AS COUNT_TX_AMOUNT
        FROM
            transaction_bd t
        LEFT JOIN
            customer_bd c
                ON t.CUSTOMER_ID = c.CLIENT_ID
        WHERE
            DATE(c.END_DT) >= DATE('2023-05-01')
        GROUP BY
            t.CUSTOMER_ID
        ORDER BY
            COUNT_TX_AMOUNT DESC
        ;
    """, connection)
df

Unnamed: 0,CUSTOMER_ID,CLIENT_NAME,COUNT_TX_AMOUNT
0,2891,Kylee,477
1,2580,Cerenity,464
2,1953,Adara,455
3,2932,Michaela,454
4,3864,Claire,453
...,...,...,...
3731,1976,Becky,1
3732,1942,Perel,1
3733,1880,Roman,1
3734,1095,Marisa,1


## Шаг 3

In [88]:
df = pd.read_sql(
    f"""
        SELECT
            strftime('%m', TX_DATETIME) AS MONTH,
            CASE strftime('%m', TX_DATETIME)
                WHEN '01' THEN 'Январь'
                WHEN '02' THEN 'Февраль'
                WHEN '03' THEN 'Март'
                WHEN '04' THEN 'Апрель'
            END AS MONTH_NAME,
            SUM(t.TX_AMOUNT) AS SUM_TX_AMOUNT
        FROM
            transaction_bd t
        LEFT JOIN
            customer_bd c
                ON t.CUSTOMER_ID = c.CLIENT_ID
        WHERE
            DATE(c.END_DT) >= DATE('2023-05-01')
        GROUP BY
            strftime('%m', TX_DATETIME)
        ORDER BY
            strftime('%m', TX_DATETIME)
        ;
    """, connection)
df

Unnamed: 0,MONTH,MONTH_NAME,SUM_TX_AMOUNT
0,1,Январь,115919886.27
1,2,Февраль,106229454.86
2,3,Март,115053440.27
3,4,Апрель,72618124.49


### Закрываем соединение

In [89]:
connection.close()