# Подключаемся к БД

In [None]:
import psycopg2 as ps
import pandas as pd
import os

schema = 'shustikov' # Set this to your own surname; it is used as the schema name

# Open a connection to the "dev" database on host "postgres_source";
# the user and password are read from the POSTGRES_USER / POSTGRES_PASSWORD
# environment variables.
conn = ps.connect(host="postgres_source", 
                  port = 5432, 
                database="dev", 
                user=os.getenv("POSTGRES_USER"), 
                password=os.getenv("POSTGRES_PASSWORD"))

# Module-level cursor on that connection.
cursor = conn.cursor()

# Создаём тестовое окружение

In [None]:
# Create the schema if it does not exist yet
cursor.execute(f'''
    CREATE SCHEMA IF NOT EXISTS {schema};
    ''')

# Drop any tables left over from a previous run so the cell can be re-executed
cursor.execute(f'''
    DROP TABLE IF EXISTS {schema}.employees;
    DROP TABLE IF EXISTS {schema}.departments CASCADE;
''')

# Departments lookup table
cursor.execute(f'''
    CREATE TABLE {schema}.departments (
        dept_id SERIAL PRIMARY KEY,
        dept_name VARCHAR(50),
        location VARCHAR(50)
    )
''')

# Employees table; `department` stores the department name as plain text
# (there is no foreign key to departments)
cursor.execute(f'''
    CREATE TABLE {schema}.employees (
        id SERIAL PRIMARY KEY,
        name VARCHAR(100),
        department VARCHAR(50),
        salary NUMERIC(10,2),
        hire_date DATE,
        email VARCHAR(100)
    )
''')

# Insert test data
cursor.execute(f'''
    INSERT INTO {schema}.departments (dept_name, location) VALUES
    ('HR', 'Москва'),
    ('IT', 'Санкт-Петербург'),
    ('Finance', 'Москва'),
    ('DE', 'Краснодар')
''')

# One employee has a NULL salary, and no employee belongs to the 'DE' department
cursor.execute(f'''
    INSERT INTO {schema}.employees (name, department, salary, hire_date, email) VALUES
    ('Иван Иванов', 'HR', 50000, '2020-01-15', 'ivanov@example.com'),
    ('Петр Петров', 'IT', NULL, '2019-03-10', 'petrov@example.com'),
    ('Светлана Смирнова', 'Finance', 60000, '2021-07-22', 'smit@example.com'),
    ('Алексей Кузнецов', 'IT', 80000, '2018-11-05', 'kuznecov@example.com'),
    ('Мария Иванова', 'HR', 52000, '2022-02-17', 'maria@example.com')
''')
# Persist everything executed above
conn.commit()


In [None]:
def query_sql(query, is_insert=False):
    """Run a SQL statement on a fresh connection and return the result.

    A new connection to the "dev" database is opened for every call and is
    always closed before returning, even when the statement fails.

    Args:
        query: SQL text to execute.
        is_insert: Pass True for statements that return no rows (INSERT,
            UPDATE, DDL, ...). The statement is then committed and an empty
            DataFrame is returned. With the default (False) all rows are
            fetched.

    Returns:
        pandas.DataFrame with one column per result column (names
        lower-cased), or an empty DataFrame when ``is_insert`` is true.

    Raises:
        psycopg2.Error: if connecting or executing the statement fails.
    """
    conn = ps.connect(host="postgres_source",
                      port=5432,
                      database="dev",
                      user=os.getenv("POSTGRES_USER"),
                      password=os.getenv("POSTGRES_PASSWORD"))
    try:
        # The cursor context manager closes the cursor on exit, including
        # when execute() raises.
        with conn.cursor() as cursor:
            cursor.execute(query)
            if is_insert:
                df = pd.DataFrame()
            else:
                rows = cursor.fetchall()
                field_names = [col[0].lower() for col in cursor.description]
                df = pd.DataFrame(rows, columns=field_names)
            # Kept from the original implementation.
            # NOTE(review): presumably clears prepared statements left on a
            # pooled server session - confirm whether it is still needed.
            cursor.execute("DEALLOCATE ALL")
        if is_insert:
            # Without an explicit commit psycopg2 discards the transaction
            # when the connection is closed, so the write would be lost.
            conn.commit()
    finally:
        # Release the server connection instead of leaking one per call.
        conn.close()
    return df

# 1. Извлечение информации при помощи SQL-запросов

In [None]:
# Return every column of every row in the employees table.
query_sql(f'''

SELECT * 
FROM {schema}.employees

''')

# 2. Фильтрация строк **WHERE**, **LIKE**, **ILIKE**, **AND**, **OR**

**WHERE**

In [None]:
# Keep only the rows whose department is exactly 'IT'.
query_sql(f'''

SELECT * 
FROM {schema}.employees 
WHERE department = 'IT'

''')

**LIKE/ILIKE**

In [None]:
# LIKE is case-sensitive. '%' matches any run of characters and '_' matches
# exactly one character, so the '_' in this pattern matches the dot in
# "example.com" (and would match any other single character as well).
query_sql(f'''

SELECT * 
FROM {schema}.employees 
WHERE email LIKE '%@example_com'

''')

**AND**

In [None]:
# Both conditions must hold: HR employees with a salary above 51000.
query_sql(f'''

SELECT * 
FROM {schema}.employees 
WHERE department = 'HR' 
    AND salary > 51000
    
''')

**OR**

In [None]:
# Either condition is enough: HR employees, or anyone with a salary above 75000.
query_sql(f'''

SELECT * 
FROM {schema}.employees 
WHERE department = 'HR' 
    OR salary > 75000

''')

# 3. Сортировка строк: **ORDER BY**, **LIMIT**, **OFFSET**

**ORDER BY**

In [None]:
# Sort by salary, highest first. In PostgreSQL NULLs sort first under DESC
# unless NULLS LAST is specified.
query_sql(f'''

SELECT * 
FROM {schema}.employees 
ORDER BY salary DESC

''')

**ORDER BY ... LIMIT ...**

In [None]:
# Sort by hire_date ascending and keep only the first 3 rows.
query_sql(f'''

SELECT * 
FROM {schema}.employees 
ORDER BY hire_date 
LIMIT 3

''')

**ORDER BY ... OFFSET ...**

In [None]:
# Sort by salary ascending, skip the first 2 rows, then return the next 2.
query_sql(f'''

SELECT * 
FROM {schema}.employees 
ORDER BY salary 
LIMIT 2 OFFSET 2

''')

# 4. Условные выражения: **CASE**, **COALESCE**, **NULLIF**, **GREATEST**, **LEAST**

**CASE**

In [None]:
# Label each salary; the WHEN branches are tested top to bottom. A NULL salary
# satisfies neither WHEN condition, so it falls through to the ELSE label.
query_sql(f'''

    SELECT name,
           salary,
           CASE WHEN salary > 60000 THEN 'Высокая'
                WHEN salary BETWEEN 50000 AND 60000 THEN 'Средняя'
                ELSE 'Низкая' 
            END AS salary_level
    FROM {schema}.employees

''')

**COALESCE**

In [None]:
# COALESCE returns its first non-NULL argument, so a NULL email is replaced
# by the fallback text.
query_sql(f'''

    SELECT 
        name, 
        COALESCE(email, 'Нет email') AS email_info 
    FROM {schema}.employees

''')

**NULLIF**

In [None]:
# NULLIF returns NULL when its two arguments are equal, so 'HR' becomes NULL
# and every other department value is returned unchanged.
query_sql(f'''

    SELECT 
        name, 
        NULLIF(department, 'HR') AS dept_without_hr 
    FROM {schema}.employees

''')

**GREATEST / LEAST**

In [None]:
# GREATEST / LEAST return the largest / smallest of their arguments.
# In PostgreSQL NULL arguments are ignored; the result is NULL only when
# every argument is NULL.
query_sql(f'''

    SELECT 
        name, 
        salary,
        GREATEST(salary, 60000) AS max_salary, 
        LEAST(salary, 60000) AS min_salary 
    FROM {schema}.employees

''')

# 5. Работа со строками: **SUBSTRING**, **LOWER**, **UPPER**, **TRIM**, **LENGTH**

In [None]:
# String helpers: SUBSTRING(name, 1, 5) takes 5 characters starting at
# position 1, LOWER / UPPER change case, LENGTH counts characters, and TRIM
# strips the leading and trailing spaces from the literal.
query_sql(f'''

    SELECT name, 
           SUBSTRING(name, 1, 5) AS name_substring,
           LOWER(email) AS email_lower,
           UPPER(name) AS name_upper,
           LENGTH(name) AS name_length,
           TRIM('   текст с пробелами   ') AS trimmed_text
    FROM {schema}.employees
    
''')

# 6. Работа с датами

In [None]:
# Date helpers applied to hire_date. Every computed column has an explicit
# alias: PostgreSQL labels operator expressions "?column?", so the two
# additions would otherwise produce duplicate DataFrame column names.
query_sql(f'''

    SELECT name,
           hire_date,
           DATE(hire_date) AS hire_date_only,
           TO_CHAR(hire_date, 'yyyy-mm') AS hire_year_month,
           DATE_TRUNC('month', hire_date) AS hire_month_start,
           hire_date + 1 AS hire_date_plus_day,
           hire_date + interval '1 hour' AS hire_date_plus_hour,
           DATE_PART('DOW', hire_date) AS hire_day_of_week
    FROM {schema}.employees

''')

# 7. Агрегатные функции: **COUNT**, **SUM**, **MIN**, **MAX**, **AVG**, **GROUP BY**, **HAVING**

In [None]:
# Per-department aggregates. COUNT(*) counts rows while COUNT(salary) skips
# NULL salaries, so avg_salary_2 (sum divided by all rows) can differ from
# avg_salary_1 and avg_salary_3, which both ignore NULL salaries.
query_sql(f'''

    SELECT 
        department, 
        COUNT(*) AS count_employees, 
        COUNT(salary) as count_salary,
        AVG(salary) AS avg_salary_1,
        SUM(salary) / COUNT(*) AS avg_salary_2,
        SUM(salary) / COUNT(salary) AS avg_salary_3,
        MIN(salary) as min_salary,
        MAX(salary) as max_salary
    FROM {schema}.employees
    GROUP BY department
    
''')

In [None]:
# HAVING filters the groups after aggregation: keep only departments with
# more than one employee.
query_sql(f'''

    SELECT 
        department, 
        COUNT(*) AS count_employees
    FROM {schema}.employees
    GROUP BY department
    HAVING COUNT(*) > 1

''')

# 8. Оконные функции: **row_number**, **rank**, **dense_rank**, **lag**, **lead**

In [None]:
# Window functions computed alongside each row (no grouping):
#   - ROW_NUMBER numbers the rows by salary, highest first;
#   - RANK / DENSE_RANK rank by department name (RANK leaves gaps after ties,
#     DENSE_RANK does not);
#   - LAG / LEAD read the salary of the previous / next row in salary order;
#   - SUM(...) OVER () is the total over all rows, while adding ORDER BY turns
#     it into a running total.
# The two sums have distinct aliases so the DataFrame has no duplicate columns.
query_sql(f'''

    SELECT
        name,
        salary,
        department,
        ROW_NUMBER() OVER (ORDER BY salary DESC) AS row_num,
        RANK() OVER (ORDER BY department DESC) AS rank,
        DENSE_RANK() OVER (ORDER BY department DESC) AS dense_rank,
        LAG(salary, 1) OVER (ORDER BY salary DESC) AS prev_salary,
        LEAD(salary, 1) OVER (ORDER BY salary DESC) AS next_salary,
        SUM(salary) OVER() as sal_all,
        SUM(salary) OVER(order by salary DESC) as sal_running
    FROM {schema}.employees
    order by salary DESC

''')

# 9. Типы связей в БД и оператор **JOIN**

In [None]:

# Inner join on the department name: only employees whose department matches
# a dept_name are returned.
query_sql(f'''

    SELECT e.name, d.dept_name, d.location
    FROM {schema}.employees e
        JOIN {schema}.departments d 
            ON e.department = d.dept_name

''')

In [None]:

# Right join: every department is returned; e.name is NULL for a department
# that no employee matches.
query_sql(f'''

    SELECT e.name, d.dept_name, d.location
    FROM {schema}.employees e
        RIGHT JOIN {schema}.departments d 
            ON e.department = d.dept_name

''')

In [None]:
# Cross join: every employee is paired with every department (no join condition).
query_sql(f'''

    SELECT e.name, d.dept_name, d.location
    FROM {schema}.employees e
        CROSS JOIN {schema}.departments d 

''')

# 10. Подзапросы

In [None]:
# Scalar subquery: keep employees whose salary is above the average salary
# of the whole employees table.
query_sql(f'''

    SELECT name, salary
    FROM {schema}.employees
    WHERE salary > (SELECT AVG(salary) 
                    FROM {schema}.employees)

''')

In [None]:
# CTE form: t1 holds the average salary, and the outer query keeps employees
# whose salary is above it.
query_sql(f'''
    WITH t1 AS (
        SELECT AVG(salary) as avg_salary FROM {schema}.employees
    )
    SELECT name, salary
    FROM {schema}.employees
    WHERE salary > (SELECT avg_salary FROM t1)

''')