### KAFKA CONNECT

In [4]:
import requests
import json

response = requests.get("http://localhost:8083/connectors")
response.json()

['file-source-connector']

In [2]:
response = requests.get("http://localhost:8083/connectors/file-source-connector")
response.json()

{'error_code': 404, 'message': 'Connector file-source-connector not found'}

Создаем коннектор для чтения из файла

In [3]:
import requests
import json

url = 'http://localhost:8083/connectors'

headers = {
    'Content-Type': 'application/json'
}

data = {
    "name": "file-source-connector",  # Имя коннектора
    "config": {
        "connector.class": "FileStreamSource",  # Класс коннектора
        "tasks.max": "1",  # Количество задач
        "file": "/tmp/tmp/test-file.txt",  # Путь к файлу, который нужно читать
        "topic": "file-topic",  # Топик, куда отправлять данные
        "poll.interval.ms": "1000"  # Интервал опроса файла (в миллисекундах)
    }
}

response = requests.post(url, headers=headers, data=json.dumps(data))

if response.status_code == 201:
    print('Файловый коннектор успешно создан')
else:
    print('Ошибка при создании файлового коннектора:', response.status_code, response.text)


Файловый коннектор успешно создан


In [5]:
## STATUS
## GET /connectors/{connector_name}/status
response = requests.get("http://localhost:8083/connectors/file-source-connector/status")
response.json()

{'name': 'file-source-connector',
 'connector': {'state': 'RUNNING', 'worker_id': '172.18.0.6:8083'},
 'tasks': [{'id': 0, 'state': 'RUNNING', 'worker_id': '172.18.0.6:8083'}],
 'type': 'source'}

## DEBEZIUM CDC

1. Создадим табличку my_table
2. Создадим коннектор к ней

In [6]:
from sqlalchemy import create_engine
import pandas as pd
import os
import time

pwd = "secret"
uid = "debezium"
server = "localhost"
db = "mydb"
port = "5432"
#
engine = create_engine(f'postgresql://{uid}:{pwd}@{server}:{port}/{db}')

In [7]:
df = pd.read_sql('select * from public.my_table', engine)
df

Unnamed: 0,id,name
0,1,John Doe
1,2,Alice
2,3,Bob
3,4,John Doe
4,5,Alice
5,6,Bob
6,7,asd
7,8,Alice
8,9,Bob
9,10,asd----


In [91]:
# Создайте DataFrame с данными для вставки
data = {'name': ['aaaaaaaaaaaaaaa', 'ssssssssss', 'ddddddddddddd']}
df = pd.DataFrame(data)

# Вставка данных в my_table
df.to_sql('my_table', engine, if_exists='append', index=False)

3

In [8]:
response = requests.get("http://localhost:8083/")
response.json()

{'version': '2.6.1',
 'commit': '6b2021cd52659cef',
 'kafka_cluster_id': 'MkU3OEVBNTcwNTJENDM2Qk'}

In [9]:
# TOPIC создастся автоматически!
# source.public.my_table

postgres_connector = {
    "name": "my_table-connector",
    "config": {
        "connector.class": "io.debezium.connector.postgresql.PostgresConnector",
        "database.hostname": "postgr",  
        "database.port": "5432",
        "database.user": "debezium",      
        "database.password": "secret", 
        "database.dbname": "mydb",  
        "plugin.name": "pgoutput",
        "database.server.name": "source",
        "key.converter.schemas.enable": "false",
        "value.converter.schemas.enable": "false",
        "transforms": "unwrap",
        "transforms.unwrap.type": "io.debezium.transforms.ExtractNewRecordState",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "key.converter": "org.apache.kafka.connect.json.JsonConverter",
        "table.include.list": "public.my_table", 
        "slot.name": "dbz_sales_transaction_slot"
    }
}


In [10]:
response = requests.get('http://localhost:8083/connectors/')
response.json()

['file-source-connector']

In [11]:
url = 'http://localhost:8083/connectors/'


# Отправляем данные как JSON
response = requests.post(url, json = postgres_connector)

# Проверяем статус код ответа
if response.status_code == 200:
    print("Данные успешно отправлены.")
    print("Ответ сервера:", response.json())  # Если сервер возвращает JSON-ответ
else:
    print("Ошибка при отправке данных:", response.status_code, response.text)

Ошибка при отправке данных: 201 {"name":"my_table-connector","config":{"connector.class":"io.debezium.connector.postgresql.PostgresConnector","database.hostname":"postgr","database.port":"5432","database.user":"debezium","database.password":"secret","database.dbname":"mydb","plugin.name":"pgoutput","database.server.name":"source","key.converter.schemas.enable":"false","value.converter.schemas.enable":"false","transforms":"unwrap","transforms.unwrap.type":"io.debezium.transforms.ExtractNewRecordState","value.converter":"org.apache.kafka.connect.json.JsonConverter","key.converter":"org.apache.kafka.connect.json.JsonConverter","table.include.list":"public.my_table","slot.name":"dbz_sales_transaction_slot","name":"my_table-connector"},"tasks":[],"type":"source"}


In [12]:
response = requests.get('http://localhost:8083/connectors/my_table-connector')
response.json()

{'name': 'my_table-connector',
 'config': {'connector.class': 'io.debezium.connector.postgresql.PostgresConnector',
  'database.user': 'debezium',
  'database.dbname': 'mydb',
  'slot.name': 'dbz_sales_transaction_slot',
  'transforms': 'unwrap',
  'database.server.name': 'source',
  'database.port': '5432',
  'plugin.name': 'pgoutput',
  'key.converter.schemas.enable': 'false',
  'database.hostname': 'postgr',
  'database.password': 'secret',
  'value.converter.schemas.enable': 'false',
  'name': 'my_table-connector',
  'transforms.unwrap.type': 'io.debezium.transforms.ExtractNewRecordState',
  'value.converter': 'org.apache.kafka.connect.json.JsonConverter',
  'table.include.list': 'public.my_table',
  'key.converter': 'org.apache.kafka.connect.json.JsonConverter'},
 'tasks': [{'connector': 'my_table-connector', 'task': 0}],
 'type': 'source'}

In [13]:
response = requests.get('http://localhost:8083/connectors/my_table-connector/status')
response.json()

{'name': 'my_table-connector',
 'connector': {'state': 'RUNNING', 'worker_id': '172.18.0.6:8083'},
 'tasks': [{'id': 0, 'state': 'RUNNING', 'worker_id': '172.18.0.6:8083'}],
 'type': 'source'}

#### Ограничиваем столбцы через TRANSFORM

In [16]:
postgres_connector_my_table2 = {
    "name": "my_connector22",
    "config": {
        "connector.class": "io.debezium.connector.postgresql.PostgresConnector",
        "database.hostname": "postgr",  
        "database.port": "5432",
        "database.user": "debezium",      
        "database.password": "secret", 
        "database.dbname": "mydb",  
        "plugin.name": "pgoutput",
        "database.server.name": "source",
        "key.converter.schemas.enable": "false",
        "value.converter.schemas.enable": "false",
        "transforms": "unwrap",
        "transforms.unwrap.type": "io.debezium.transforms.ExtractNewRecordState",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "key.converter": "org.apache.kafka.connect.json.JsonConverter",
        "table.include.list": "public.my_table2",  # Укажите таблицы
        "slot.name": "dbz_sales_transaction_slot1",
        "transforms.unwrap.add.fields": "id,name2"  # Добавьте необходимые колонки
    }
}


In [17]:
url = 'http://localhost:8083/connectors/'

# Отправляем данные как JSON
response = requests.post(url, json = postgres_connector_my_table2)

# Проверяем статус код ответа
if response.status_code == 200:
    print("Данные успешно отправлены.")
    print("Ответ сервера:", response.json())  # Если сервер возвращает JSON-ответ
else:
    print("Ошибка при отправке данных:", response.status_code, response.text)

Ошибка при отправке данных: 201 {"name":"my_connector22","config":{"connector.class":"io.debezium.connector.postgresql.PostgresConnector","database.hostname":"postgr","database.port":"5432","database.user":"debezium","database.password":"secret","database.dbname":"mydb","plugin.name":"pgoutput","database.server.name":"source","key.converter.schemas.enable":"false","value.converter.schemas.enable":"false","transforms":"unwrap","transforms.unwrap.type":"io.debezium.transforms.ExtractNewRecordState","value.converter":"org.apache.kafka.connect.json.JsonConverter","key.converter":"org.apache.kafka.connect.json.JsonConverter","table.include.list":"public.my_table2","slot.name":"dbz_sales_transaction_slot1","transforms.unwrap.add.fields":"id,name2","name":"my_connector22"},"tasks":[],"type":"source"}


In [18]:
response = requests.get('http://localhost:8083/connectors/my_connector22/status')
response.json()

{'name': 'my_connector22',
 'connector': {'state': 'RUNNING', 'worker_id': '172.18.0.6:8083'},
 'tasks': [{'id': 0,
   'state': 'FAILED',
   'worker_id': '172.18.0.6:8083',
   'trace': 'org.apache.kafka.connect.errors.ConnectException: Tolerance exceeded in error handler\n\tat org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndHandleError(RetryWithToleranceOperator.java:206)\n\tat org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execute(RetryWithToleranceOperator.java:132)\n\tat org.apache.kafka.connect.runtime.TransformationChain.apply(TransformationChain.java:50)\n\tat org.apache.kafka.connect.runtime.WorkerSourceTask.sendRecords(WorkerSourceTask.java:339)\n\tat org.apache.kafka.connect.runtime.WorkerSourceTask.execute(WorkerSourceTask.java:264)\n\tat org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:185)\n\tat org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:235)\n\tat java.base/java.util.concurrent.Executors$Ru

In [6]:
from confluent_kafka.admin import AdminClient   

# Определите адреса брокеров Kafka
bootstrap_servers = ['localhost:9092']

# Настройка Kafka
config = {
    'bootstrap.servers': 'localhost:9092'  # Адрес вашего Kafka-брокера
}
# Создайте экземпляр KafkaAdminClient
admin_client = AdminClient(config)

# Запрос метаданных для получения списка топиков
metadata = admin_client.list_topics(timeout=10)

# Получение списка топиков
topics = metadata.topics

# Вывод списка топиков
for topic in topics:
    print(topic)

order_app-order_counts-changelog
order_app-__assignor-__leader
_confluent-ksql-default__command_topic
connect_status
_confluent-ksql-default_query_CTAS_USER_ORDERS_9-Aggregate-Aggregate-Materialize-changelog
connect_offsets
_schemas
orders_faust
USER_ORDERS
connect_configs
_confluent-ksql-default_query_CTAS_USER_ORDERS_9-Aggregate-GroupBy-repartition
__transaction_state
orders
__consumer_offsets


In [7]:
import json
topicName = 'source.public.my_table'
# Initialize consumer variable
consumer = KafkaConsumer (topicName , auto_offset_reset='earliest', 
                          bootstrap_servers = bootstrap_servers, group_id='sales-transactions')

# Read and print message from consumer
for msg in consumer:
    print(json.loads(msg.value))

NameError: name 'KafkaConsumer' is not defined

In [77]:
# Удаление
#response = requests.delete('http://localhost:8083/connectors/my_table-connector')
#response.json()