### bq 데이터를 kafka에 저장

In [49]:
# BigQuery source used by the sample query in this notebook: the table is
# addressed as `PROJECT.DATASET.TABLE`, and LIMIT caps the number of rows fetched.
PROJECT = "emart-datafabric"
DATASET = "common_dev"
TABLE = "dfm_sample_eapp_data"
LIMIT = 10

In [50]:
# Kafka connection settings shared by the Producer and Consumer threads.
BOOTSTRAP_SERVERS = "datafabric-kafka-kafka-bootstrap.kafka-farm.svc.cluster.local:9092"
# A single topic name (despite the plural); used both for send() and subscribe().
TOPICS = 'test'
# Consumer group id passed to KafkaConsumer.
CONSUMER_GROUP = 'test-datafabric'

In [51]:
import traceback
from pydatafabric.gcp import bq_to_pandas

# Fetch up to LIMIT rows with a non-empty `comments` column from the configured
# table, renaming `review_id` -> `key` and `comments` -> `value`.
# NOTE(review): `bq_to_pandas` is presumably returning a pandas DataFrame (the
# result is used via `.head()`, `.dtypes` and `.to_json()`) - confirm against
# pydatafabric.
df = bq_to_pandas(f"""
    select review_id as key, comments as value
    from  `{PROJECT}.{DATASET}.{TABLE}`
    where comments != ''
    limit {LIMIT}
""")

unsupported operand type(s) for /: 'NoneType' and 'int'


Downloading: 100%|██████████| 10/10 [00:00<00:00, 12.45rows/s]


In [52]:
# Preview the first 10 rows of the fetched data.
df.head(10)

Unnamed: 0,key,value
0,2208281244313807,샤인머스캣 너무 비싸요
1,2208280027362716,요즘 계속 사게 되네요
2,2208281618561112,주말 특가로 저령하게 구입했는데 당도도 아주 좋아요
3,2208280253124586,당도도 높고 너무 맛있게먹었어요
4,2208281913283031,.껍질이 두꺼워 실망했어요 먹을부분은 작아지니까~ 맛은 달콤하니 좋아여. 여름 끝...
5,2208280102419526,가격도싸고달고맛있어요.
6,2208281643108429,포도하면 송산포도가 젤이예여
7,2208280202528223,좋아해서 구매했는데.맛있습니다.
8,2208280032177490,올해 먹은 포도 중에 최고
9,2208280956359149,조아요 조아요 조아요 조아요 조아요 조아요


In [54]:
# Show the column dtypes of the fetched data.
print(df.dtypes)

key      object
value    object
dtype: object


### Kafka Producer, Consumer, Create Topic Example

In [6]:
# Install the kafka-python client, which provides the `kafka` package imported below.
!python -m pip install kafka-python

Defaulting to user installation because normal site-packages is not writeable


In [62]:
import threading, time
from kafka import KafkaProducer, KafkaConsumer, KafkaAdminClient
from kafka.admin import NewTopic
from json import loads, dumps
    
    
class Producer(threading.Thread):
    """Background thread that publishes the module-level ``df`` to Kafka.

    While running, it sends ``df.to_json(force_ascii=False)`` to ``TOPICS``
    roughly once per second until :meth:`stop` is called.
    """

    def __init__(self):
        super().__init__()
        # Set by stop() to ask run() to leave its publishing loop.
        self.stop_event = threading.Event()

    def stop(self):
        """Signal the publishing loop in :meth:`run` to finish."""
        self.stop_event.set()

    def run(self):
        """Publish the DataFrame once per second until stopped, then close the client."""
        # NOTE(review): df.to_json() already returns a JSON string, so the
        # serializer's dumps() encodes it a second time; consumers therefore
        # get a JSON *string* (not an object) after a single loads(). The wire
        # format is kept as-is here so existing consumers keep working.
        producer = KafkaProducer(acks="all",
                                 compression_type='gzip',
                                 bootstrap_servers=BOOTSTRAP_SERVERS,
                                 value_serializer=lambda x: dumps(x).encode('utf-8'))

        try:
            while not self.stop_event.is_set():
                producer.send(TOPICS, value=df.to_json(force_ascii=False))
                # Wait on the event instead of sleeping so that stop() ends
                # the pause immediately rather than after up to one second.
                self.stop_event.wait(1)
        finally:
            # Always release the client, even if send()/to_json() raised.
            producer.close()
        

class Consumer(threading.Thread):
    """Background thread that prints every record received from ``TOPICS``.

    It joins the ``CONSUMER_GROUP`` consumer group and keeps polling until
    :meth:`stop` is called.
    """

    def __init__(self):
        super().__init__()
        # Set by stop() to ask run() to leave its polling loop.
        self.stop_event = threading.Event()

    def stop(self):
        """Signal the polling loop in :meth:`run` to finish."""
        self.stop_event.set()

    def run(self):
        """Print incoming records until stopped, then close the client."""
        consumer = KafkaConsumer(bootstrap_servers=BOOTSTRAP_SERVERS,
                                 auto_offset_reset='earliest',
                                 group_id=CONSUMER_GROUP,
                                 value_deserializer=lambda x: loads(x.decode('utf-8')),
                                 max_poll_records=2,
                                 # End iteration after 1 s without messages so
                                 # the outer loop can re-check stop_event.
                                 consumer_timeout_ms=1000)

        try:
            consumer.subscribe([TOPICS])

            while not self.stop_event.is_set():
                for message in consumer:
                    print(message)
                    if self.stop_event.is_set():
                        break
        finally:
            # Always release the client, even if subscribing, polling or
            # deserializing a record raised.
            consumer.close()

                                 
def main(run_seconds=10):
    """Run a Producer and a Consumer thread side by side for a fixed time.

    Args:
        run_seconds: How long to let both threads run before asking them to
            stop. Defaults to 10, the previously hard-coded duration.
    """
    # Topic creation example, left disabled:
    # try:
    #     admin = KafkaAdminClient(bootstrap_servers=BOOTSTRAP_SERVERS)
    #     topic = NewTopic(name=TOPICS,
    #                      num_partitions=1,
    #                      replication_factor=1)
    #     admin.create_topics([topic])
    # except Exception as e:
    #     print(e)

    tasks = [Producer(), Consumer()]

    for task in tasks:
        task.start()

    try:
        time.sleep(run_seconds)
    finally:
        # Stop and join even when the wait is interrupted (e.g. the notebook
        # kernel is interrupted); otherwise the non-daemon threads would keep
        # producing and consuming in the background.
        for task in tasks:
            task.stop()

        for task in tasks:
            task.join()

In [63]:
# Run the producer/consumer demo; consumed records are printed below.
main()

ConsumerRecord(topic='test', partition=3, offset=188, timestamp=1665729832139, timestamp_type=0, key=None, value='{"key":{"0":"2208281244313807","1":"2208280027362716","2":"2208281618561112","3":"2208280253124586","4":"2208281913283031","5":"2208280102419526","6":"2208281643108429","7":"2208280202528223","8":"2208280032177490","9":"2208280956359149"},"value":{"0":"샤인머스캣 너무 비싸요","1":"요즘 계속 사게 되네요","2":"주말 특가로 저령하게 구입했는데 당도도 아주 좋아요","3":"당도도 높고 너무 맛있게먹었어요","4":".껍질이 두꺼워 실망했어요  먹을부분은 작아지니까~ 맛은 달콤하니 좋아여. 여름 끝물인 듯~","5":"가격도싸고달고맛있어요.","6":"포도하면 송산포도가 젤이예여","7":"좋아해서 구매했는데.맛있습니다.","8":"올해 먹은 포도 중에 최고","9":"조아요 조아요 조아요 조아요 조아요 조아요"}}', headers=[], checksum=None, serialized_key_size=-1, serialized_value_size=1391, serialized_header_size=-1)
ConsumerRecord(topic='test', partition=0, offset=11161, timestamp=1665729833140, timestamp_type=0, key=None, value='{"key":{"0":"2208281244313807","1":"2208280027362716","2":"2208281618561112","3":"2208280253124586","4":"2208281913283031","5":"2208280102419