In [1]:
# This notebook handles sending events to Kafka.
# It creates a Kafka producer and loads the data that will be sent.

"""
FUNCIONAL|
"""
# kafka/bin/zookeeper-server-start.sh kafka/config/zookeeper.properties
# kafka/bin/kafka-server-start.sh kafka/config/server.properties
# kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic BT

# kafka/bin/kafka-topics.sh --delete --zookeeper localhost:2181 --topic BT
 
# kafka/bin/kafka-topics.sh --list --zookeeper localhost:2181
# kafka/bin/kafka-console-consumer.sh --bootstrap-server 192.168.1.101:9092 --topic BT --from-beginning

# ./elasticsearch/bin/elasticsearch
# ./kibana/bin/kibana
# curl -XDELETE 'http://localhost:9200/bt*'

import findspark
findspark.init()

import time

from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

from kafka import KafkaProducer, TopicPartition
import uuid

In [2]:
# Create (or reuse) the Spark session.
# SparkContext('local') raises "Cannot run multiple SparkContexts at once" when
# this cell is executed twice in the same kernel; builder.getOrCreate() returns
# the already-running session instead, so the cell is safe to re-run.
# Important: call sc.stop() once you are done with the notebook.

spark = SparkSession.builder.master('local').getOrCreate()
sc = spark.sparkContext

In [3]:
# Prefix prepended to the dataset path when the JSON file is read.
base_path = '../../'

In [4]:
# Schema of the data frame: every field is declared as a nullable string.

schema = StructType([
    StructField(field_name, StringType(), True)
    for field_name in (
        'status',
        'classic_mode',
        'uuid',
        'company',
        'updated_at',
        'last_seen',
        'uap_lap',
        'address',
        'lmp_version',
        'le_mode',
        'manufacturer',
        'created_at',
        'name',
    )
])

In [5]:
# Read the JSON dataset with the explicit schema; the reader is configured for
# multi-line JSON and DROPMALFORMED mode.
df = (
    spark.read
    .schema(schema)
    .option("multiLine", "true")
    .option("mode", "DROPMALFORMED")
    .json(f'{base_path}datasets/dataset_bluetooth.json')
)

In [6]:
# Add metadata columns to simulate sensor events.
#
# The 'data' payload is built from the schema's field names rather than
# col('*'): since `df` is reassigned in place, re-running this cell with '*'
# would pack the previously added columns ('data', 'version', 'id', 'type',
# 'event') inside 'data'. Listing the original columns explicitly makes the
# cell idempotent while producing the same struct on the first run.

df = df.withColumn('data', struct(*[col(field_name) for field_name in schema.fieldNames()]))
df = df.withColumn('version', lit('1.0'))
df = df.withColumn('id' , lit('f0c48ba4-387d-11ea-a137-2e728ce88126'))
df = df.withColumn('type', lit('BT'))
df = df.withColumn('event', lit('DATA'))

In [7]:
# Kafka event producer: every `sleep` seconds, publish a random 15% sample of
# the dataset to the Kafka topic, one JSON message per row. Runs until the
# kernel is interrupted.

ip_server = '138.4.7.158:9092'
kafka_topic = 'BT-DATA'
c = 0        # number of batches sent so far
sleep = 20   # seconds to wait between batches
seed = 1     # sampling seed; incremented per batch so each sample differs

# Loop-invariant setup, done once. The original created a new KafkaProducer on
# every iteration and never closed it, leaking one connection per batch.
if isinstance(kafka_topic, bytes):
    kafka_topic = kafka_topic.decode('utf-8')
PREDICTION_TOPIC = kafka_topic
producer = KafkaProducer(bootstrap_servers=[ip_server], api_version=(0, 10))

try:
    while True:
        df_sample = df.sample(fraction=0.15, seed=seed)
        # Stamp every event in the batch with the current epoch time, as a string.
        df_sample = df_sample.withColumn('time', unix_timestamp().cast(StringType()))

        prediction_features = df_sample.select('version', 'time', 'id', 'type', 'event', 'data')

        #FUNCIONAL
        for row in prediction_features.toJSON().collect():
            print(row)
            producer.send(PREDICTION_TOPIC, row.encode())
        # One flush per batch is enough: it blocks until all queued messages are sent.
        producer.flush()

        time.sleep(sleep)
        c = c + 1
        seed = seed + 1
finally:
    # On interrupt/error, push out anything still buffered. The producer is
    # deliberately left open because `producer` / `PREDICTION_TOPIC` remain
    # available as notebook globals after this cell stops.
    producer.flush()

{"version":"1.0","time":"1616631831","id":"f0c48ba4-387d-11ea-a137-2e728ce88126","type":"BT","event":"DATA","data":{"status":"online","classic_mode":"t","uuid":"d03256ae-cac4-41ee-9071-d74d4d1c57a7","company":"unknow","updated_at":"2021-02-05T12:19:58+01:00","last_seen":"1612523992","uap_lap":"C3:42:B3:5C","address":"B0:B5:C3:42:B3:5C","lmp_version":"Bluetooth 5.0 (0x09) - Subversion 702 (0x02be)","le_mode":"f","manufacturer":"Qualcomm (29)","created_at":"2021-02-01T11:23:12+01:00","name":"OPPO A9 2020"}}
{"version":"1.0","time":"1616631831","id":"f0c48ba4-387d-11ea-a137-2e728ce88126","type":"BT","event":"DATA","data":{"status":"offline","classic_mode":"f","uuid":"1a76c2fa-c558-415f-a193-3c979a4f9056","company":"Anhui Huami Information Technology Co., Ltd. (343","updated_at":"2021-02-03T18:36:42+01:00","last_seen":"1612373621","uap_lap":"68:47:7E:48","address":"E2:72:68:47:7E:48","lmp_version":"unknow","le_mode":"t","manufacturer":"unknow","created_at":"2021-02-01T11:56:22+01:00","name

KeyboardInterrupt: 

In [None]:
# Send one hand-written test event to Kafka.
import json
ip_server = '192.168.1.101:9092'
kafka_topic = 'BT'
# c, sleep and seed are not used by this cell; they are kept so the notebook's
# global state after running it is the same as before.
c = 0
sleep = 20
seed = 1
prediction_features = {"version": "2.0", "id": "f0c48ba4-387d-11ea-a137-2e728ce88125", "type": "BT", "event": "DATA", "time": 1615210092, "data": {"uuid": "177fd494-501c-44e8-a62c-9713d4aff6e5", "name": "AB Shutter3", "status": "online", "address": "FF:FF:B9:06:C9:91", "uap_lap": "B9:06:C9:91", "company": "unknow", "lmp_version": "unknow", "manufacturer": "unknow", "classic_mode": "f", "le_mode": "t", "created_at": "2021-03-03T15:46:20+01:00", "updated_at": "2021-03-08T14:27:30+01:00", "last_seen": 1615210049}}

# Build the producer and topic from THIS cell's settings. The original reused
# `producer` / `PREDICTION_TOPIC` left over from the previous cell, so the
# ip_server and kafka_topic defined above were silently ignored (and the cell
# raised NameError on a fresh kernel).
PREDICTION_TOPIC = kafka_topic
producer = KafkaProducer(bootstrap_servers=[ip_server], api_version=(0, 10))

#FUNCIONAL
json_object = json.dumps(prediction_features)
row = json_object
print(row)
try:
    producer.send(PREDICTION_TOPIC, row.encode())
    producer.flush()
finally:
    # One-shot send: release the connection whether or not the send succeeded.
    producer.close()

In [None]:
# Stop the SparkContext held in `sc` now that the notebook is done with Spark.
sc.stop()