In [1]:
#Este es el script en el que se maneja el envio de eventos kafka
#Se crea un producer de kafka y se obtienen datos para enviar

"""
FUNCIONAL
"""
# kafka/bin/zookeeper-server-start.sh kafka/config/zookeeper.properties
# kafka/bin/kafka-server-start.sh kafka/config/server.properties
# kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic WF

# kafka/bin/kafka-topics.sh --delete --zookeeper localhost:2181 --topic WF
 
# kafka/bin/kafka-topics.sh --list --zookeeper localhost:2181
# kafka/bin/kafka-console-consumer.sh --bootstrap-server 192.168.1.101:9092 --topic WF --from-beginning

# ./elasticsearch/bin/elasticsearch
# ./kibana/bin/kibana
# curl -XDELETE 'http://localhost:9200/wf*'

import findspark
findspark.init()

import time

from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

from kafka import KafkaProducer, TopicPartition
import uuid

In [2]:
#Se crea una sescion de spark
#Importante tras ejecucion hacer sc.stop()

sc = SparkContext('local')
spark = SparkSession(sc)

In [3]:
base_path='../../'

In [4]:
# Estrucutra del dataset 

schema = StructType([
    StructField('userid', StringType(), True),
    StructField('minact', IntegerType(), True),
    StructField('tseen', IntegerType(), True),
    StructField('tacum', IntegerType(), True),    
    StructField('visits', IntegerType(), True),
    StructField('act24h', IntegerType(), True),
    StructField('pwr', IntegerType(), True),
    StructField('footprint', StringType(), True),
    StructField('oui', StringType(), True),
    StructField('type', StringType(), True),
    StructField('tx_packets', IntegerType(), True),
    StructField('tx_bytes', IntegerType(), True),
    StructField('rx_packets', IntegerType(), True),
    StructField('rx_bytes', IntegerType(), True),
    StructField('ap', StringType(), True),
    StructField('essid', StringType(), True),
    StructField('apwr', IntegerType(), True),
    StructField('timestamp', DoubleType(), True)
])

In [5]:
df = spark.read.schema(schema).option("mode", "DROPMALFORMED").load(f'{base_path}datasets/dataset_wifi_v2.csv',\
                     format="csv", sep=",", inferSchema="False", header="true")

In [6]:
# AÃ±adimos metadatos para simular eventos del sensor

df = df.withColumn('data', struct(col('*')))
df = df.withColumn('version', lit('1.0'))
df = df.withColumn('id' , lit('f0c48ba4-387d-11ea-a137-2e728ce88126'))
df = df.withColumn('type', lit('WF'))
df = df.withColumn('event', lit('DATA'))

In [7]:
# Creamos productor de eventos Kafka

ip_server = '192.168.1.101:9092'
kafka_topic = 'WF'
c = 0
sleep = 20
seed = 1

while True:
    df_sample = df.sample(fraction = 0.01, seed = seed)
    df_sample = df_sample.withColumn('time', unix_timestamp().cast(StringType()))
    
    prediction_features = df_sample.select('version','time','id','type','event','data')

    if type(kafka_topic) == bytes:
        kafka_topic = kafka_topic.decode('utf-8')
    producer = KafkaProducer(bootstrap_servers=[ip_server],api_version=(0,10))
    PREDICTION_TOPIC = kafka_topic

    #FUNCIONAL
    for row in prediction_features.toJSON().collect():
        print((row))
        producer.send(PREDICTION_TOPIC , row.encode())
        producer.flush()
    
    time.sleep(sleep)
    c = c + 1
    seed = seed + 1

{"version":"1.0","time":"1615368460","id":"f0c48ba4-387d-11ea-a137-2e728ce88126","type":"WF","event":"DATA","data":{"userid":"DA:A1:19:0C:9E:51","minact":1,"tseen":300,"tacum":300,"visits":1,"act24h":1,"pwr":-1,"footprint":"05:4C:AE:1D:AA:32","oui":"google","type":"CID","tx_packets":1,"tx_bytes":83,"rx_packets":0,"rx_bytes":0,"ap":"unknown","essid":"unknown","apwr":-1,"timestamp":1.61253E9}}
{"version":"1.0","time":"1615368460","id":"f0c48ba4-387d-11ea-a137-2e728ce88126","type":"WF","event":"DATA","data":{"userid":"04:1E:64:EF:56:D2","minact":1,"tseen":240,"tacum":3220,"visits":11,"act24h":1,"pwr":-1,"footprint":"01:08:81:32:04:44","oui":"apple","type":"MAL","tx_packets":1,"tx_bytes":55,"rx_packets":0,"rx_bytes":0,"ap":"unknown","essid":"unknown","apwr":-1,"timestamp":1.6124814E9}}
{"version":"1.0","time":"1615368460","id":"f0c48ba4-387d-11ea-a137-2e728ce88126","type":"WF","event":"DATA","data":{"userid":"F6:37:D5:26:A5:7F","minact":1,"tseen":100,"tacum":100,"visits":1,"act24h":1,"pwr"

KeyboardInterrupt: 

In [11]:
# Creamos productor de eventos Kafka
import json
ip_server = '192.168.1.101:9092'
kafka_topic = 'WF'
c = 0
sleep = 20
seed = 1
prediction_features = {"version": "2.0", "id": "f0c48ba4-387d-11ea-a137-2e728ce88125", "type": "WF", "event": "DATA", "time": 1615204920, "data": {"userid": "16:EA:2C:AB:48:0F", "minact": 1, "tseen": 30, "tacum": 4040, "visits": 3, "act24h": 1, "pwr": -70, "footprint": "24:4D:63:B0:68:28", "oui": "unknown", "type": "LMA", "tx_packets": 3, "tx_bytes": 435, "rx_packets": 0, "rx_bytes": 0, "ap": "unknown", "essid": "unknown", "apwr": -1, "timestamp": 1615204860.0}}
#FUNCIONAL
json_object = json.dumps(prediction_features)   
row = json_object 
print((row))
producer.send(PREDICTION_TOPIC , row.encode())
producer.flush()

{"version": "2.0", "id": "f0c48ba4-387d-11ea-a137-2e728ce88125", "type": "WF", "event": "DATA", "time": 1615204920, "data": {"userid": "16:EA:2C:AB:48:0F", "minact": 1, "tseen": 30, "tacum": 4040, "visits": 3, "act24h": 1, "pwr": -70, "footprint": "24:4D:63:B0:68:28", "oui": "unknown", "type": "LMA", "tx_packets": 3, "tx_bytes": 435, "rx_packets": 0, "rx_bytes": 0, "ap": "unknown", "essid": "unknown", "apwr": -1, "timestamp": 1615204860.0}}


In [None]:
sc.stop()