In [1]:
#Este es el script en el que se maneja el envío de eventos Kafka
#Se crea un producer de Kafka y se obtienen datos para enviar

"""
FUNCIONAL
"""
# kafka/bin/zookeeper-server-start.sh kafka/config/zookeeper.properties
# kafka/bin/kafka-server-start.sh kafka/config/server.properties
# kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic FW
# kafka/bin/kafka-topics.sh --create --zookeeper 138.4.7.158:2181 --replication-factor 1 --partitions 1 --topic FW

# kafka/bin/kafka-topics.sh --delete --zookeeper localhost:2181 --topic FW
 
# kafka/bin/kafka-topics.sh --list --zookeeper localhost:2181
# kafka/bin/kafka-topics.sh --list --zookeeper 138.4.7.158:2181
# kafka/bin/kafka-console-consumer.sh --bootstrap-server 192.168.1.101:9092 --topic FW --from-beginning

# ./elasticsearch/bin/elasticsearch
# ./kibana/bin/kibana
# curl -XDELETE 'http://localhost:9200/fw*'

import findspark
findspark.init()

import time

from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

from kafka import KafkaProducer, TopicPartition
import uuid

In [2]:
# Create (or reuse) the local Spark session.
# getOrCreate() makes this cell safe to re-run: constructing a second
# SparkContext while one is still alive raises an error.
# Important: call sc.stop() once the notebook run is finished.

spark = SparkSession.builder.master('local').getOrCreate()
sc = spark.sparkContext

In [3]:
# Path prefix (relative to this notebook) that is prepended to dataset file paths
base_path='../../'

In [4]:
# Schema of the data frame: every column is read as a nullable string.
# Column names are listed in the same order as they appear in the CSV header.

schema = StructType([
    StructField(column_name, StringType(), True)
    for column_name in [
        'Time',
        'Blade',
        'Action',
        'Type',
        'Interface',
        'Origin',
        'Source',
        'Source User Name',
        'Destination',
        'Service',
        'Access Rule Number',
        'Access Rule Name',
        'Policy Name',
        'Description',
        'Id',
        'Marker',
        'Log Server Origin',
        'Interface Direction',
        'Interface Name',
        'Connection Direction',
        'Id Generated By Indexer',
        'First',
        'Sequencenum',
        'Source Zone',
        'Destination Zone',
        'Service ID',
        'Source Port',
        'Destination Port',
        'IP Protocol',
        'Xlate (NAT) Source IP',
        'Xlate (NAT) Source Port',
        'Xlate (NAT) Destination Port',
        'NAT Rule Number',
        'NAT Additional Rule Number',
        'Source Machine Name',
        'Hll Key',
        'Context Num',
        'Policy Management',
        'Db Tag',
        'Policy Date',
        'Product Family',
        'Logid',
        'Policy Rule UID',
        'Layer Name',
        'Needs Browse Time',
        'User',
        'Src User Dn',
        'Protocol',
        'Sig Id',
        'Reason',
        'Destination Machine Name',
        'Destination User Name',
        'Dst User Dn',
        'UserCheck ID',
        'Destination Object',
        'ICMP',
        'ICMP Type',
        'ICMP Code',
        'Client Name',
        'Product Version',
        'Domain Name',
        'Endpoint IP',
        'Authentication Status',
        'Identity Source',
        'Session ID',
        'Source Machine Group',
        'Authentication Method',
        'Identity Type',
        'Authentication Trial',
        'Source User Group',
        'Connection Id',
        'Last Update Time71',
        'Scheme',
        'Methods',
        'VPN Peer Gateway',
        'Community',
        'Mobile Access Session UID',
        'VPN Feature',
        'Duration',
        'Last Update Time79',
        'Update Count',
        'Creation Time',
        'Connections',
        'Aggregated Log Count',
        '_c84',
    ]
])



In [5]:
# Read the firewall CSV (comma separated, with a header row) using the
# explicit all-string schema instead of letting Spark infer column types.
df = (
    spark.read
    .format("csv")
    .schema(schema)
    .options(sep=",", inferSchema="false", header="true")
    .load(f'{base_path}datasets/dataset_fw_v1.csv')
)

In [6]:
# Add metadata columns to simulate events coming from the sensor.
#
# The 'data' payload is built from the CSV schema's field names instead of
# col('*'), so re-running this cell does not nest the previously added
# 'data'/'version'/'id'/'type'/'event' columns inside 'data'. withColumn()
# replaces a column that already exists, which makes the cell idempotent.

df = (
    df
    .withColumn('data', struct(*[col(field_name) for field_name in schema.fieldNames()]))
    .withColumn('version', lit('1.0'))
    .withColumn('id', lit('f0c48ba4-387d-11ea-a137-2e728ce88126'))
    .withColumn('type', lit('FW'))
    .withColumn('event', lit('DATA'))
)

In [10]:
# Create the Kafka event producer and publish a random sample of the data set
# every `sleep` seconds until the cell is interrupted.

ip_server = '192.168.1.101:9092'
kafka_topic = 'FW'
c = 0        # number of batches published so far
sleep = 20   # seconds to wait between batches
seed = 1     # sampling seed; incremented per batch so each batch differs

# The topic name must be a str for KafkaProducer.send()
if isinstance(kafka_topic, bytes):
    kafka_topic = kafka_topic.decode('utf-8')
PREDICTION_TOPIC = kafka_topic

# One producer for the whole run: creating a new one on every iteration leaks
# its connections and background thread, since none of them is ever closed.
producer = KafkaProducer(bootstrap_servers=[ip_server], api_version=(0, 10))

try:
    while True:
        df_sample = df.sample(fraction=0.1, seed=seed)
        # Stamp every event of the batch with the current epoch time (seconds)
        df_sample = df_sample.withColumn('time', unix_timestamp().cast(StringType()))

        prediction_features = df_sample.select('version', 'time', 'id', 'type', 'event', 'data')

        for row in prediction_features.toJSON().collect():
            print(row)
            producer.send(PREDICTION_TOPIC, row.encode())
        # Flush once per batch so the producer can batch records internally
        producer.flush()

        time.sleep(sleep)
        c = c + 1
        seed = seed + 1
finally:
    # Runs on KeyboardInterrupt as well: deliver anything still buffered.
    # The producer is intentionally left open because a later cell reuses
    # `producer` and `PREDICTION_TOPIC`.
    producer.flush()

{"version":"1.0","time":"1616580513","id":"f0c48ba4-387d-11ea-a137-2e728ce88126","type":"FW","event":"DATA","data":{"Time":"5/27/2020 11:22:46 AM","Blade":"Firewall","Action":"Accept","Type":"Connection","Origin":"SMS-GW-CHECK","Source":"SRV_DC_01 (10.1.200.11)","Destination":"srv2.telconet.net (200.93.192.161)","Service":"domain-udp (UDP/53)","Access Rule Number":"57","Access Rule Name":"Consulta DNS Domain Controllers","Policy Name":"Standard","Description":"domain-udp Traffic Accepted from  'srv-dc-01@coopcrea.fin.ec'(10.1.200.11) to 200.93.192.161","Id":"5b83a05e-a69f-e3fc-5ece-93d600010005","Marker":"@A@@B@1590555605@C@2969330","Log Server Origin":"SMS-GW-CHECK (10.1.201.1)","Interface Direction":"inbound","Interface Name":"eth1","Connection Direction":"Outgoing","Id Generated By Indexer":"false","First":"true","Sequencenum":"50","Source Zone":"Internal","Destination Zone":"External","Service ID":"domain-udp","Source Port":"64159","Destination Port":"53","IP Protocol":"UDP (17)","

KeyboardInterrupt: 

In [None]:
# Publish one hand-written sample event to Kafka.
# This cell creates (and closes) its own producer so it also works on a fresh
# kernel, without depending on names left behind by an interrupted loop cell.
import json

ip_server = '192.168.1.101:9092'
kafka_topic = 'FW'
PREDICTION_TOPIC = kafka_topic

prediction_features = {"version":"1.0","time":"1616580513","id":"f0c48ba4-387d-11ea-a137-2e728ce88126","type":"FW","event":"DATA","data":{"Time":"1616580513","Blade":"Firewall","Action":"Accept","Type":"Connection","Origin":"SMS-GW-CHECK","Source":"SRV_DC_01 (10.1.200.11)","Destination":"srv2.telconet.net (200.93.192.161)","Service":"domain-udp (UDP/53)","Access Rule Number":"57","Access Rule Name":"Consulta DNS Domain Controllers","Policy Name":"Standard","Description":"domain-udp Traffic Accepted from  'srv-dc-01@coopcrea.fin.ec'(10.1.200.11) to 200.93.192.161","Id":"5b83a05e-a69f-e3fc-5ece-93d600010005","Marker":"@A@@B@1590555605@C@2969330","Log Server Origin":"SMS-GW-CHECK (10.1.201.1)","Interface Direction":"inbound","Interface Name":"eth1","Connection Direction":"Outgoing","Id Generated By Indexer":"false","First":"true","Sequencenum":"50","Source Zone":"Internal","Destination Zone":"External","Service ID":"domain-udp","Source Port":"64159","Destination Port":"53","IP Protocol":"UDP (17)","Xlate (NAT) Source IP":"SMS-GW-CHECK (179.49.29.10)","Xlate (NAT) Source Port":"54880","Xlate (NAT) Destination Port":"0","NAT Rule Number":"0","NAT Additional Rule Number":"0","Source Machine Name":"srv-dc-01@coopcrea.fin.ec","Hll Key":"9435118931605545409","Context Num":"1","Policy Management":"SMS-GW-CHECK","Db Tag":"{10B41B45-4E19-A745-9231-AE1687566AA9}","Policy Date":"Today  9:50:39","Product Family":"Access","Logid":"0","Policy Rule UID":"629deb9c-68ad-410e-87fa-e14059f0bd88","Layer Name":"Network"}}

# Serialize the event to a JSON string; it is sent UTF-8 encoded
row = json.dumps(prediction_features)
print(row)

producer = KafkaProducer(bootstrap_servers=[ip_server], api_version=(0, 10))
try:
    producer.send(PREDICTION_TOPIC, row.encode())
    producer.flush()
finally:
    # Release the producer's connections even if the send fails
    producer.close()

In [None]:
# Stop the SparkContext created at the top of the notebook
sc.stop()