In [1]:
import os
os.environ['PYSPARK_SUBMIT_ARGS'] = '--jars /home/jovyan/telemetry/spark-streaming-kafka-assembly_2.11-1.6.3.jar pyspark-shell'
from pyspark.sql import SQLContext
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
from pyspark.sql.functions import explode
from __future__ import print_function
from pyspark.sql.types import *
import requests
import json
import numbers
import ast
import time
from collections import OrderedDict
# Driver-side Spark context and a streaming context created with a batch
# duration argument of 1 (seconds, per the PySpark StreamingContext API).
sc = SparkContext()
ssc = StreamingContext(sc, 1)
# Kafka direct-stream settings: the broker to contact and the offset reset
# policy ("largest" presumably means start from the newest messages — confirm
# against the Kafka consumer configuration docs).
kafkaParams = {"metadata.broker.list": "kafka:9092", "auto.offset.reset": "largest"}
topic = "telemetryxe"
# SQLContext is used by transform() to parse each batch of JSON strings.
sqlContext = SQLContext(sc)

kafkaStream = KafkaUtils.createDirectStream(ssc,[topic],kafkaParams)

# Each stream record is indexed as a pair; keep only element 1 (the value).
# The commented line below is the tuple-unpacking form of the same mapping.
#kafka_rdd = kafkaStream.map(lambda (k,v): v)
kafka_rdd = kafkaStream.map(lambda v: v[1])

In [2]:
def transform(rdd):
    """Parse one micro-batch of telemetry JSON and push its metrics to OpenTSDB.

    Every record is read as a row with an ``eventTime`` string and a
    ``push-update`` struct. The struct's ``datastore-contents-xml`` member may
    hold ``interfaces-state``, ``cpu-usage`` and/or ``bgp-state-data``
    sections; each section that is present and non-empty is converted to data
    points by its loader and sent with ``tsdb_api_put``.

    Args:
        rdd: RDD of JSON strings, one telemetry message per element.

    Returns:
        None. The function is called only for its side effects (HTTP puts).
    """
    # Skip empty micro-batches: there is nothing to parse or send.
    if rdd.isEmpty():
        return

    # Historical note: NX-OS payloads reportedly arrive with single quotes
    # instead of double quotes and would need conversion; nothing here does it.
    json_data = sqlContext.read.json(rdd)
    for row in json_data.collect():
        data = row.asDict()
        epoch = time_converter(data['eventTime'])
        content = data['push-update'].asDict()
        tags_master = {
            'NodeID': 'c0-iosxe',
            'Subscription': content['subscription-id'],
        }
        # Template data point: the loaders copy it and overwrite
        # 'metric', 'value' and 'tags'; only the timestamp is final here.
        metrics = {
            "metric": 'metric',
            "timestamp": epoch,
            "value": 'value',
            "tags": 'tags',
        }
        content_data = content['datastore-contents-xml'].asDict()

        interfaces_state = content_data.get('interfaces-state')
        if interfaces_state:
            interfaces = interfaces_state.asDict()
            for interface in interfaces['interface']:
                result = interface_load(interface.asDict(), tags_master, metrics)
                tsdb_api_put(json.loads(result))

        # The presence check must use the same hyphenated key that is read;
        # checking 'cpu_usage' while reading 'cpu-usage' could never work.
        cpu_usage = content_data.get('cpu-usage')
        if cpu_usage:
            result = cpu_load(cpu_usage.asDict(), tags_master, metrics)
            tsdb_api_put(json.loads(result))

        bgp_state = content_data.get('bgp-state-data')
        if bgp_state:
            result = bgp_load(bgp_state.asDict(), tags_master, metrics)
            tsdb_api_put(json.loads(result))
    return

In [3]:
def time_converter(timestamp):
    """Convert a ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` string to seconds since 1970-01-01.

    Args:
        timestamp: Timestamp text in the exact format
            ``%Y-%m-%dT%H:%M:%S.%fZ`` (a fractional-seconds part is required).

    Returns:
        float: Seconds elapsed between 1970-01-01 00:00:00 and the parsed time.

    Raises:
        ValueError: If ``timestamp`` does not match the format.
    """
    import datetime as dt

    parsed = dt.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ")
    since_epoch = parsed - dt.datetime(1970, 1, 1)
    return since_epoch.total_seconds()

In [4]:
def bgp_load(bgp, tags_master, metrics):
    """Build the BGP path and prefix data points and return them as JSON text.

    Args:
        bgp: Mapping whose ``'neighbors'`` entry exposes ``asDict()`` and nests
            ``'address-families'`` -> ``'address-family'``, which in turn holds
            ``'prefixes'`` and ``'path'`` entries with a ``'total-entries'`` value.
        tags_master: Base tag mapping; a copy is attached to both data points.
        metrics: Template data point; copied, never modified.

    Returns:
        str: JSON array with two data points, ``'path'`` then ``'total_prefixes'``.
    """
    neighbors = bgp['neighbors'].asDict()
    families = neighbors['address-families'].asDict()
    family = families['address-family'].asDict()
    prefix_stats = family['prefixes'].asDict()
    path_stats = family['path'].asDict()

    shared_tags = tags_master.copy()
    points = [
        dict(metrics, metric='path',
             value=path_stats['total-entries'], tags=shared_tags),
        dict(metrics, metric='total_prefixes',
             value=prefix_stats['total-entries'], tags=shared_tags),
    ]
    return json.dumps(points)


In [5]:
def cpu_load(cpu, tags_master, metrics):
    """Turn every entry of ``cpu`` into a data point and return them as JSON text.

    Args:
        cpu: Mapping of metric name to value.
        tags_master: Base tag mapping; a copy is attached to every data point.
        metrics: Template data point; copied for each entry, never modified.

    Returns:
        str: JSON array with one data point per key of ``cpu``.
    """
    shared_tags = tags_master.copy()
    points = [
        dict(metrics, metric=name, value=reading, tags=shared_tags)
        for name, reading in cpu.items()
    ]
    return json.dumps(points)

In [6]:
def interface_load(interface, tags_master, metrics):
    """Build one data point per interface statistic and return them as JSON text.

    Args:
        interface: Mapping with a ``'name'`` string and a ``'statistics'``
            entry exposing ``asDict()``. It is not modified.
        tags_master: Base tag mapping; a copy extended with
            ``'interface_name'`` is attached to every data point.
        metrics: Template data point; copied for each statistic, never modified.

    Returns:
        str: JSON array with one data point per statistic, excluding
        ``'discontinuity-time'``.
    """
    tags = tags_master.copy()
    # Spaces are replaced for every name, not just 'Control Plane', and the
    # caller's dict is left untouched. (OpenTSDB's naming rules do not list
    # the space as an allowed tag-value character.)
    tags['interface_name'] = interface['name'].replace(' ', '_')

    stats = interface['statistics'].asDict()
    tsdb = [
        dict(metrics, metric=name, value=reading, tags=tags)
        for name, reading in stats.items()
        if name != 'discontinuity-time'
    ]
    return json.dumps(tsdb)

In [7]:
def tsdb_api_put(data, host='opentsdb:4242', timeout=10):
    """POST data points to the OpenTSDB put endpoint and print any response body.

    Args:
        data: JSON-serialisable data point(s). Falsy values (``None``, ``[]``)
            are skipped without issuing a request.
        host: ``host:port`` of the OpenTSDB HTTP API.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``, so an
            unresponsive server raises instead of blocking the batch forever.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: Propagated from ``requests.post``
            (connection failure, timeout, ...).
    """
    if not data:
        return

    # NOTE(review): OpenTSDB documents the details flag as a query string
    # (``/api/put?details``); confirm that this path form is what is intended.
    url = 'http://' + host + '/api/put/details'
    response = requests.post(url, json=data, timeout=timeout)
    if response.text:
        print(response.text)

In [8]:
# transform() does all of its work as a side effect and returns None, so it is
# registered directly as the per-batch callback. Feeding its result to
# sc.parallelize() would hand parallelize a None it cannot iterate.
kafka_rdd.foreachRDD(transform)
#kafka_rdd.pprint()
ssc.start()
#ssc.awaitTermination()

In [None]:
# Shut down the streaming context started by ssc.start().
# NOTE(review): per the PySpark docs stop() also stops the underlying
# SparkContext by default — confirm that is intended if `sc` is reused.
ssc.stop()