In [None]:
import os
os.environ['PYSPARK_SUBMIT_ARGS'] = '--jars /home/jovyan/telemetry/spark-streaming-kafka-assembly_2.11-1.6.3.jar pyspark-shell'
from pyspark.sql import SQLContext
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
from pyspark.sql.functions import explode
from __future__ import print_function
from pyspark.sql.types import *
import requests
import json
import numbers
import ast
from collections import OrderedDict
# Driver-side Spark entry point shared by every cell below.
sc = SparkContext()
# Streaming context on top of `sc`; the second argument is the batch interval
# in seconds, so a new RDD is produced every second.
ssc = StreamingContext(sc, 1)
# Kafka consumer settings: the broker to connect to and the offset-reset
# policy. NOTE(review): "largest" presumably means "start from the newest
# messages" (Kafka 0.8 consumer semantics) -- confirm against the broker version.
kafkaParams = {"metadata.broker.list": "localhost:9092", "auto.offset.reset": "largest"}
# Kafka topic the telemetry messages are read from.
topic = "telemetrynx"
# SQLContext bound to the same SparkContext; used by transform().
sqlContext = SQLContext(sc)

# Direct (receiver-less) Kafka stream over `topic`.
kafkaStream = KafkaUtils.createDirectStream(ssc,[topic],kafkaParams)

# Earlier variant kept for reference: tuple-unpacking lambda (Python 2 only
# syntax) that kept the raw message value without decoding it.
#kafka_rdd = kafkaStream.map(lambda (k,v): v)
# Each stream element is indexed as a (key, value) pair; v[1] is the message
# value, which is decoded from JSON text into Python objects here.
kafka_rdd = kafkaStream.map(lambda v: json.loads(v[1]))

In [None]:
# Values of Telemetry['encoding_path'] that transform() dispatches on.
# path1 messages are handed to interface_loader (once for 'rx', once for 'tx').
path1 = "show interface counters"
# path2 messages are handed to bgp_loader.
path2 = "show bgp all summary"

In [None]:
def _decode_record(record):
    """Return one telemetry message as a dict, whatever form it arrived in."""
    if isinstance(record, dict):
        # Already parsed upstream (kafka_rdd applies json.loads to each value).
        return record
    try:
        return json.loads(record)
    except ValueError:
        # Python-repr text (single quotes, u'' prefixes). literal_eval reads
        # that form as-is, so no quote/prefix rewriting is needed -- rewriting
        # "u'" would also eat the last letter of strings such as 'cpu'.
        return ast.literal_eval(record)


def transform(rdd):
    """Push every telemetry message of one streaming batch to OpenTSDB.

    The batch is collected on the driver and each message is processed in
    turn; an empty batch is a no-op. For every entry of a message's "Rows",
    the row's "Content" is dispatched on Telemetry['encoding_path']:

    * ``path1`` -> ``interface_loader`` for 'rx' and then 'tx' (it posts the
      data points itself);
    * ``path2`` -> ``bgp_loader``, whose result is posted with
      ``tsdb_api_put``;
    * any other path is ignored.

    Args:
        rdd: RDD whose elements are telemetry messages, either already-parsed
            dicts or their JSON / Python-repr text. Each message must carry a
            "Telemetry" header (``node_id_str``, ``encoding_path``,
            ``msg_timestamp``) and a "Rows" list.

    Returns:
        None.

    Raises:
        KeyError: if a message lacks one of the fields listed above.
    """
    for record in rdd.collect():
        data = _decode_record(record)
        telemetry = data['Telemetry']
        encoding_path = telemetry['encoding_path']
        # Tags attached to every data point derived from this message; the
        # loaders copy this dict before adding their own tags.
        tags = {
            'NodeID': telemetry['node_id_str'],
            'EncodingPath': encoding_path.replace(" ", "-"),
        }
        # Template data point: the loaders copy it and overwrite the
        # placeholder 'metric' / 'value' / 'tags' entries.
        # msg_timestamp is divided by 1000 (presumably milliseconds -> seconds,
        # TODO confirm); floor division keeps it an integer on Python 3 as well.
        metrics = {
            "metric": 'metric',
            "timestamp": telemetry['msg_timestamp'] // 1000,
            "value": 'value',
            "tags": 'tags'}
        for row in data["Rows"]:
            content = row['Content']
            if encoding_path == path1:
                interface_loader(metrics, content, 'rx', tags)
                interface_loader(metrics, content, 'tx', tags)
            elif encoding_path == path2:
                tsdb_api_put(bgp_loader(metrics, content, tags))
    return

In [None]:
def bgp_loader(metrics, content, tags):
    """Build OpenTSDB data points from one BGP summary row.

    Walks content -> TABLE_vrf -> ROW_vrf -> TABLE_af -> ROW_af and, for each
    entry found there, reads its TABLE_saf -> ROW_saf record. Every one of
    'totalpaths' / 'totalnetworks' present in that record yields a data point.

    Args:
        metrics: template data point; it is copied, never modified.
        content: the row's "Content" mapping.
        tags: tags to attach to every point; a single copy is shared by all
            points built here.

    Returns:
        list of dicts (metric, timestamp, value, tags), passed through a JSON
        encode/decode round trip before being returned.
    """
    wanted = ('totalpaths', 'totalnetworks')
    shared_tags = tags.copy()
    vrf_row = content['']['TABLE_vrf']['']['ROW_vrf']['']
    af_rows = vrf_row['TABLE_af']['']['ROW_af']['_PIPELINE_EDIT']
    points = []
    for af_row in af_rows:
        saf_row = af_row['TABLE_saf']['']['ROW_saf']['']
        # One point per wanted counter that this record actually carries.
        points.extend(
            dict(metrics, metric=name, value=saf_row[name], tags=shared_tags)
            for name in wanted
            if name in saf_row
        )
    return json.loads(json.dumps(points))

In [None]:
def interface_loader(metrics, content, way, tags):
    """Post every numeric counter of each interface, for one direction.

    Reads content -> TABLE_<way>_counters -> ROW_<way>_counters and iterates
    the interface records found there. Each record's 'interface_<way>' value
    becomes the 'interface_name' tag, and every field whose value is a number
    is sent to OpenTSDB as its own data point via tsdb_api_put.

    Args:
        metrics: template data point; it is copied, never modified.
        content: the row's "Content" mapping.
        way: direction used to build the key names ('rx' or 'tx' in
            transform()).
        tags: base tags; copied once, the caller's dict is not modified.

    Returns:
        None.
    """
    table_key = 'TABLE_{}_counters'.format(way)
    row_key = 'ROW_{}_counters'.format(way)
    name_key = 'interface_{}'.format(way)
    iface_tags = tags.copy()
    for counters in content[''][table_key][''][row_key]['_PIPELINE_EDIT']:
        # The same tags dict is reused for every interface; only the
        # interface name changes, and each point is posted before it does.
        iface_tags['interface_name'] = counters[name_key]
        for counter, reading in counters.items():
            if not isinstance(reading, numbers.Number):
                continue
            point = metrics.copy()
            point['metric'] = counter
            point['value'] = reading
            point['tags'] = iface_tags
            tsdb_api_put(point)
                

In [None]:
def system_loader(metrics, content, tags):
    """Build OpenTSDB data points for the CPU / memory fields of one row.

    Looks at content[''] and emits one data point for each of the wanted
    cpu_state_* / memory_usage_* fields that is present; fields missing from
    the row are skipped (the same policy bgp_loader applies to its keys).

    Args:
        metrics: template data point; it is copied, never modified.
        content: the row's "Content" mapping.
        tags: tags to attach to every point; a single copy is shared by all
            points built here.

    Returns:
        list of dicts (metric, timestamp, value, tags), passed through a JSON
        encode/decode round trip as bgp_loader does.
    """
    tags_copy = tags.copy()
    segment = content['']
    metrics_wanted = [
        'cpu_state_idle',
        'cpu_state_kernel',
        'cpu_state_user',
        'memory_usage_used',
        'memory_usage_free',
        'memory_usage_total']
    tsdb = []
    for key in metrics_wanted:
        if key not in segment:
            continue
        metrics_copy = metrics.copy()
        metrics_copy['metric'] = key
        metrics_copy['value'] = segment[key]
        metrics_copy['tags'] = tags_copy
        tsdb.append(metrics_copy)
    # json.loads() needs text, not a list: encode first, then decode.
    return json.loads(json.dumps(tsdb))

In [None]:
def tsdb_api_put(data, host='opentsdb:4242', timeout=10):
    """POST data point(s) to OpenTSDB and print any response body.

    Args:
        data: one data point (dict) or a list of them; sent as the JSON
            request body. Falsy values (None, empty list/dict) are skipped
            without any network traffic.
        host: "hostname:port" of the OpenTSDB HTTP endpoint.
        timeout: seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: on connection failure or
            timeout; not caught here, so the caller decides how to react.
    """
    if not data:
        return
    # NOTE(review): OpenTSDB documents the "details" flag as a query parameter
    # (/api/put?details); confirm that this path form is what is intended.
    url = 'http://' + host + '/api/put/details'
    response = requests.post(url, json=data, timeout=timeout)
    if response.text:
        print(response.text)

In [None]:
# Register transform() as the per-batch handler. It is passed directly:
# transform() returns None, so wrapping its result in sc.parallelize(...)
# raised a TypeError on every batch after the work had already been done.
kafka_rdd.foreachRDD(transform)
#kafka_rdd.pprint()
# Start consuming; this returns immediately, and the stream keeps running
# until ssc.stop() is called.
ssc.start()
#ssc.awaitTermination()

In [None]:
# Stop the streaming job. NOTE(review): stop() is called with its default
# arguments, which in PySpark presumably also shut down the underlying
# SparkContext -- confirm before re-running the earlier cells on the same kernel.
ssc.stop()