In [1]:
import sys
import os

#sys.path.insert(0, '/opt/cloudera/parcels/CDH/lib/spark/python/')
#sys.path.insert(0, '/opt/cloudera/parcels/CDH/lib/spark/python/lib/py4j-0.9-src.zip')

# Point this kernel at the cluster's Java, Spark and Anaconda installations.
# These are set unconditionally, so any value inherited from the shell is replaced.
os.environ.update({
    "JAVA_HOME": "/usr/lib/jvm/java-8-oracle/jre",
    "SPARK_HOME": "/opt/cloudera/parcels/CDH/lib/spark",
    "PYSPARK_PYTHON": "/opt/cloudera/parcels/Anaconda/bin/python",
})
# PYLIB is derived from SPARK_HOME, so it has to be set after it.
os.environ["PYLIB"] = os.environ["SPARK_HOME"] + "/python/lib"

# Each archive is pushed to the front of sys.path in turn, so the one listed
# last (pyspark.zip) ends up ahead of py4j on the import path.
for _archive in ("/py4j-0.9-src.zip", "/pyspark.zip"):
    sys.path.insert(0, os.environ["PYLIB"] + _archive)

In [2]:
from pyspark.sql import SQLContext
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
from pyspark.sql.functions import explode
from __future__ import print_function
from pyspark.sql.types import *
import requests
import json
import numbers
import ast
import time
from collections import OrderedDict
# --- Spark contexts ---------------------------------------------------------
# The executor-memory property is registered before the SparkContext exists.
SparkContext.setSystemProperty('spark.executor.memory', '8g')
sc = SparkContext('local[4]', 'test-spark0')
sqlContext = SQLContext(sc)
# Second argument is the streaming batch duration passed to StreamingContext.
ssc = StreamingContext(sc, 1)

# --- Kafka source -----------------------------------------------------------
topic = "telemetrynx1"
kafkaParams = {
    "metadata.broker.list": "pnda14.gspie.lab:9092,pnda15.gspie.lab:9092,pnda13.gspie.lab:9092",
    "auto.offset.reset": "largest",
}
kafkaStream = KafkaUtils.createDirectStream(ssc, [topic], kafkaParams)

# Each stream element is a (key, value) pair; only the value (index 1) is kept
# and it is decoded from JSON text into Python objects.
kafka_rdd = kafkaStream.map(lambda message: json.loads(message[1]))

In [3]:
# NX-OS CLI commands as they appear in a message's 'encoding_path' field.
# The names are positional: the n-th string below is bound to path<n>.
path1, path2, path3, path4, path5 = (
    "show interface counters",
    "show bgp all summary",
    "show hardware internal buffer info pkt-stats",
    "show hardware internal buffer info pkt-stats peak",
    "show system resources",
)

In [4]:
def transform(rdd):
    """Push every telemetry message of one micro-batch RDD to OpenTSDB.

    Each record is expected to be a decoded telemetry message: a mapping with
    a 'Telemetry' header (``node_id_str``, ``encoding_path``) and a list of
    'Rows', each carrying a 'Content' payload.  Records that are still raw
    JSON text are decoded first.

    Records are read straight from the RDD instead of being passed through
    ``sqlContext.read.json``.  Going through the JSON reader with
    already-decoded dicts only yields their ``repr`` text in
    ``_corrupt_record``, and stripping ``u'`` from that text also removes the
    last letter of any string ending in ``u`` (``'cpu'`` becomes ``'cp'``).
    Reading directly also handles every message in the batch rather than only
    the first, and makes an empty batch a no-op instead of an IndexError.

    The signature deliberately stays at a single positional argument.

    :param rdd: object exposing ``collect()`` that returns the batch's
        messages (dicts, or JSON text).
    :return: None; all output happens via the loaders and ``tsdb_api_put``.
    :raises KeyError: if a message lacks one of the fields read below.
    """
    for message in rdd.collect():
        # bytes / unicode covers both Python 2 and Python 3 text types.
        if isinstance(message, (bytes, type(u''))):
            message = json.loads(message)

        telemetry = message['Telemetry']
        encoding_path = telemetry['encoding_path']
        tags_master = {
            'NodeID': telemetry['node_id_str'],
            # Spaces are replaced so the command can be used as a tag value.
            'EncodingPath': encoding_path.replace(" ", "-"),
        }

        for row in message["Rows"]:
            # Template data point; the loaders copy it and overwrite
            # 'metric', 'value' and 'tags' for every point they emit.
            metrics = {
                "metric": 'metric',
                "timestamp": 'timestamp',
                "value": 'value',
                "tags": 'tags'}
            # Points are stamped with the processing time, not the device's
            # message timestamp:
            # metrics['timestamp'] = message["Telemetry"]['msg_timestamp']/1000
            metrics['timestamp'] = time.time()
            tags = tags_master.copy()
            content = row['Content']

            if encoding_path == path1:
                # interface_loader posts its points itself, once per direction.
                interface_loader(metrics, content, 'rx', tags)
                interface_loader(metrics, content, 'tx', tags)
            elif encoding_path == path2:
                tsdb_api_put(bgp_loader(metrics, content, tags))
            elif encoding_path in (path3, path4):
                # Both buffer commands are parsed by the peak loader.
                tsdb_api_put(buffers_peak_loader(metrics, content, tags))
            # NOTE(review): no branch here handles path5
            # ("show system resources"); such messages are ignored.
    return

In [5]:
def buffers_loader(metrics, content, tags):
    """Build data points from a buffer pkt-stats payload.

    Reads the single module row and its single instance row from ``content``.
    'module_number' and 'instance' become tags; every other field of the
    instance row becomes one data point.

    ``content`` and ``tags`` are left untouched, so the same payload can be
    loaded more than once.  The 'instance' field is skipped while iterating
    rather than deleted from the caller's dict.

    :param metrics: template data point, copied for each emitted point.
    :param content: the row's 'Content' mapping.
    :param tags: base tags; a copy is extended with module/instance.
    :return: list of data-point dicts, normalised through a JSON round trip.
    :raises KeyError: if the expected nesting or fields are missing.
    """
    module_row = content['']['TABLE_module']['']['ROW_module']['']
    instance_row = module_row['TABLE_instance']['']['ROW_instance']['']

    tags_copy = tags.copy()
    tags_copy['module_number'] = module_row['module_number']
    tags_copy['instance'] = instance_row['instance']

    tsdb = []
    for key, value in instance_row.items():
        if key == 'instance':
            # Already carried as a tag; not a metric of its own.
            continue
        metrics_copy = metrics.copy()
        metrics_copy['metric'] = key
        metrics_copy['value'] = value
        metrics_copy['tags'] = tags_copy
        tsdb.append(metrics_copy)
    # The JSON round trip detaches the result from the shared tags dict.
    return json.loads(json.dumps(tsdb))

In [6]:
def buffers_peak_loader(metrics, content, tags):
    """Build data points from a buffer pkt-stats (peak) payload.

    Instance-level fields are returned as data points tagged with
    'module_number' and 'instance'.  If the instance row has a
    'TABLE_interface' entry, each port row in it is additionally tagged with
    'front_port' and its counters are sent immediately through
    ``tsdb_api_put`` (one call per port).  Port rows containing
    'peak_stats_start' or 'stats_start' are skipped.

    ``content`` and ``tags`` are left untouched.  The 'instance' and
    'front_port' fields are skipped while iterating rather than deleted from
    the caller's dicts, so loading the same payload twice does not raise.

    :param metrics: template data point, copied for each emitted point.
    :param content: the row's 'Content' mapping.
    :param tags: base tags; copies are extended per instance / per port.
    :return: list of instance-level data-point dicts (port-level points are
        posted here, not returned).
    :raises KeyError: if the expected nesting or fields are missing.
    """
    module_row = content['']['TABLE_module']['']['ROW_module']['']
    instance_row = module_row['TABLE_instance']['']['ROW_instance']['']

    tags_copy = tags.copy()
    tags_copy['module_number'] = module_row['module_number']
    tags_copy['instance'] = instance_row['instance']

    tsdb = []
    for key, value in instance_row.items():
        if key == 'instance':
            # Already carried as a tag; not a metric of its own.
            continue
        if key == 'TABLE_interface':
            ports = value['']['ROW_interface']['_PIPELINE_EDIT']
            for item in ports:
                if 'peak_stats_start' in item or 'stats_start' in item:
                    continue
                tags_inner = tags_copy.copy()
                tags_inner['front_port'] = item['front_port']
                tsdb_inner = []
                for counter, reading in item.items():
                    if counter == 'front_port':
                        # Used as a tag above, so not emitted as a metric.
                        continue
                    metrics_copy = metrics.copy()
                    metrics_copy['metric'] = counter
                    metrics_copy['value'] = reading
                    metrics_copy['tags'] = tags_inner
                    tsdb_inner.append(metrics_copy)
                tsdb_api_put(json.loads(json.dumps(tsdb_inner)))
        else:
            metrics_copy = metrics.copy()
            metrics_copy['metric'] = key
            metrics_copy['value'] = value
            metrics_copy['tags'] = tags_copy
            tsdb.append(metrics_copy)
    # The JSON round trip detaches the result from the shared tags dict.
    return json.loads(json.dumps(tsdb))

In [7]:
def bgp_loader(metrics, content, tags):
    """Extract BGP path/network totals from a BGP summary payload.

    Walks every address-family entry of the single VRF row in ``content`` and
    emits one data point for each of 'totalpaths' / 'totalnetworks' that the
    entry's SAF row actually contains.

    :param metrics: template data point, copied for each emitted point.
    :param content: the row's 'Content' mapping.
    :param tags: base tags; a copy of them is attached to every point.
    :return: list of data-point dicts, normalised through a JSON round trip.
    """
    wanted = ('totalpaths', 'totalnetworks')
    shared_tags = tags.copy()
    vrf_row = content['']['TABLE_vrf']['']['ROW_vrf']['']
    address_families = vrf_row['TABLE_af']['']['ROW_af']['_PIPELINE_EDIT']

    def as_point(name, reading):
        # One data point = the template plus metric name, value and tags.
        point = metrics.copy()
        point.update(metric=name, value=reading, tags=shared_tags)
        return point

    points = []
    for address_family in address_families:
        saf_row = address_family['TABLE_saf']['']['ROW_saf']['']
        points.extend(
            as_point(name, saf_row[name]) for name in wanted if name in saf_row
        )
    return json.loads(json.dumps(points))

In [8]:
def interface_loader(metrics, content, way, tags):
    """Post the numeric counters of every interface for one direction.

    ``way`` selects the table ('rx' or 'tx' in the callers visible here): it
    is substituted into the table, row and interface-name keys.  For each
    interface row, every field whose value is a number is sent to
    ``tsdb_api_put`` as its own data point, tagged with 'interface_name'.

    All points from one call share a single tags dict whose 'interface_name'
    entry is overwritten for each interface.

    :param metrics: template data point, copied for each emitted point.
    :param content: the row's 'Content' mapping.
    :param way: direction suffix used to build the lookup keys.
    :param tags: base tags; a copy is extended with 'interface_name'.
    :return: None; points are posted one at a time as they are built.
    """
    table_key = 'TABLE_{}_counters'.format(way)
    row_key = 'ROW_{}_counters'.format(way)
    name_key = 'interface_{}'.format(way)

    shared_tags = tags.copy()
    for counters in content[''][table_key][''][row_key]['_PIPELINE_EDIT']:
        shared_tags['interface_name'] = counters[name_key]
        for counter, reading in counters.items():
            if not isinstance(reading, numbers.Number):
                # Non-numeric fields (e.g. the interface name) are not metrics.
                continue
            point = metrics.copy()
            point.update(metric=counter, value=reading, tags=shared_tags)
            tsdb_api_put(point)
    return
                

In [9]:
def system_loader(metrics, content, tags):
    """Build CPU and memory data points from a system-resources payload.

    Picks a fixed set of counters out of ``content['']``.  Counters that are
    absent from the payload are skipped, in the same way ``bgp_loader`` skips
    missing keys.

    The result is serialised with ``json.dumps`` before ``json.loads``;
    calling ``json.loads`` directly on the list raises TypeError.

    :param metrics: template data point, copied for each emitted point.
    :param content: the row's 'Content' mapping.
    :param tags: base tags; a copy of them is attached to every point.
    :return: list of data-point dicts, normalised through a JSON round trip.
    """
    tags_copy = tags.copy()
    segment = content['']
    metrics_wanted = [
        'cpu_state_idle',
        'cpu_state_kernel',
        'cpu_state_user',
        'memory_usage_used',
        'memory_usage_free',
        'memory_usage_total']

    tsdb = []
    for key in metrics_wanted:
        if key not in segment:
            continue
        metrics_copy = metrics.copy()
        metrics_copy['metric'] = key
        metrics_copy['value'] = segment[key]
        metrics_copy['tags'] = tags_copy
        tsdb.append(metrics_copy)
    return json.loads(json.dumps(tsdb))

In [10]:
def tsdb_api_put(data, host='gspie-opentsdb.cisco.com:4242', timeout=10):
    """POST data points to OpenTSDB's ``/api/put/details`` endpoint.

    Nothing is sent when ``data`` is empty or None.  Whenever the response
    body is non-empty, both the body and the submitted payload are printed.

    :param data: a data-point dict or a list of them; sent as the JSON body.
    :param host: ``host:port`` of the OpenTSDB instance.
    :param timeout: seconds to wait for the server before requests raises,
        so an unresponsive server cannot block the caller indefinitely.
    :raises requests.exceptions.RequestException: on connection failure or
        timeout (propagated from ``requests.post``).
    """
    if not data:
        return
    openTsdbUrl = 'http://' + host + '/api/put/details'
    response = requests.post(openTsdbUrl, json=data, timeout=timeout)
    if response.text:
        print(response.text)
        print(data)

In [11]:
# Run transform on each micro-batch for its side effects (posting to OpenTSDB).
# transform returns None, so its result must not be handed to sc.parallelize,
# which needs an iterable and would raise TypeError on every batch.
kafka_rdd.foreachRDD(lambda rdd: transform(rdd))
#kafka_rdd.pprint()
ssc.start()
#ssc.awaitTermination()

In [None]:
# Stop streaming; the underlying SparkContext is stopped along with it
# (this is the default, written out here so the effect is visible).
ssc.stop(stopSparkContext=True)