In [6]:
from datetime import datetime, timezone
import os
from settings.profiles import PySparkJobProfile, Profile
from mldsl import *

# Machine-specific locations. The literals are only fallbacks: an already exported
# GOOGLE_APPLICATION_CREDENTIALS is respected, and AI4OPS_PROJECT_BASE_PATH can point
# the notebook at another checkout without editing this cell.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', '/Users/apolatovskaya/key.json')

PROJECT_BASE_PATH = os.environ.get('AI4OPS_PROJECT_BASE_PATH',
                                   '/Users/apolatovskaya/git/ai4ops/dsl/test')
BUCKET = 'ai4ops-main-storage-bucket'
DATA_BASE_PATH = "nd_history"
CLUSTER = 'ai4ops-streaming'

## Streaming ND Ingest

In [7]:
# Job parameters for the blue/green ND streaming ingest.
SCRIPT_PATH = "{}/poc/spark/ingest".format(PROJECT_BASE_PATH)
CONFIG_NAME = 'job_streaming_part_kohls_nd_logged_01.json'
DURATION = 60
BATCH_DURATION = 60
MAX_FAILURES_PER_HOUR = 3
POOL_SIZE = 1
METRIC_DB_TABLE = 'metric_rt_synthetic'
LOG_METRIC_DB_TABLE = 'log_metric_try'
METRIC_ALIAS_DB_TABLE = 'metric_alias'

PROJECT_ID = 'ai4ops_streaming_nd_ingest_bg'
DB_SECRET = "kohls_db.txt"
ND_SECRET = "kohls_nd.txt"
token_file_gcs_path = "gs://{}/resources/{}".format(BUCKET, ND_SECRET)
db_credentials_file_gcs_path = "gs://{}/resources/{}".format(BUCKET, DB_SECRET)


# job properties
py_files = ["apigee_ingest_utils.py", "ai4ops_db.py",
            "yarn_logging.py", "apigee_history_ingest.py", "nd_ingest.py"]
PY_FILES = [os.path.join(SCRIPT_PATH, i) for i in py_files]
FILES = [os.path.join(SCRIPT_PATH, 'jobs', CONFIG_NAME)]
JARS = ["gs://{0}/resources/mysql-connector-java-8.0.16.jar".format(BUCKET),
        "gs://{0}/resources/spark.http.apigee-1.0-SNAPSHOT-jar-with-dependencies.jar".format(BUCKET)]

properties = {"spark.executor.cores": "1",
              "spark.executor.memory": "4G",
              "spark.executor.instances": "2",
              "spark.dynamicAllocation.enabled": "false",
              "spark.streaming.dynamicAllocation.enabled": "false"}

# The %%bash submit cell of this section expands these names from the process
# environment; plain Python variables are invisible to it. Mirror them into
# os.environ so that cell also works after a kernel restart.
os.environ.update({
    name: str(value) for name, value in {
        'SCRIPT_PATH': SCRIPT_PATH,
        'CONFIG_NAME': CONFIG_NAME,
        'BUCKET': BUCKET,
        'CLUSTER': CLUSTER,
        'DURATION': DURATION,
        'POOL_SIZE': POOL_SIZE,
        'BATCH_DURATION': BATCH_DURATION,
        'MAX_FAILURES_PER_HOUR': MAX_FAILURES_PER_HOUR,
        'METRIC_DB_TABLE': METRIC_DB_TABLE,
        'LOG_METRIC_DB_TABLE': LOG_METRIC_DB_TABLE,
        'METRIC_ALIAS_DB_TABLE': METRIC_ALIAS_DB_TABLE,
    }.items()
})

In [8]:
# Profile info: describes where and how the PySpark job is submitted, registers the
# profile under PROJECT_ID and displays the resulting attributes.
pysparkjob_profile = PySparkJobProfile(root_path=SCRIPT_PATH, bucket=BUCKET,
                                       project='kohls-kos-cicd', cluster=CLUSTER,
                                       region='global', ai_region='us-central1',
                                       job_prefix=PROJECT_ID, job_async=True)
pysparkjob_profile.py_files = PY_FILES
pysparkjob_profile.files = FILES
pysparkjob_profile.jars = JARS
pysparkjob_profile.properties = properties
# Take the limit from the config-cell constant instead of repeating the literal 3,
# so the profile and the submit-script cell cannot drift apart.
# NOTE(review): assumes max_failures means "failures per hour" — confirm against mldsl.
pysparkjob_profile.max_failures = MAX_FAILURES_PER_HOUR
# Command-line arguments of nd_streaming_ingest_blue_green.py; everything is passed as text.
pysparkjob_profile.args = {'--tasks_file_path': CONFIG_NAME,
                           '--token_file_gcs_path': token_file_gcs_path,
                           '--db_credentials_file_gcs_path': db_credentials_file_gcs_path,
                           '--res_path': '/opt/dataproc/.resources',
                           '--duration': str(DURATION),
                           '--pool_size': str(POOL_SIZE),
                           '--batch_duration': str(BATCH_DURATION),
                           '--metric_db_table': METRIC_DB_TABLE,
                           '--log_metric_db_table': LOG_METRIC_DB_TABLE,
                           '--alias_table': METRIC_ALIAS_DB_TABLE}
Profile.set(PROJECT_ID, pysparkjob_profile)
platform = Platform.GCP
Profile.get(PROJECT_ID).__dict__

{'root_path': '/Users/apolatovskaya/git/ai4ops/dsl/test/poc/spark/ingest',
 'bucket': 'ai4ops-main-storage-bucket',
 'project': 'kohls-kos-cicd',
 'cluster': 'ai4ops-streaming',
 'region': 'global',
 'ai_region': 'us-central1',
 'job_prefix': 'ai4ops_streaming_nd_ingest_bg',
 'job_async': True,
 'py_files': ['/Users/apolatovskaya/git/ai4ops/dsl/test/poc/spark/ingest/apigee_ingest_utils.py',
  '/Users/apolatovskaya/git/ai4ops/dsl/test/poc/spark/ingest/ai4ops_db.py',
  '/Users/apolatovskaya/git/ai4ops/dsl/test/poc/spark/ingest/yarn_logging.py',
  '/Users/apolatovskaya/git/ai4ops/dsl/test/poc/spark/ingest/apigee_history_ingest.py',
  '/Users/apolatovskaya/git/ai4ops/dsl/test/poc/spark/ingest/nd_ingest.py'],
 'files': ['/Users/apolatovskaya/git/ai4ops/dsl/test/poc/spark/ingest/jobs/job_streaming_part_kohls_nd_logged_01.json'],
 'jars': ['gs://ai4ops-main-storage-bucket/resources/mysql-connector-java-8.0.16.jar',
  'gs://ai4ops-main-storage-bucket/resources/spark.http.apigee-1.0-SNAPSHOT-ja

In [9]:
%%py_script --name nd_streaming_ingest_blue_green.py --path ./test/poc/spark/ingest
# %py_load ./test/poc/spark/ingest/nd_streaming_ingest_blue_green.py
#!/usr/bin/python
import argparse
import json

# noinspection PyProtectedMember
from pyspark import SparkContext, SQLContext
# noinspection PyProtectedMember
from pyspark.serializers import NoOpSerializer
from pyspark.streaming import DStream, StreamingContext
from ai4ops_db import DB
from apigee_ingest_utils import ApigeeIngest
from nd_ingest import (decrypt, store_nd_metrics_with_logged_agg, TIER_ALIAS_NODE, AS_LOG_NODE)

ALIAS_TABLE = 'metric_alias_table'


def read_credentials(spr, cred_file_gcs_path, prefix, resource_path):
    """Load a credentials file through Spark and return its content parsed as JSON.

    The first row of the text file at ``cred_file_gcs_path`` is written to the local
    file ``<prefix>.log``. That file is passed to ``ApigeeIngest.dcr`` together with
    ``<resource_path>/resource.txt``; the bytes it returns are decoded as UTF-8 and
    parsed with ``json.loads``.

    :param spr: object exposing ``read.text(path)`` (this script passes a SQLContext)
    :param cred_file_gcs_path: location of the credentials file to read
    :param prefix: base name of the local file the first row is written to
    :param resource_path: directory containing ``resource.txt``
    :return: the parsed JSON document
    :raises ValueError: if the credentials file contains no rows
    """
    cred_rows = spr.read.text(cred_file_gcs_path).collect()
    # Fail with a clear message before touching the local file; indexing an empty
    # result would otherwise surface as a bare IndexError after truncating it.
    if not cred_rows:
        raise ValueError('Credentials file {} is empty'.format(cred_file_gcs_path))
    cred_file_path = '{}.log'.format(prefix)
    with open(cred_file_path, 'w') as fp:
        fp.write(cred_rows[0][0])
    return json.loads(ApigeeIngest.dcr(resource_path + '/resource.txt', cred_file_path).decode('utf-8'))


def group_by_metric(a):
    """Return ``a`` grouped by the single key ``DB.METRIC``."""
    metric_keys = [DB.METRIC]
    grouped = a.groupby(metric_keys)
    return grouped


def store_nd_metrics_rdd(rdd, db, metric_db_table, log_metric_db_table, tier_alias, as_log):
    """Collect one RDD of the stream and upsert the metric frames derived from it.

    The collected rows go to ``store_nd_metrics_with_logged_agg`` together with
    ``tier_alias`` and ``as_log``; it yields a ``(metrics, logged_metrics)`` pair.
    Each element that is not ``None`` and has at least one row is previewed on
    stdout (first five rows) and handed to ``db.direct_upsert_to_db``: ``metrics``
    into ``metric_db_table``, ``logged_metrics`` into ``log_metric_db_table``.
    An empty RDD is skipped without any further work.
    """
    def _has_rows(frame):
        # A frame is worth storing only when it exists and is non-empty.
        return frame is not None and frame.shape[0] > 0

    batch_rows = rdd.collect()
    if not len(batch_rows):
        return

    metrics, logged_metrics = store_nd_metrics_with_logged_agg(batch_rows, tier_alias, as_log)

    if _has_rows(metrics):
        print('Store metrics to db. Printing 5 head metrics ...')
        print(metrics.head(5))
        db.direct_upsert_to_db(
            metrics[DB.metrics_schema().names],
            group_by_metric,
            metric_db_table,
            DB.metrics_schema().names,
        )

    if _has_rows(logged_metrics):
        print('Store logged metrics to db. Printing 5 head metrics ...')
        print(logged_metrics.head(5))
        db.direct_upsert_to_db(
            logged_metrics[DB.error_metrics_schema().names],
            group_by_metric,
            log_metric_db_table,
            DB.error_metrics_schema().names,
        )


class StreamUtils(object):
    """Helpers for building the ND input DStream through the JVM-side HttpNDHelper."""

    @staticmethod
    def createStream(streaming_context, input_tasks, db_config, input_token, pool_size, duration):
        """Create the input stream via ``HttpNDHelper.createStreamFromJson``.

        :param streaming_context: the StreamingContext the stream is attached to
        :param input_tasks: task description; serialized with ``json.dumps`` for the JVM helper
        :param db_config: DB configuration forwarded unchanged to the helper
        :param input_token: token forwarded unchanged to the helper
        :param pool_size: pool size forwarded unchanged to the helper
        :param duration: converted with ``streaming_context._jduration`` before being forwarded
        :return: the DStream produced by an identity ``map`` over the wrapped Java stream
        :raises TypeError: re-raised when the helper class cannot be instantiated
        """
        # noinspection PyProtectedMember
        j_duration = streaming_context._jduration(duration)
        try:
            # noinspection PyProtectedMember
            helper = streaming_context._jvm.com.kohls.spark.streaming.http.nd.bluegreen.HttpNDHelper()
        except TypeError as e:
            # This exact message is what the lookup yields when the helper class is
            # missing; print a hint first, then let the error propagate.
            if str(e) == "'JavaPackage' object is not callable":
                StreamUtils._printErrorMsg(streaming_context.sparkContext)
            raise

        # noinspection PyProtectedMember
        j_stream = helper.createStreamFromJson(streaming_context._jssc, json.dumps(input_tasks),
                                               db_config, input_token, pool_size, j_duration)
        stream = DStream(j_stream, streaming_context, NoOpSerializer())
        mapped_stream = stream.map(lambda v: v)
        # `logger` is assigned only inside the `__main__` block of this script, so look
        # it up defensively: calling this method from an importing module must not
        # fail with NameError just because of a diagnostic log line.
        script_logger = globals().get('logger')
        if script_logger is not None:
            script_logger.info(type(mapped_stream))
        return mapped_stream

    @staticmethod
    def _printErrorMsg(context):
        """Print the missing-library hint together with ``context.version``."""
        print("Spark Streaming's DB libraries not found in class path. {}".format(context.version))


if __name__ == '__main__':
    # Entry point: parse the job arguments, load token / tasks / DB credentials,
    # build the ND input stream and store every RDD of it until termination.
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument('--tasks_file_path', type=str, help='tasks file path on cluster file system')
    parser.add_argument('--token_file_path', type=str, help='token file path')
    parser.add_argument('--token_file_gcs_path', type=str, help='token file GCS path')
    parser.add_argument('--db_credentials_file_gcs_path', type=str, help='db credentials file path on GCS')
    parser.add_argument('--res_path', type=str, help='resources directory path')
    parser.add_argument('--duration', default=60, type=int, help='check point interval in seconds')
    parser.add_argument('--pool_size', type=int, help='Streaming pool size', default=1)
    parser.add_argument('--batch_duration', type=int, help='Streaming batchDuration in seconds', default=1)
    parser.add_argument('--metric_db_table', default='metric_rt_synthetic', type=str, help='DB metric table name')
    parser.add_argument('--alias_table', default='metric_alias', type=str, help='Alias table')
    parser.add_argument('--log_metric_db_table', default='log_metric_try', type=str, help='DB metric table name')

    args, unknown = parser.parse_known_args()

    # Validate before any Spark context is created, so a bad invocation fails fast
    # and states which argument is missing. sys.exit is used because the bare
    # `exit` name is only injected by the `site` module.
    if args.tasks_file_path is None:
        print('Tasks file path is not set (--tasks_file_path)')
        sys.exit(1)
    if args.token_file_path is None and args.token_file_gcs_path is None:
        print('Token files are not found')
        sys.exit(1)
    if args.res_path is None:
        print('Resources directory path is not set (--res_path)')
        sys.exit(1)
    if args.db_credentials_file_gcs_path is None:
        print('DB credentials file GCS path is not set (--db_credentials_file_gcs_path)')
        sys.exit(1)

    sc = SparkContext(appName="nd_streaming_ingest").getOrCreate()
    sc.addPyFile('yarn_logging.py')
    import yarn_logging

    # Must stay a module-level name: StreamUtils.createStream logs through it.
    logger = yarn_logging.YarnLogger()
    sql_context = SQLContext(sc)
    ssc = StreamingContext(sc, int(args.batch_duration))

    tasks_file_path = args.tasks_file_path
    token_file_gcs_path = args.token_file_gcs_path
    res_path = args.res_path

    # A local token file wins; otherwise the first row of the GCS file is copied
    # to 'token.log' and that copy is used instead.
    token_file_path = args.token_file_path
    if token_file_path is None:
        token_rows = sql_context.read.text(token_file_gcs_path).collect()
        token_file_path = 'token.log'
        with open(token_file_path, 'w') as f:
            f.write(token_rows[0][0])
    token = json.loads(decrypt(res_path + '/resource.txt', token_file_path))

    with open(tasks_file_path) as f:
        tasks = json.load(f)
    tier_alias_dict = tasks.get(TIER_ALIAS_NODE, {})
    as_log_list = tasks.get(AS_LOG_NODE, [])

    # Credentials for the Python-side DB object.
    db_credentials_rows = sql_context.read.text(args.db_credentials_file_gcs_path).collect()
    db_credentials_file_path = 'db.log'
    with open(db_credentials_file_path, 'w') as f:
        f.write(db_credentials_rows[0][0])
    db_credentials = json.loads(
        decrypt(args.res_path + '/resource.txt', db_credentials_file_path).decode('utf-8'))
    db_obj = DB(db_credentials)

    # Configuration handed to the JVM stream helper: the same file is read again and
    # the alias table name is added under ALIAS_TABLE.
    # NOTE(review): the file is fetched and decoded twice, once through `decrypt` and
    # once through `ApigeeIngest.dcr` (inside read_credentials). If the two helpers are
    # equivalent, a single load plus a dict copy would do — confirm before merging them.
    db_credentials = read_credentials(sql_context, args.db_credentials_file_gcs_path, 'db', args.res_path)
    db_credentials[ALIAS_TABLE] = args.alias_table

    stm = StreamUtils.createStream(ssc,
                                   tasks,
                                   db_credentials,
                                   token,
                                   args.pool_size,
                                   args.duration)
    stm.foreachRDD(lambda rdd: store_nd_metrics_rdd(rdd,
                                                    db_obj,
                                                    args.metric_db_table,
                                                    args.log_metric_db_table,
                                                    tier_alias_dict,
                                                    as_log_list))

    ssc.start()
    ssc.awaitTermination()


<py_script.PyScript at 0x10ef49e48>

In [10]:
%py_data -n nd_streaming_ingest_blue_green.py -p $PROJECT_ID -pm $platform -o gs://ai4ops/metric_rt_synthetic\

Uploading file to dir: jobs-root/ai4ops_streaming_nd_ingest_bg_1574804870/apigee_ingest_utils.py
Uploading file to dir: jobs-root/ai4ops_streaming_nd_ingest_bg_1574804870/ai4ops_db.py
Uploading file to dir: jobs-root/ai4ops_streaming_nd_ingest_bg_1574804870/yarn_logging.py
Uploading file to dir: jobs-root/ai4ops_streaming_nd_ingest_bg_1574804870/apigee_history_ingest.py
Uploading file to dir: jobs-root/ai4ops_streaming_nd_ingest_bg_1574804870/nd_ingest.py
Uploading file to dir: jobs-root/ai4ops_streaming_nd_ingest_bg_1574804870/job_streaming_part_kohls_nd_logged_01.json
Job with id ai4ops_streaming_nd_ingest_bg_1574804870 was submitted to the cluster ai4ops-streaming


<IPython.core.display.JSON object>

In [None]:
# Handle for the job reported by the submission output above
# (id ai4ops_streaming_nd_ingest_bg_1574804870); use it to browse the job's properties.
# NOTE(review): the id is copied from one particular run — presumably every new
# submission gets a different id, so update it before re-running this cell.
job_ai4ops_streaming_nd_ingest_bg_1574804870 = job_tracker['ai4ops_streaming_nd_ingest_bg_1574804870']

In [None]:
%%bash
# Runs submit_streaming_nd_ingest_bluegreen.sh with positional arguments, in order:
# SCRIPT_PATH, CONFIG_NAME, BUCKET, CLUSTER, DURATION, POOL_SIZE, BATCH_DURATION,
# MAX_FAILURES_PER_HOUR, METRIC_DB_TABLE, LOG_METRIC_DB_TABLE, METRIC_ALIAS_DB_TABLE.
# Every ${...} is expanded by bash from the process environment, so each value has to
# be exported through os.environ beforehand; a plain Python variable is not visible here.
bash ${SCRIPT_PATH}/submit_streaming_nd_ingest_bluegreen.sh "${SCRIPT_PATH}" "${CONFIG_NAME}" \
"${BUCKET}" "${CLUSTER}" ${DURATION} ${POOL_SIZE} ${BATCH_DURATION} ${MAX_FAILURES_PER_HOUR} \
"${METRIC_DB_TABLE}" "${LOG_METRIC_DB_TABLE}" "${METRIC_ALIAS_DB_TABLE}"

## Log Errors HTM Anomaly Detection

In [None]:
import os
# Base settings for the shell-script driven sections: project location plus the
# bucket and cluster names, the latter two also exported for the %%bash cells.
# NOTE(review): this rebinds PROJECT_BASE_PATH to a different, user-specific checkout.
BUCKET, CLUSTER = 'ai4ops-main-storage-bucket', 'ai4ops-streaming'
PROJECT_BASE_PATH = '/'.join(('/Users/arodin/Documents/projects/kohls', 'poc'))
os.environ.update({'BUCKET': BUCKET, 'CLUSTER': CLUSTER})

In [None]:
# Environment consumed by the submit_log_metrics_anomaly.sh cell.
SCRIPT_PATH = "{base}/spark/ingest".format(base=PROJECT_BASE_PATH)
os.environ.update({
    'SCRIPT_PATH': SCRIPT_PATH,
    'DURATION': '60',
    'BATCH_DURATION': '30',
    'MAX_FAILURES_PER_HOUR': '3',
    'ENVIRONMENT': 'production',
    'PARTITIONS': '3',
    'PERSISTENCE': 'db',
})


In [None]:
%%bash
# Runs submit_log_metrics_anomaly.sh with positional arguments, in order:
# SCRIPT_PATH, BUCKET, CLUSTER, ENVIRONMENT, BATCH_DURATION, DURATION, PARTITIONS,
# PERSISTENCE, MAX_FAILURES_PER_HOUR. All values come from the process environment.
bash ${SCRIPT_PATH}/submit_log_metrics_anomaly.sh "${SCRIPT_PATH}" "${BUCKET}" "${CLUSTER}" "${ENVIRONMENT}" \
${BATCH_DURATION} ${DURATION} ${PARTITIONS} "${PERSISTENCE}" ${MAX_FAILURES_PER_HOUR}

## Error Logs Ingest 

In [None]:
# Environment consumed by the submit_logs_ingest.sh cell.
SCRIPT_PATH = "{base}/spark/ingest".format(base=PROJECT_BASE_PATH)
os.environ.update({
    'CLUSTER': 'ai4ops-streaming',
    'SCRIPT_PATH': SCRIPT_PATH,
    'SUBSCRIPTION': 'projects/kohls-kos-cicd/subscriptions/ai4ops_logs',
    'BATCH_DURATION': '60',
    'BATCH_SIZE': '1000',
    'PARTITIONS': '8',
    'STREAMS': '2',
    'DB_SKIP_OLDER_THAN': '30',
    'DB_CHUNK_SIZE': '10000',
    'MAX_FAILURES_PER_HOUR': '3',
})


In [None]:
%%bash
# Runs submit_logs_ingest.sh with positional arguments, in order:
# SCRIPT_PATH, BUCKET, CLUSTER, SUBSCRIPTION, BATCH_DURATION, BATCH_SIZE, PARTITIONS,
# STREAMS, OUTPUT_MODE, TARGET_DS, DB_SKIP_OLDER_THAN, DB_CHUNK_SIZE, MAX_FAILURES_PER_HOUR.
# NOTE(review): OUTPUT_MODE and TARGET_DS are not set by the configuration cell directly
# above; unless they are exported elsewhere they expand to empty strings — confirm.
bash ${SCRIPT_PATH}/submit_logs_ingest.sh "${SCRIPT_PATH}" "${BUCKET}" "${CLUSTER}" \
"${SUBSCRIPTION}" ${BATCH_DURATION} ${BATCH_SIZE} ${PARTITIONS} ${STREAMS} "${OUTPUT_MODE}" "${TARGET_DS}" \
${DB_SKIP_OLDER_THAN} ${DB_CHUNK_SIZE} ${MAX_FAILURES_PER_HOUR}



## Start ND Streaming Ingest


In [None]:
# Environment consumed by the submit_streaming_nd_ingest_bluegreen.sh cell; this job
# targets the cluster named in CLUSTER_S.
SCRIPT_PATH = "{base}/spark/ingest".format(base=PROJECT_BASE_PATH)
CONFIG_NAME = 'job_streaming_part_kohls_nd_01.json'
os.environ.update({
    'CLUSTER_S': 'ai4ops',
    'SCRIPT_PATH': SCRIPT_PATH,
    'CONFIG_NAME': CONFIG_NAME,
    'DURATION': '60',
    'BATCH_DURATION': '60',
    'MAX_FAILURES_PER_HOUR': '3',
    'POOL_SIZE': '2',
})

In [None]:
%%bash
# Runs submit_streaming_nd_ingest_bluegreen.sh with positional arguments, in order:
# SCRIPT_PATH, CONFIG_NAME, BUCKET, CLUSTER_S, DURATION, POOL_SIZE, BATCH_DURATION,
# MAX_FAILURES_PER_HOUR.
# NOTE(review): the earlier invocation of this script in this notebook passes three
# additional table-name arguments; here they are omitted — confirm the script defaults them.
bash ${SCRIPT_PATH}/submit_streaming_nd_ingest_bluegreen.sh "${SCRIPT_PATH}" "${CONFIG_NAME}" \
"${BUCKET}" "${CLUSTER_S}" ${DURATION} ${POOL_SIZE} ${BATCH_DURATION} ${MAX_FAILURES_PER_HOUR}

## Start Apigee Streaming Ingest (Part 1)

In [None]:
# Environment consumed by the submit_streaming_ingest.sh cell (part 1 configuration).
os.environ.update(CLUSTER_S='ai4ops')
SCRIPT_PATH = "{base}/spark/ingest".format(base=PROJECT_BASE_PATH)
CONFIG_NAME = 'job_streaming_part_kohls_3m_01_ext.json'
os.environ.update({
    'SCRIPT_PATH': SCRIPT_PATH,
    'CONFIG_NAME': CONFIG_NAME,
    'DURATION': '60',
    'CHECKPOINT_GCS_PATH': '',
    'MAX_FAILURES_PER_HOUR': '3',
    'POOL_SIZE': '2',
})

In [None]:
%%bash
# Runs submit_streaming_ingest.sh with positional arguments, in order:
# SCRIPT_PATH, CONFIG_NAME, BUCKET, CLUSTER_S, DURATION, CHECKPOINT_GCS_PATH,
# MAX_FAILURES_PER_HOUR, POOL_SIZE. CHECKPOINT_GCS_PATH is quoted, so an empty value
# is still passed as its own (empty) argument.
bash ${SCRIPT_PATH}/submit_streaming_ingest.sh "${SCRIPT_PATH}" "${CONFIG_NAME}" \
"${BUCKET}" "${CLUSTER_S}" "${DURATION}" "${CHECKPOINT_GCS_PATH}" \
${MAX_FAILURES_PER_HOUR} ${POOL_SIZE}

## Start Apigee Streaming Ingest (Part 2)

In [None]:
# Environment consumed by the submit_streaming_ingest.sh cell (part 2 configuration).
os.environ.update(CLUSTER_S='ai4ops')
SCRIPT_PATH = "{base}/spark/ingest".format(base=PROJECT_BASE_PATH)
CONFIG_NAME = 'job_streaming_part_kohls_3m_02_ext.json'
os.environ.update({
    'SCRIPT_PATH': SCRIPT_PATH,
    'CONFIG_NAME': CONFIG_NAME,
    'DURATION': '60',
    'CHECKPOINT_GCS_PATH': '',
    'MAX_FAILURES_PER_HOUR': '3',
    'POOL_SIZE': '2',
})

In [None]:
%%bash
# Runs submit_streaming_ingest.sh with positional arguments, in order:
# SCRIPT_PATH, CONFIG_NAME, BUCKET, CLUSTER_S, DURATION, CHECKPOINT_GCS_PATH,
# MAX_FAILURES_PER_HOUR, POOL_SIZE. CHECKPOINT_GCS_PATH is quoted, so an empty value
# is still passed as its own (empty) argument.
bash ${SCRIPT_PATH}/submit_streaming_ingest.sh "${SCRIPT_PATH}" "${CONFIG_NAME}" \
"${BUCKET}" "${CLUSTER_S}" "${DURATION}" "${CHECKPOINT_GCS_PATH}" \
${MAX_FAILURES_PER_HOUR} ${POOL_SIZE}

## Anomaly Thresholds

In [None]:
# Environment consumed by the submit_streaming_anomaly_analytics.sh cell; the
# configuration file name is exported as ANALYTICS_CONFIGURATION_NAME.
os.environ.update(CLUSTER='ai4ops-streaming')
SCRIPT_PATH = "{base}/spark/ingest".format(base=PROJECT_BASE_PATH)
CONFIG_NAME = 'job_streaming_anomaly_analytics_nd_7d.json'
os.environ.update({
    'SCRIPT_PATH': SCRIPT_PATH,
    'ANALYTICS_CONFIGURATION_NAME': CONFIG_NAME,
    'DURATION': '60',
    'MAX_FAILURES_PER_HOUR': '3',
    'POOL_SIZE': '1',
})

In [None]:
%%bash
# Runs submit_streaming_anomaly_analytics.sh with positional arguments, in order:
# SCRIPT_PATH, BUCKET, CLUSTER, ANALYTICS_CONFIGURATION_NAME, DURATION,
# MAX_FAILURES_PER_HOUR. POOL_SIZE is exported by the configuration cell above but
# is not passed to this script.
bash ${SCRIPT_PATH}/submit_streaming_anomaly_analytics.sh "${SCRIPT_PATH}" "${BUCKET}" "${CLUSTER}" \
"${ANALYTICS_CONFIGURATION_NAME}" ${DURATION} ${MAX_FAILURES_PER_HOUR}

## Anomaly Detection (Apigee + ND)

In [None]:
# Environment consumed by the submit_partial_streaming_moving_anomaly_detection.sh cell;
# the configuration file name is exported as CONFIG.
os.environ.update(CLUSTER='ai4ops-streaming')
SCRIPT_PATH = "{base}/spark/ingest".format(base=PROJECT_BASE_PATH)
# Earlier configurations, kept for reference:
#   partial_lgbm_moving_anomalies_detection_union_4m_with_nobg_02.json
#   partial_lgbm_moving_anomalies_detection_union_4m_with_nobg_02_lkh.json
#     (added Likelihood for ND_CPU_NOBG)
# The active one adds Likelihood for ND_CPU_NOBG and ND_THROUGHPUT_NOBG.
CONFIG_NAME = 'partial_lgbm_moving_anomalies_detection_union_4m_with_nobg_03_lkh.json'
os.environ.update({
    'SCRIPT_PATH': SCRIPT_PATH,
    'CONFIG': CONFIG_NAME,
    'DURATION': '60',
    'MAX_FAILURES_PER_HOUR': '3',
    'POOL_SIZE': '2',
})

In [None]:
%%bash
# Runs submit_partial_streaming_moving_anomaly_detection.sh with positional arguments,
# in order: SCRIPT_PATH, CONFIG, BUCKET, CLUSTER, DURATION, POOL_SIZE,
# MAX_FAILURES_PER_HOUR. All values come from the process environment.
bash ${SCRIPT_PATH}/submit_partial_streaming_moving_anomaly_detection.sh "${SCRIPT_PATH}" "${CONFIG}" \
"${BUCKET}" "${CLUSTER}" ${DURATION} ${POOL_SIZE} ${MAX_FAILURES_PER_HOUR}



## Alerts

In [None]:
# Environment consumed by the submit_streaming_alerts_dsl_conf.sh cell.
os.environ.update(CLUSTER='ai4ops-streaming')
SCRIPT_PATH = "{base}/spark/ingest".format(base=PROJECT_BASE_PATH)
os.environ.update({
    'SCRIPT_PATH': SCRIPT_PATH,
    'DURATION': '60',
    'POOL_SIZE': '3',
    'EMAIL_TEMPLATE_NAME': 'alert_email_template_multiple.html',
    'MAX_FAILURES_PER_HOUR': '3',
    'ALERTS_TABLE': 'alerts_temp',
    'ENVIRONMENT': 'production',
})


In [None]:
%%bash
# Runs submit_streaming_alerts_dsl_conf.sh with positional arguments, in order:
# SCRIPT_PATH, BUCKET, CLUSTER, DURATION, POOL_SIZE, EMAIL_TEMPLATE_NAME,
# MAX_FAILURES_PER_HOUR, ALERTS_TABLE, ENVIRONMENT. All values come from the
# process environment.
bash ${SCRIPT_PATH}/submit_streaming_alerts_dsl_conf.sh "${SCRIPT_PATH}" "${BUCKET}" "${CLUSTER}" \
${DURATION} ${POOL_SIZE} "${EMAIL_TEMPLATE_NAME}" ${MAX_FAILURES_PER_HOUR} "${ALERTS_TABLE}" "${ENVIRONMENT}"
