## Train & Inference Version

In [None]:
script_option = "Train"
#script_option = "Inference"

#--------------------------------------------------------------------------------------------
#---------------------------------Part 1: Set Ups -------------------------------------------
#--------------------------------------------------------------------------------------------

from prophet import Prophet
import numpy as np
import pandas as pd
import mysql.connector
from pandas_profiling import ProfileReport
import datetime
from prophet.serialize import model_to_json, model_from_json
import pickle
import warnings
warnings.filterwarnings("ignore")

def fit_model(dataframe, interval_width=0.99, changepoint_range=0.8):
    '''
        Input: 

        Output: a forecasted dataframe includes

    '''

    m = Prophet(daily_seasonality=False, yearly_seasonality=False, weekly_seasonality=False,
                seasonality_mode='multiplicative',
                interval_width=interval_width,
                changepoint_range=changepoint_range)
    m = m.fit(dataframe)
    return m
    

def predict_model(dataframe,m):
    forecast = m.predict(dataframe)
    forecast['fact'] = dataframe['y'].reset_index(drop=True)
    
    return forecast


def detect_anomalies(forecast):
    '''
    What it does:  based on rule: label anomaly data point based on whether the actual data is greater than the upper bond of prediction or smaller than the lower bond of the prediction.

    Input: forecast dataframe from Prophet model.
    Output: forecast dataframe with anomlies labeled. 

    '''
    forecasted = forecast[['ds', 'trend', 'yhat',
                           'yhat_lower', 'yhat_upper', 'fact']].copy()

    forecasted['anomaly'] = 0
    forecasted.loc[forecasted['fact'] >
                   forecasted['yhat_upper'], 'anomaly'] = 1
    forecasted.loc[forecasted['fact'] <
                   forecasted['yhat_lower'], 'anomaly'] = -1

    # anomaly importances
    forecasted['importance'] = 0
    forecasted.loc[forecasted['anomaly'] == 1, 'importance'] = \
        (forecasted['fact'] - forecasted['yhat_upper'])/forecast['fact']
    forecasted.loc[forecasted['anomaly'] == -1, 'importance'] = \
        (forecasted['yhat_lower'] - forecasted['fact'])/forecast['fact']

    return forecasted

# Export Anomaly_df is a python Dataframe object.
# MySQL Insert Query includes [tableName], (columns to insert)
# The value stores (col1, col2,col3)
def insert_anomalies_prophet(current_anomlies):
    for index, row in current_anomlies.iterrows():
        connection = mysql.connector.connect(
            host="10.1.2.10",
            user="gyan",
            password="5Gaa$2022",
            database="gyan_db"
        )

        cursor = connection.cursor()

        MySQL_insert_query = "INSERT INTO tb_export_anomaly_df (client_id, stats_timestamp, attribute_name, attribute_value, attribute_label_prophet, attribute_deviation,attribute_mean) VALUES (%s, %s, %s, %s, %s, %s, %s)"

        the_value= (row.client_id, str(row.stats_timestamp), str(row.attribute_label), row.attribute_value, row.attribute_label_prophet, row.attribute_deviation,row.attribute_mean)
        
        try:
            cursor.execute(MySQL_insert_query, the_value)
            connection.commit()
        except:
            print("Record Already Inserted")
            pass

    cursor.close()
    print("Insert Complete")
    
    
#--------------------------------------------------------------------------------------------
#---------------------------------Part 2: Data and DB connetion -----------------------------
#--------------------------------------------------------------------------------------------


# Initiate with Parameters
db_name = "core_stats"
col = "peak_upload_speed"


# Start Database Connection
db_connection = mysql.connector.connect(
    host="10.1.2.10",
    user="gyan",
    password="5Gaa$2022",
    database="gyan_db"
)

# Load data from database and store as pandas Dataframe
df_rand = pd.read_sql(
    'SELECT * FROM gyan_db.core_stats WHERE client_id= "BETBEL01GYN001" AND stats_timestamp>"2022-07-15"'.format(db_name), con=db_connection)



#--------------------------------------------------------------------------------------------
#---------------------------------Part 3: Prophet Model Training ----------------------------
#--------------------------------------------------------------------------------------------

df_rand_prophet = df_rand[["stats_timestamp", "peak_upload_speed"]].rename(
    columns={"stats_timestamp": "ds", "peak_upload_speed": "y"})

rand_list = ['total_attached_user',
             'total_rejected_user', 'peak_upload_speed', 'peak_download_speed',
             'enodeb_shutdown_count', 'handover_failure_count',
             'bearer_active_user_count', 'bearer_rejected_user_count', 'total_users',
             'total_dropped_packets', 'enodeb_connected_count',
             'enodeb_connection_status']


anomaly_df = pd.DataFrame()
for item in rand_list:
    print(item)
    df_rand_prophet = df_rand[["stats_timestamp", item]].rename(
        columns={"stats_timestamp": "ds", item: "y"})
    
    json_path = "prophet_models/"+"BETBEL01GYN001"+str(item)+".json"
    folder_path = "/opt/gyan/gyan-anomaly-detector"
    
    if script_option == "Train":
        
        model = fit_model(df_rand_prophet)

            
        with open(json_path, 'w') as fout:
            fout.write(model_to_json(model))  # Save model

    elif script_option=="Inference":  
      
        with open(json_path, 'r') as fin:
            model = model_from_json(fin.read())  # Load model

        pred = predict_model(df_rand_prophet,model)
        pred_anomalies = detect_anomalies(pred)
        print("Anomaly rate is: ", pred_anomalies["anomaly"].sum()/pred_anomalies.shape[0])
    
    
        sub_anomaly_df = pred_anomalies[(pred_anomalies["anomaly"]==-1) | (pred_anomalies["anomaly"]==1)]
        sub_anomaly_df["attribute_label"]=item
        anomaly_df=anomaly_df.append(sub_anomaly_df, ignore_index = True)


#------------------------------------------------------------------------------------------
#---------------------------------Part 4: Get Current Anomalies and Insert-----------------
#------------------------------------------------------------------------------------------
if script_option=="Inference":
    anomaly_df= anomaly_df.rename(columns= {"ds":"stats_timestamp","yhat":"attribute_mean","anomaly":"attribute_label_prophet","fact":"attribute_value","importance":"attribute_deviation"})
    anomaly_df=anomaly_df.drop(["trend","yhat_lower","yhat_upper"],axis=1)
    anomaly_df["client_id"]= "BETBEL01GYN001"
    
    time_interval = datetime.datetime.now() - datetime.timedelta(minutes=120)
    current_anomlies= anomaly_df[anomaly_df.stats_timestamp > time_interval].reset_index()


    if current_anomlies.shape[0]>0:
        insert_anomalies_prophet(current_anomlies)
        print("Anomalies Insert Completed")
        
        
    else:
        print("No Anomalies Detected")


In [None]:
# pkl_path = "prophet_models/"+"BETBEL01GYN001"+str(item)+".pkl"

# with open(pkl_path, "wb") as f:
# # Pickle the 'Prophet' model using the highest protocol available.
#     pickle.dump(model, f)

# with open(pkl_path, 'rb') as f:
#     model = pickle.load(f)


# save the dataframe
# forecast.to_pickle("path/to/data/forecast.pkl")
# print("*** Data Saved ***")

#fcast = pd.read_pickle("path/to/data/forecast.pkl")


In [None]:

# script_option = "Train"

# if script_option == "Train":
#     with open("BETBEL01GYN001"+str(item)+'.json', 'w') as fout:
#         fout.write(model_to_json(model))  # Save model

# with open("BETBEL01GYN001"+str(item)+'.json', 'r') as fin:
#     m = model_from_json(fin.read())  # Load model


## Base Version

In [6]:

#--------------------------------------------------------------------------------------------
#---------------------------------Part 1: Set Ups -------------------------------------------
#--------------------------------------------------------------------------------------------

from prophet import Prophet
import numpy as np
import pandas as pd
import mysql.connector
from pandas_profiling import ProfileReport
import datetime
from prophet.serialize import model_to_json, model_from_json
import pickle
import warnings
warnings.filterwarnings("ignore")

def fit_model(dataframe, interval_width=0.99, changepoint_range=0.8):
    '''
        Input: 

        Output: a forecasted dataframe includes

    '''

    m = Prophet(daily_seasonality=False, yearly_seasonality=False, weekly_seasonality=False,
                seasonality_mode='multiplicative',
                interval_width=interval_width,
                changepoint_range=changepoint_range)
    m = m.fit(dataframe)
    return m
    

def predict_model(dataframe,m):
    forecast = m.predict(dataframe)
    forecast['fact'] = dataframe['y'].reset_index(drop=True)
    
    return forecast


def detect_anomalies(forecast):
    '''
    What it does:  based on rule: label anomaly data point based on whether the actual data is greater than the upper bond of prediction or smaller than the lower bond of the prediction.

    Input: forecast dataframe from Prophet model.
    Output: forecast dataframe with anomlies labeled. 

    '''
    forecasted = forecast[['ds', 'trend', 'yhat',
                           'yhat_lower', 'yhat_upper', 'fact']].copy()

    forecasted['anomaly'] = 0
    forecasted.loc[forecasted['fact'] >
                   forecasted['yhat_upper'], 'anomaly'] = 1
    forecasted.loc[forecasted['fact'] <
                   forecasted['yhat_lower'], 'anomaly'] = -1

    # anomaly importances
    forecasted['importance'] = 0
    forecasted.loc[forecasted['anomaly'] == 1, 'importance'] = \
        (forecasted['fact'] - forecasted['yhat_upper'])/forecast['fact']
    forecasted.loc[forecasted['anomaly'] == -1, 'importance'] = \
        (forecasted['yhat_lower'] - forecasted['fact'])/forecast['fact']

    return forecasted

# Export Anomaly_df is a python Dataframe object.
# MySQL Insert Query includes [tableName], (columns to insert)
# The value stores (col1, col2,col3)
def insert_anomalies_prophet(current_anomlies):
    for index, row in current_anomlies.iterrows():
        connection = mysql.connector.connect(
            host="10.1.2.10",
            user="gyan",
            password="5Gaa$2022",
            database="gyan_db"
        )

        cursor = connection.cursor()

        MySQL_insert_query = "INSERT INTO tb_export_anomaly_df (client_id, stats_timestamp, attribute_name, attribute_value, attribute_label_prophet, attribute_deviation,attribute_mean) VALUES (%s, %s, %s, %s, %s, %s, %s)"

        the_value= (row.client_id, str(row.stats_timestamp), str(item), row.attribute_value, row.attribute_label_prophet, row.attribute_deviation,row.attribute_mean)
        
        try:
            cursor.execute(MySQL_insert_query, the_value)
            connection.commit()
        except:
            print("Record Inserted")
            pass

    cursor.close()
    print("Insert Complete")
    
    
#--------------------------------------------------------------------------------------------
#---------------------------------Part 2: Data and DB connetion -----------------------------
#--------------------------------------------------------------------------------------------


# Initiate with Parameters
db_name = "core_stats"
col = "peak_upload_speed"


# Start Database Connection
db_connection = mysql.connector.connect(
    host="10.1.2.10",
    user="gyan",
    password="5Gaa$2022",
    database="gyan_db"
)

# Load data from database and store as pandas Dataframe
df_rand = pd.read_sql(
    'SELECT * FROM gyan_db.core_stats WHERE client_id= "BETBEL01GYN001" AND stats_timestamp>"2022-07-15"'.format(db_name), con=db_connection)



#--------------------------------------------------------------------------------------------
#---------------------------------Part 3: Prophet Model Training ----------------------------
#--------------------------------------------------------------------------------------------

df_rand_prophet = df_rand[["stats_timestamp", "peak_upload_speed"]].rename(
    columns={"stats_timestamp": "ds", "peak_upload_speed": "y"})

rand_list = ['total_attached_user',
             'total_rejected_user', 'peak_upload_speed', 'peak_download_speed',
             'enodeb_shutdown_count', 'handover_failure_count',
             'bearer_active_user_count', 'bearer_rejected_user_count', 'total_users',
             'total_dropped_packets', 'enodeb_connected_count',
             'enodeb_connection_status']


anomaly_df = pd.DataFrame()
for item in rand_list:
    print(item)
    df_rand_prophet = df_rand[["stats_timestamp", item]].rename(
    columns={"stats_timestamp": "ds", item: "y"})
    model = fit_model(df_rand_prophet)

    pred = predict_model(df_rand_prophet,model)
    pred_anomalies = detect_anomalies(pred)
    print("Anomaly rate is: ", pred_anomalies["anomaly"].sum()/pred_anomalies.shape[0])


    sub_anomaly_df = pred_anomalies[(pred_anomalies["anomaly"]==-1) | (pred_anomalies["anomaly"]==1)]

    anomaly_df=anomaly_df.append(sub_anomaly_df, ignore_index = True)


#------------------------------------------------------------------------------------------
#---------------------------------Part 4: Get Current Anomalies and Insert-----------------
#------------------------------------------------------------------------------------------

anomaly_df= anomaly_df.rename(columns= {"ds":"stats_timestamp","yhat":"attribute_mean","anomaly":"attribute_label_prophet","fact":"attribute_value","importance":"attribute_deviation"})
anomaly_df=anomaly_df.drop(["trend","yhat_lower","yhat_upper"],axis=1)
anomaly_df["client_id"]= "BETBEL01GYN001"

time_interval = datetime.datetime.now() - datetime.timedelta(minutes=5)
current_anomlies= anomaly_df[anomaly_df.stats_timestamp > time_interval].reset_index()


if current_anomlies.shape[0]>0:
    insert_anomalies_prophet(current_anomlies)
    print("Anomalies Insert Completed")
else:
    print("No Anomalies Detected")


total_attached_user
Anomaly rate is:  0.0010148751703540466
total_rejected_user
Anomaly rate is:  0.00031896076782555744
peak_upload_speed
Anomaly rate is:  -0.0005509322353350538
peak_download_speed
Anomaly rate is:  0.0030156290776234525
enodeb_shutdown_count
Anomaly rate is:  0.00040595006814161857
handover_failure_count
Anomaly rate is:  0.00020297503407080928
bearer_active_user_count
Anomaly rate is:  0.00028996433438687044
bearer_rejected_user_count
Anomaly rate is:  0.00023197146750949634
total_users
Anomaly rate is:  0.00031896076782555744
total_dropped_packets
Anomaly rate is:  0.0009278858700379854
enodeb_connected_count
Anomaly rate is:  0.0002609679009481834
enodeb_connection_status
Anomaly rate is:  0.0002609679009481834
No Anomalies Detected
