## Train & Inference Version

In [1]:
import pandas as pd
# Alternative input file, kept for reference:
#df = pd.read_csv("Existing_rand_data_after_July15.csv")[30000:].reset_index(drop=True)

# Load the CSV, keep only rows 65000-79999, and renumber the index from 0.
df = pd.read_csv("randomized_data_with_distribution_1year_V2.csv")[65000:80000].reset_index(drop=True)

In [2]:

# Run mode read by the per-attribute loop below: "Train" fits one Prophet model
# per attribute and saves it as JSON; "Inference" loads the saved models and
# flags anomalies.
#script_option = "Train"
script_option = "Inference"
# Alternative V1 folders, kept for reference:
# json_path_folder= "prophet_models/Random_Data_V1/"
# anomaly_graphs_folder="Anomaly_graphs/V1_No_Distribution/"


# Folder prefix for the per-attribute model JSON files (written in "Train",
# read in "Inference").
json_path_folder= "prophet_models/Random_Data_V2/"
# Folder prefix for the per-attribute anomaly chart HTML files.
anomaly_graphs_folder="Anomaly_graphs/V2_Distribution/"


#--------------------------------------------------------------------------------------------
#---------------------------------Part 1: Set Ups -------------------------------------------
#--------------------------------------------------------------------------------------------

from prophet import Prophet
import numpy as np


import mysql.connector
from pandas_profiling import ProfileReport
import datetime
from prophet.serialize import model_to_json, model_from_json
import pickle
import warnings
warnings.filterwarnings("ignore")

def fit_model(dataframe, interval_width=0.99, changepoint_range=0.8):
    '''
        Fit a Prophet model on the given data and return it.

        Input:
            dataframe: training data handed straight to ``Prophet.fit``
                (callers in this file pass a frame with ``ds`` and ``y`` columns).
            interval_width: forwarded to Prophet as ``interval_width``.
            changepoint_range: forwarded to Prophet as ``changepoint_range``.

        Output: the fitted Prophet model.

    '''
    # Daily, weekly and yearly seasonality are all switched off; the seasonality
    # mode is multiplicative.
    prophet_settings = {
        'daily_seasonality': False,
        'yearly_seasonality': False,
        'weekly_seasonality': False,
        'seasonality_mode': 'multiplicative',
        'interval_width': interval_width,
        'changepoint_range': changepoint_range,
    }
    fitted = Prophet(**prophet_settings).fit(dataframe)
    return fitted
    


def plot_anomalies(forecasted):
    '''
    Build a layered Altair chart of the prediction band, the normal points
    and the anomalous points.

    Input: dataframe with the columns ds, fact, yhat_lower, yhat_upper,
           anomaly and importance.
    Output: the layered chart (870x450, title font size 20).
    '''
    import altair as alt

    hover_fields = ['ds', 'fact', 'yhat_lower', 'yhat_upper']
    normal_rows = forecasted[forecasted.anomaly == 0]
    flagged_rows = forecasted[forecasted.anomaly != 0]

    # Shaded area between the lower and upper prediction bounds.
    band = alt.Chart(forecasted).mark_area(interpolate="basis", color='#7FC97F')
    band = band.encode(
        x=alt.X('ds:T',  title='date'),
        y='yhat_upper',
        y2='yhat_lower',
        tooltip=list(hover_fields),
    )
    band = band.interactive().properties(title='Anomaly Detection')

    # Rows labelled 0: small black dots.
    normal_points = alt.Chart(normal_rows).mark_circle(size=15, opacity=0.7, color='Black')
    normal_points = normal_points.encode(
        x='ds:T',
        y=alt.Y('fact', title='sales'),
        tooltip=list(hover_fields),
    ).interactive()

    # Rows labelled non-zero: red dots sized by the importance column.
    # NOTE(review): this layer's y-axis title differs from the one above.
    flagged_points = alt.Chart(flagged_rows).mark_circle(size=30, color='Red')
    flagged_points = flagged_points.encode(
        x='ds:T',
        y=alt.Y('fact', title='PeakUpload Speed'),
        tooltip=list(hover_fields),
        size=alt.Size('importance', legend=None),
    ).interactive()

    layered = alt.layer(band, normal_points, flagged_points)
    sized = layered.properties(width=870, height=450)
    return sized.configure_title(fontSize=20)


def predict_model(dataframe,m):
    '''
    Run the model's predict on the dataframe and attach the observed values.

    Input: dataframe with a y column; m, an object exposing predict(dataframe).
    Output: whatever m.predict returns, with an extra fact column holding
            dataframe['y'] re-indexed from 0 (so it lines up by position with a
            0-based forecast index).
    '''
    result = m.predict(dataframe)
    observed = dataframe['y'].reset_index(drop=True)
    result['fact'] = observed
    return result


def detect_anomalies(forecast):
    '''
    What it does: labels each data point as an anomaly when the actual value is
    greater than the upper bound of the prediction (label 1) or smaller than
    the lower bound of the prediction (label -1); everything else is labelled 0.

    Input: forecast dataframe with the columns ds, trend, yhat, yhat_lower,
           yhat_upper and fact. It is not modified.
    Output: a copy of those columns plus
            anomaly    -- 1, -1 or 0 as described above
            importance -- for anomalies, the distance from the violated bound
                          divided by the actual value; 0.0 otherwise.
                          A fact of 0 on an anomalous row therefore gives an
                          infinite importance.

    '''
    forecasted = forecast[['ds', 'trend', 'yhat',
                           'yhat_lower', 'yhat_upper', 'fact']].copy()

    forecasted['anomaly'] = 0
    forecasted.loc[forecasted['fact'] >
                   forecasted['yhat_upper'], 'anomaly'] = 1
    forecasted.loc[forecasted['fact'] <
                   forecasted['yhat_lower'], 'anomaly'] = -1

    is_high = forecasted['anomaly'] == 1
    is_low = forecasted['anomaly'] == -1

    # anomaly importances
    # Start from a float column: the ratios assigned below are floats, and
    # writing them into an integer column is an incompatible-dtype assignment
    # in newer pandas.
    forecasted['importance'] = 0.0
    forecasted.loc[is_high, 'importance'] = \
        (forecasted['fact'] - forecasted['yhat_upper']) / forecasted['fact']
    forecasted.loc[is_low, 'importance'] = \
        (forecasted['yhat_lower'] - forecasted['fact']) / forecasted['fact']

    return forecasted

# Takes a pandas DataFrame of anomalies and writes one row per record to MySQL.
# The insert query names the target table and the columns to fill;
# the values tuple supplies one value per %s placeholder.
def insert_anomalies_prophet(current_anomlies):
    '''
    Insert every row of the dataframe into the tb_export_anomaly_df table.

    Input: current_anomlies -- dataframe with the columns client_id,
           stats_timestamp, attribute_label, attribute_value,
           attribute_label_prophet, attribute_deviation and attribute_mean.
    Output: None. Prints "Record Already Inserted" when a row violates a
            database integrity constraint, a failure message for any other
            MySQL error, and "Insert Complete" at the end.

    One connection is shared by all rows and is always closed, and each row is
    committed on its own so that one failing row does not discard the others.
    '''
    if current_anomlies.empty:
        # Nothing to write, so do not open a connection at all.
        print("Insert Complete")
        return

    MySQL_insert_query = "INSERT INTO tb_export_anomaly_df (client_id, stats_timestamp, attribute_name, attribute_value, attribute_label_prophet, attribute_deviation,attribute_mean) VALUES (%s, %s, %s, %s, %s, %s, %s)"

    # NOTE(review): the credentials are hard-coded in source; consider loading
    # them from configuration or the environment instead.
    connection = mysql.connector.connect(
        host="10.1.2.10",
        user="gyan",
        password="5Gaa$2022",
        database="gyan_db"
    )
    try:
        cursor = connection.cursor()
        try:
            for index, row in current_anomlies.iterrows():
                the_value = (row.client_id, str(row.stats_timestamp), str(row.attribute_label), row.attribute_value, row.attribute_label_prophet, row.attribute_deviation, row.attribute_mean)

                try:
                    cursor.execute(MySQL_insert_query, the_value)
                    connection.commit()
                except mysql.connector.IntegrityError:
                    # Constraint violation, e.g. the record is already stored.
                    print("Record Already Inserted")
                except mysql.connector.Error as err:
                    # Stay best-effort, but do not mislabel other failures as
                    # duplicates.
                    print("Insert failed:", err)
        finally:
            cursor.close()
    finally:
        connection.close()

    print("Insert Complete")
    
    
#--------------------------------------------------------------------------------------------
#---------------------------------Part 2: Data and DB connection ----------------------------
#--------------------------------------------------------------------------------------------



#--------------------------------------------------------------------------------------------
#---------------------------------Part 3: Prophet Model Training ----------------------------
#--------------------------------------------------------------------------------------------


# Attribute columns of df; each one gets its own Prophet model.
rand_list = ['total_attached_user',
             'total_rejected_user', 'peak_upload_speed', 'peak_download_speed',
             'enodeb_shutdown_count', 'handover_failure_count',
             'bearer_active_user_count', 'bearer_rejected_user_count', 'total_users',
             'total_dropped_packets', 'enodeb_connected_count',
             'enodeb_connection_status']


# Per-attribute anomaly rows are collected here and concatenated once after the
# loop (DataFrame.append no longer exists in pandas 2.x).
anomaly_frames = []
for item in rand_list:
    print(item)
    # Prophet expects the timestamp column as "ds" and the value column as "y".
    df_rand_prophet = df[["stats_timestamp", item]].rename(
        columns={"stats_timestamp": "ds", item: "y"})

    json_path = json_path_folder+"RAND001"+str(item)+".json"

    if script_option == "Train":

        model = fit_model(df_rand_prophet)

        with open(json_path, 'w') as fout:
            fout.write(model_to_json(model))  # Save model

    elif script_option == "Inference":

        with open(json_path, 'r') as fin:
            model = model_from_json(fin.read())  # Load model

        pred = predict_model(df_rand_prophet, model)
        pred_anomalies = detect_anomalies(pred)

        # Count every non-zero label. Summing the labels would let +1 and -1
        # anomalies cancel each other out and under-report the rate.
        is_anomaly = pred_anomalies["anomaly"] != 0
        print("Anomaly rate is: ", is_anomaly.mean())

        # Copy before adding a column so the write does not target a slice of
        # pred_anomalies.
        sub_anomaly_df = pred_anomalies[is_anomaly].copy()
        sub_anomaly_df["attribute_label"] = item
        anomaly_frames.append(sub_anomaly_df)

        # Only the first 5000 rows are charted.
        chart = plot_anomalies(pred_anomalies[:5000])

        chart.save(anomaly_graphs_folder+'Anomaly_{}.html'.format(item))

# Stays an empty DataFrame when nothing was collected (e.g. in "Train" mode).
if anomaly_frames:
    anomaly_df = pd.concat(anomaly_frames, ignore_index=True)
else:
    anomaly_df = pd.DataFrame()


#         chart2 = plot_anomalies(pred_anomalies[5000:10000])

#         chart2.save("Anomaly_graphs/V1_Long_period_anomalies/"+'Anomaly_{}.html'.format(item))

#------------------------------------------------------------------------------------------
#---------------------------------Part 4: Get Current Anomalies and Insert-----------------
#------------------------------------------------------------------------------------------
if script_option == "Inference":
    # Map the Prophet output column names onto the names the insert step reads.
    export_column_names = {
        "ds": "stats_timestamp",
        "yhat": "attribute_mean",
        "anomaly": "attribute_label_prophet",
        "fact": "attribute_value",
        "importance": "attribute_deviation",
    }
    anomaly_df = anomaly_df.rename(columns=export_column_names)
    anomaly_df = anomaly_df.drop(columns=["trend", "yhat_lower", "yhat_upper"])
    anomaly_df["client_id"] = "RAND001"

    # Keep only anomalies timestamped within the last 120 minutes.
    time_interval = datetime.datetime.now() - datetime.timedelta(minutes=120)
    is_recent = anomaly_df.stats_timestamp > time_interval
    current_anomlies = anomaly_df[is_recent].reset_index()

    if len(current_anomlies) > 0:
        insert_anomalies_prophet(current_anomlies)
        print("Anomalies Insert Completed")
    else:
        print("No Anomalies Detected")


total_attached_user
total_rejected_user
peak_upload_speed
peak_download_speed
enodeb_shutdown_count
handover_failure_count
bearer_active_user_count
bearer_rejected_user_count
total_users
total_dropped_packets
enodeb_connected_count
enodeb_connection_status
