# Load Data, Inject Synthetic Drops, and Run Detection

In [1]:
# Standard library
import platform
import sys

# Third-party
import numpy as np
import pandas as pd

# Display options: show full column text in DataFrames and print NumPy arrays
# without summarising them.
pd.set_option('display.max_colwidth', None)
np.set_printoptions(threshold=sys.maxsize)

# Project-local (CellPAD)
from CellPAD.controller import DropController
from CellPAD.evaluator import evaluate
from CellPAD.synthsiser import DropSynthesiser
# Read the hourly KPI export and keep only the four columns listed here.
df = pd.read_csv("play_data/kpi_hour_ttml_from2022060100.csv")[
    ["DATE", "HOUR", "TRAFFIC4G", "INIT_ERAB_SR"]
]

# The final 168 rows are held out as the test window
# (presumably one week of hourly samples -- confirm against the data).
end_df = len(df)
split_point = end_df - 168

# Build a "<YYYY/MM/DD> <HOUR>:00" string per row; DATE is parsed as day/month/year.
df["DATE"] = pd.to_datetime(df["DATE"], format="%d/%m/%Y").dt.strftime("%Y/%m/%d")
df["HOUR"] = df["HOUR"].map(str)
df["Time"] = df["DATE"] + " " + df["HOUR"] + ":00"

# Everything before split_point is training data, the remainder is test data.
timestamps_train = df["Time"][:split_point].values
series_train = df["TRAFFIC4G"][:split_point].values
timestamps_test = df["Time"][split_point:].values
series_test = df["TRAFFIC4G"][split_point:].values

# Alternative data source (disabled): read ./data/sd.csv, which uses "Time"/"KPI"
# columns, instead of the KPI export loaded above.
# data_path = "./data/sd.csv"
# df = pd.read_csv(data_path)
# split_point = df.shape[0] - 168
# timestamps_train, series_train = df["Time"][:split_point].values, df["KPI"][:split_point].values
# timestamps_test, series_test = df["Time"][split_point:].values, df["KPI"][split_point:].values
# Window sizes shared by the synthesiser, the detector and the evaluation slice.
# Named once so the three places that use them cannot drift apart.
PERIOD_LEN = 168           # samples per period, passed as period_len
BOOTSTRAP_PERIOD_CNT = 2   # passed as bootstrap_period_cnt

# inject anomalies
syner = DropSynthesiser(raw_series=series_train, period_len=PERIOD_LEN)
syn_series, syn_labels = syner.syn_drop()

# detect drop
controller = DropController(timestamps=timestamps_train,
                            series=syn_series,
                            period_len=PERIOD_LEN,
                            feature_types=["Indexical", "Numerical"],
                            feature_time_grain=["Weekly"],
                            feature_operations=["Wma", "Ewma", "Mean", "Median"],
                            bootstrap_period_cnt=BOOTSTRAP_PERIOD_CNT,
                            to_remove_trend=True,
                            trend_remove_method="center_mean",
                            anomaly_filter_method="gauss",
                            anomaly_filter_coefficient=3.0)
controller.detect(predictor="RF")
results = controller.get_results()

# Score only the samples after the first BOOTSTRAP_PERIOD_CNT periods.
# NOTE(review): the original hard-coded 2*168 here; it is assumed to mirror
# bootstrap_period_cnt * period_len -- confirm.
eval_start = BOOTSTRAP_PERIOD_CNT * PERIOD_LEN
auc, prauc = evaluate(results["drop_scores"][eval_start:], syn_labels[eval_start:])

from model import evaluate_regression_model

# Score the controller's fitted regressor on the most recent rows of the
# feature/response matrices the controller keeps in its storage dict.
# NOTE(review): these rows come from the controller's own stored matrices;
# confirm whether they were also used to fit `model` (if so this is an
# in-sample score, not a held-out one).
model = controller.get_model()
dict_storage = controller.get_dict_storage()

# Use at most the last ten 168-sample periods. The start index is clamped at 0:
# with fewer than 1680 stored rows a negative start would be interpreted as
# "count from the end" and silently drop the leading rows.
end = dict_storage["normal_features_matrix"].shape[0]
start = max(0, end - 168 * 10)
X_test = dict_storage["normal_features_matrix"][start:]
y_test = dict_storage["normal_response_series"][start:]

# Bound to `regression_metrics` rather than `dict` so the builtin `dict` type
# stays usable in every later cell.
regression_metrics = evaluate_regression_model(model, X_test, y_test)
regression_metrics

# Test the model with test data

In [5]:
from model import build_model

# Hand the complete, unsplit "Time" / "TRAFFIC4G" columns to build_model and
# keep the three values it returns.
test_timestamps = df["Time"].values
test_series = df["TRAFFIC4G"].values
(test_model, test_metrics, test_features) = build_model(test_timestamps, test_series, True)

In [29]:
from CellPAD.preprocessor import Preprocessor

preprocessor = Preprocessor()

# split_point is the row count minus 168, so test_series[split_point:] is the
# final 168 samples.
# NOTE(review): assumes test_features has one row per sample of test_series so
# that the same offset selects matching rows -- TODO confirm.
X_test1 = test_features[split_point:]

# NOTE(review): the detector was configured with trend_remove_method="center_mean",
# while the target here is detrended with "past_mean" -- confirm this is intended.
y_test1 = preprocessor.remove_trend(
    test_series[split_point:],
    168,
    method="past_mean",
)

# Scores `model` (the regressor obtained from the controller), not `test_model`.
evaluate_regression_model(model, X_test1, y_test1)

{'r2_score': 0.8425570224738905,
 'mean_squared_error': 0.030486416850683578,
 'Mean_absolute_error': 0.10958298776694726}