# Load required libs

In [1]:
%cd ..
%pwd

c:\VENV\api_prediction


'c:\\VENV\\api_prediction'

In [2]:
import sys
sys.path.append('./src')
from src.data_processor import RawDataProcessor
from src.problem_config import create_prob_config 
prob_config = create_prob_config("phase-2", "prob-2")

In [3]:
from mlflow.models.signature import infer_signature
import mlflow

def log_model_to_tracker_lgbm(model, metrics, desc):
    MLFLOW_TRACKING_URI = 'http://192.168.88.113:5000'
    mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
    mlflow.set_experiment("phase-2_prob-2_lgbm")
    MLFLOW_MODEL_PREFIX = "model"
    mlflow.start_run(description=desc)
    mlflow.log_metrics(metrics)
    mlflow.log_params(model.get_params())
    signature = infer_signature(test_x.astype(np.float64), predictions)
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path=MLFLOW_MODEL_PREFIX,
        signature=signature,
        pip_requirements ='src/requirements.txt'
        #registered_model_name="phase-1_prob-1_model-1"
    )

    experimentid = mlflow.active_run().info.run_id
    mlflow.end_run()
    return experimentid

# Prepare datasets

In [4]:
# Import xtran and ytrain
import pandas as pd
import numpy as np

training_data = pd.read_parquet(prob_config.raw_data_path)

training_data, category_index = RawDataProcessor.build_category_features(
            training_data, prob_config.categorical_cols
        )

target_col = prob_config.target_col
train_x = training_data.drop([target_col], axis=1)
train_y = training_data[[target_col]]

import pickle
with open(prob_config.category_index_path, "wb") as f:
    pickle.dump(category_index, f)

In [5]:
from lightgbm import LGBMClassifier

#eval_set = [(train_x, train_y), (test_x, test_y)]
model0 = LGBMClassifier(objective="multiclass", random_state=123)
model0.fit(train_x, train_y, verbose=False) #eval_set=eval_set,

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


In [6]:
sample = training_data.sample(1000)

test_x = sample.drop([target_col], axis=1)
test_y = sample[[target_col]]

In [7]:
from sklearn.metrics import accuracy_score
predictions = model0.predict(test_x)
#predictions = s.predict_model(best[0], data = test_x)["prediction_label"]
accuracy = accuracy_score(predictions, test_y)
metrics = {"accuracy_score": accuracy}
print(f"metrics: {metrics}")

metrics: {'accuracy_score': 0.894}


In [46]:
%%timeit -n 10
model0.predict_proba(test_x)

8.98 ms ± 3.32 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [43]:
import lleaves
model_path = prob_config.data_path / "phase2_2_lgbm.txt"
llvm_model_path = prob_config.data_path / "phase2_2_lleaves"
model0.booster_.save_model(filename=model_path)
llvm_model = lleaves.Model(model_file=model_path)
llvm_model.compile()

In [56]:
np.save('data.npy', model0.classes_)

In [57]:
np.load('data.npy', allow_pickle=True) # load

array(['Denial of Service', 'Exploits', 'Information Gathering',
       'Malware', 'Normal', 'Other'], dtype=object)

In [50]:
type(model0.classes_)

numpy.ndarray

In [45]:
%%timeit -n 10
z = llvm_model.predict(test_x)
labels = np.argmax(z, axis=1)
classes = model0.classes_
labels = [classes[i] for i in labels]

4.45 ms ± 236 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
run_description = """
### Header
LGBM model, First Base Model Prob2
Model: LGBM
    """
log_model_to_tracker_lgbm(model0, metrics, run_description)

2023/07/06 17:05:03 INFO mlflow.tracking.fluent: Experiment with name 'phase-2_prob-2_lgbm' does not exist. Creating a new experiment.


'fd76f1a988fa4abfb94eee698fb18fb9'

# Drift Detection 

In [34]:
# Save referent for drift detection.
X_baseline_df = pd.DataFrame(X_baseline, columns=col_to_detect)
X_baseline_df.to_parquet(prob_config.driff_ref_path, index=False)

# Model drift

In [11]:
import pandas as pd
import redis
import pickle
# Load data for problem 1
rc2 = redis.Redis(host='localhost', db=2, port=6379)

captured_x = pd.DataFrame()
for key in rc2.keys():
    captured_data = pickle.loads(rc2.get(key))
    captured_x = pd.concat([captured_x, captured_data])

captured_x.drop_duplicates(inplace=True, ignore_index=True)

#captured_x = apply_category_features(
#    raw_df=captured_x[train_x0.columns],
#    categorical_cols=prob_config.categorical_cols,
#    category_index=category_index,
#)

In [84]:
np_captured_x = captured_x.copy() #drop(["is_drift", "batch_id"], axis=1)
np_captured_x = np_captured_x.astype(train_x.dtypes.to_dict())
np_captured_x['label'].fillna(-1, inplace=True)

In [12]:
allkey = rc2.keys()

# Test API

In [58]:
rows = []
test = pickle.loads(rc2.get(allkey[28]))

for index, row in test.iterrows():
    rows.append(row.to_list())

data = {
  "id": "123",
  "rows": rows,
  "columns": test.columns.to_list()
}

import requests
#response = requests.post('http://14.225.205.204:5040/phase-2/prob-1/predict', json=data)
response = requests.post('http://localhost:8000/phase-2/prob-2/predict', json=data)
#response = requests.post('http://14.225.205.204:80/phase-2/prob-2/predict', json=data)
print(response.text)

{"id": "123", "predictions": ["Other", "Normal", "Normal", "Denial of Service", "Other", "Other", "Exploits", "Denial of Service", "Exploits", "Normal", "Normal", "Other", "Information Gathering", "Denial of Service", "Exploits", "Denial of Service", "Exploits", "Other", "Normal", "Normal", "Other", "Denial of Service", "Denial of Service", "Normal", "Exploits", "Exploits", "Normal", "Denial of Service", "Normal", "Exploits", "Exploits", "Normal", "Exploits", "Other", "Information Gathering", "Information Gathering", "Normal", "Normal", "Exploits", "Normal", "Exploits", "Normal", "Normal", "Exploits", "Other", "Denial of Service", "Other", "Other", "Denial of Service", "Exploits", "Other", "Normal", "Exploits", "Exploits", "Normal", "Denial of Service", "Denial of Service", "Other", "Denial of Service", "Other", "Normal", "Other", "Normal", "Other", "Normal", "Exploits", "Denial of Service", "Exploits", "Normal", "Exploits", "Denial of Service", "Other", "Normal", "Exploits", "Normal",

In [6]:
# Clear cache

from aiocache import Cache
from aiocache.serializers import PickleSerializer

cacherequest = Cache(Cache.REDIS, endpoint="localhost", port=6379, db=2, serializer=PickleSerializer())
await cacherequest.clear()

True