## Inference

This notebook presents a simple inference pipeline:
load the trained model and make predictions from raw query text.

In [1]:
import pandas as pd
import numpy as np
import re


### Load the model

In [2]:
from sklearn.pipeline import Pipeline

In [3]:
def customize_text(text):
    """Normalize an iterable of raw query strings.

    Every string is lower-cased, then a fixed sequence of literal
    substitutions is applied (line breaks, tabs, SQL comment markers, the
    literal "2020.2.3", and "*" spelled out as " star"). Runs of the listed
    punctuation characters, together with any spaces that follow them, are
    replaced by a single space. Finally one pass of double-space collapsing
    is performed and surrounding whitespace is stripped.

    Args:
        text: iterable of str.

    Returns:
        list of str, one cleaned string per input element, in input order.
    """
    # Applied strictly in this order: "/*" and "*/" have to be consumed
    # before the bare "*" rule turns the remaining asterisks into " star".
    literal_swaps = (
        ('\n', ' '),
        ('\r', ''),
        ('\t', ' '),
        ("/*", ' '),
        ("2020.2.3", ' '),
        ("*/", ' '),
        ("*", ' star'),
    )

    cleaned = []
    for raw in text:
        current = raw.lower()
        for old, new in literal_swaps:
            current = current.replace(old, new)
        current = re.sub(r"[',.;@#?!&$=())]+\ *", " ", current)
        # Single non-repeating pass: a run of three or more spaces is
        # shortened but not necessarily reduced to one space.
        current = current.replace("  ", ' ')
        cleaned.append(current.strip())
    return cleaned

In [4]:
import joblib

# Path of the serialized pipeline, relative to the notebook's working directory.
joblib_pipe_xgb = "./models/pipe_xgb.sav"
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code
# while deserializing; only load model files from a trusted source.
# Presumably the stored pipeline refers to customize_text by name, which would
# be why that function is defined before this cell runs; TODO confirm.
pipe_xgb_model = joblib.load(joblib_pipe_xgb)

### Make prediction from raw query text

In [5]:
# The pipeline is fed a list holding one raw query string.
query_text = ["SELECT *"]
pred_z = pipe_xgb_model.predict(query_text)
# Undo the transform on the model output: np.expm1(z) equals exp(z) - 1 but
# stays accurate when z is close to zero.
# Presumably the model was fit on log1p(query time); TODO confirm.
pred_y = np.expm1(pred_z)
print(f"Query text              : {query_text}")
print(f"Predicted query time    : {pred_y[0]:.2f}")

Query text              : ['SELECT *']
Predicted query time    : 306.19


In [6]:
# The pipeline is fed a list holding one raw query string.
query_text = ["SELECT * # count(*) from information_schema.TABLES WHERE TABLE_SCHEMA = 'mysql' AND TABLE_NAME = 'rds_heartbeat2'"]
pred_z = pipe_xgb_model.predict(query_text)
# Undo the transform on the model output: np.expm1(z) equals exp(z) - 1 but
# stays accurate when z is close to zero.
# Presumably the model was fit on log1p(query time); TODO confirm.
pred_y = np.expm1(pred_z)
print(f"Query text              : {query_text}")
print(f"Predicted query time    : {pred_y[0]:.2f}")

Query text              : ["SELECT * # count(*) from information_schema.TABLES WHERE TABLE_SCHEMA = 'mysql' AND TABLE_NAME = 'rds_heartbeat2'"]
Predicted query time    : 4991.63


In [7]:
# The pipeline is fed a list holding one raw query string.
query_text = ['/* ApplicationName=DataGrip 2020.2.3 */ select * from mysql.slow_log order by start_time desc']
pred_z = pipe_xgb_model.predict(query_text)
# Undo the transform on the model output: np.expm1(z) equals exp(z) - 1 but
# stays accurate when z is close to zero.
# Presumably the model was fit on log1p(query time); TODO confirm.
pred_y = np.expm1(pred_z)
print(f"Query text              : {query_text}")
print(f"Predicted query time    : {pred_y[0]:.2f}")

Query text              : ['/* ApplicationName=DataGrip 2020.2.3 */ select * from mysql.slow_log order by start_time desc']
Predicted query time    : 163560.11
