In [1]:
import getpass
import os
from datetime import datetime

import pandas as pd
import xgboost as xgb
from pytz import timezone
from quartic_sdk import APIClient

In [2]:
# Credentials must not live in the notebook. They are read from the environment
# (QUARTIC_HOST, QUARTIC_USERNAME, QUARTIC_PASSWORD); the username and password
# fall back to an interactive prompt when the variable is unset or empty.
client = APIClient(
    os.environ.get("QUARTIC_HOST", "https://demo.quartic.ai/"),
    username=os.environ.get("QUARTIC_USERNAME") or input("Quartic username: "),
    password=os.environ.get("QUARTIC_PASSWORD") or getpass.getpass("Quartic password: "),
)

In [3]:
# Ask the API client for its assets; the result is queried by name in the next cell.
assets = client.assets()

In [4]:
# Select the asset whose "name" is "15000 L Bioreactor", then request the tags
# for that asset's id.
# NOTE(review): asset_tags is not referenced again in the visible cells — confirm it is needed.
asset = assets.get("name", "15000 L Bioreactor")
asset_tags = client.tags(asset_id=asset.id)

# Train an XGBoost Classifier


In [37]:
# Training window: 2019-01-01 00:00 UTC through 2019-05-03 23:00 UTC, expressed as
# epoch milliseconds.
# NOTE(review): this cell was originally described as "Jan 1st 2019 to May 30th 2019",
# but the stop date in code is May 3rd — confirm which one was intended.

start_time = int(datetime(2019, 1, 1, 0, 0, 0, 0, timezone('UTC')).timestamp() * 1000)
stop_time = int(datetime(2019, 5, 3, 23, 0, 0, 0, timezone('UTC')).timestamp() * 1000)

# Iterating asset.data(...) yields DataFrames. Collect them first and concatenate
# once: calling pd.concat inside the loop re-copies the accumulated frame on every
# iteration. An empty result still produces an empty DataFrame.
asset_data_itr = asset.data(start_time=start_time, stop_time=stop_time)
train_frames = list(asset_data_itr)
df = pd.concat(train_frames) if train_frames else pd.DataFrame()

In [38]:
# Inspect the column labels of the training frame.
# Columns referenced by the labelling rule in the next cell:
#   24797 -> Batch phase
#   24799 -> Vessel Temperature
df.columns


Index(['24971', '25038', '25039', '24923', '24924', '24795', '24796', '24797',
       '24798', '24799', '24800', '24801', '24802', '24803', '24804', '24805',
       '24806', '24807', '24808', '24809', '24810', '24811', '24812', '24813',
       '24814', '24815', '24816'],
      dtype='object')

In [39]:
def label_temperature(row):
    """Derive the binary target from batch phase and vessel temperature.

    The label is 1 when column "24797" (batch phase) equals 5 and column
    "24799" (vessel temperature) is greater than 36.3; otherwise it is 0.

    Parameters
    ----------
    row : pandas.Series or pandas.DataFrame
        A single record (as passed by ``DataFrame.apply(..., axis=1)``) or a
        whole frame containing both columns.

    Returns
    -------
    int or pandas.Series
        ``1``/``0`` for a single record; an int64 Series of labels, indexed
        like the input, for a DataFrame.
    """
    if isinstance(row, pd.DataFrame):
        # Vectorised path: label every record at once instead of calling back
        # into Python per row.
        return ((row["24797"] == 5) & (row["24799"] > 36.3)).astype("int64")
    return 1 if (row["24797"] == 5 and row["24799"] > 36.3) else 0


df["target"] = label_temperature(df)

In [111]:
# Name of the label column; every other column of df is used as a model feature.
# NOTE(review): the features therefore include "24797" and "24799", the two columns
# the label is computed from — confirm this is intended.
target_col = "target"
feature_cols = [name for name in df.columns if name != target_col]


In [41]:
# Build the training matrix: feature columns as data, the target column as label.
df_X = df[feature_cols]
df_y = df[[target_col]]
dtrain = xgb.DMatrix(data=df_X, label=df_y)

# Booster configuration and number of boosting rounds.
# NOTE(review): no evaluation set is passed to xgb.train below, so 'eval_metric'
# presumably has no visible effect here; 'mae' is also an unusual metric for a
# binary classifier, and max_depth=100 is very deep — confirm both are intended.
num_round = 100
param = {
    'max_depth': 100,
    'eta': 0.2,
    'objective': 'binary:logistic',
    'nthread': 4,
    'eval_metric': 'mae',
}

classifier = xgb.train(params=param, dtrain=dtrain, num_boost_round=num_round)

# Testing trained model

In [124]:
# Test window: 2019-06-01 00:00 UTC through 2019-09-30 23:00 UTC, expressed as
# epoch milliseconds.

start_time_test = int(datetime(2019, 6, 1, 0, 0, 0, 0, timezone('UTC')).timestamp() * 1000)
stop_time_test = int(datetime(2019, 9, 30, 23, 0, 0, 0, timezone('UTC')).timestamp() * 1000)

# Iterating asset.data(...) yields DataFrames. Collect them first and concatenate
# once: calling pd.concat inside the loop re-copies the accumulated frame on every
# iteration. An empty result still produces an empty DataFrame.
asset_data_itr_test = asset.data(start_time=start_time_test, stop_time=stop_time_test)
test_frames = list(asset_data_itr_test)
df_test = pd.concat(test_frames) if test_frames else pd.DataFrame()

In [125]:
# Score only the columns the model was trained on, in training order. Passing the
# whole frame stops working once the "pred"/"target" columns have been added to
# df_test, e.g. when this cell is re-run after the evaluation cell.
pred_probs = classifier.predict(xgb.DMatrix(df_test[feature_cols]))

# Convert probabilities back to the original class labels (threshold 0.5) without
# overwriting the probabilities in place.
preds = (pred_probs >= 0.5).astype(pred_probs.dtype)
prob_to_label_pred = pd.Series(preds)

# Accuracy

In [126]:
# Attach the predicted labels positionally (converted to a list so no index
# alignment takes place) and compute the true labels with label_temperature.
df_test["pred"] = list(prob_to_label_pred)
df_test["target"] = df_test.apply(label_temperature, axis=1)

# Accuracy: the fraction of records whose predicted label equals the true label.
# NOTE(review): this is accuracy, not precision; if positives are rare it can look
# high regardless of how well they are detected — consider reporting
# precision/recall for the positive class as well.
float((df_test["pred"] == df_test["target"]).mean())

0.9988896797153025