In [1]:
# Initiate DB
from src.db_models.base_model import db
from db_models.sample import Sample
from src.db_models.ml_model import MlModel
from src.db_models.prediction import Prediction

# Connect to DB.
# NOTE(review): assumes `db` is a peewee database object -- confirm.
# reuse_if_open=True keeps this cell re-runnable: a plain connect() raises
# OperationalError when the connection from an earlier run is still open.
db.connect(reuse_if_open=True)
# Migrate tables: create the tables backing the three models used below.
db.create_tables([MlModel, Sample, Prediction])


In [2]:
# Get data
import pandas as pd

# Read the raw dataset from the CSV file.
data = pd.read_csv(filepath_or_buffer="../data/Boiler_emulator_dataset_new.csv")

# Model inputs: every column except 'Condition' and 'Class'.
features = data.drop(['Condition', 'Class'], axis="columns")
# Training targets: the 'Class' column (kept under the name `predictions`,
# which the following cells rely on).
predictions = data.loc[:, "Class"]

In [3]:
# Create Primary model

import os
import pickle
from src.ml_models.mock_model import MockModel
from src.utils.utils import ROOT_DIR

import uuid

# Train the primary model on the dataset loaded in the previous cell.
model = MockModel()
model.fit(features, predictions)

# Name the pickle with a random UUID instead of hash(model): unless MockModel
# defines __hash__, the default object hash (in CPython) is derived from the
# object's memory address, so it can repeat between kernel sessions and
# silently overwrite a pickle that an older MlModel row still points to.
model_name = uuid.uuid4().hex
model_file = f"pickles/{model_name}.pkl"  # path stored in the DB, relative to ROOT_DIR
model_path = os.path.join(ROOT_DIR, model_file)

# Create the target directory if needed, and use a context manager so the file
# handle is closed (and the pickle fully written) before the model is registered.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
with open(model_path, "wb") as pickle_file:
    pickle.dump(model, pickle_file)

# Deactivate every existing model row and register the new one as active in a
# single transaction, so a failure between the two statements cannot leave the
# table without an active model.
with db.atomic():
    MlModel.update(is_active=False).execute()
    model_record = MlModel.create(pickle_path=model_file, is_active=True)

# Test Model
model.predict(features)

['Scaling',
 'ExcessAir',
 'Lean',
 'Nominal',
 'Nominal',
 'Lean',
 'Nominal',
 'Fouling',
 'Nominal',
 'Lean',
 'ExcessAir',
 'Nominal',
 'Nominal',
 'ExcessAir',
 'Scaling',
 'Fouling',
 'Lean',
 'Lean',
 'Nominal',
 'Lean',
 'Fouling',
 'Scaling',
 'Lean',
 'Lean',
 'ExcessAir',
 'Fouling',
 'Nominal',
 'Lean',
 'Nominal',
 'Fouling',
 'Fouling',
 'ExcessAir',
 'Nominal',
 'Lean',
 'Fouling',
 'ExcessAir',
 'Lean',
 'ExcessAir',
 'Lean',
 'Fouling',
 'Fouling',
 'ExcessAir',
 'ExcessAir',
 'ExcessAir',
 'Nominal',
 'ExcessAir',
 'Fouling',
 'ExcessAir',
 'Lean',
 'Scaling',
 'ExcessAir',
 'ExcessAir',
 'Lean',
 'Nominal',
 'Fouling',
 'Scaling',
 'Scaling',
 'Scaling',
 'Scaling',
 'Scaling',
 'Fouling',
 'Nominal',
 'Lean',
 'Scaling',
 'Fouling',
 'ExcessAir',
 'Lean',
 'Lean',
 'Scaling',
 'Lean',
 'Scaling',
 'Nominal',
 'ExcessAir',
 'ExcessAir',
 'Fouling',
 'ExcessAir',
 'Nominal',
 'Scaling',
 'Scaling',
 'Scaling',
 'Fouling',
 'Fouling',
 'ExcessAir',
 'ExcessAir',
 'Nomi

In [4]:
# Fill DB with data
import numpy as np

# Upper bound on the number of rows drawn for each class.
SAMPLES_PER_CLASS = 500
# One seeded generator shared by all sampling calls, so the rows written to the
# DB are reproducible for a given input file.
sampling_rng = np.random.RandomState(42)

# Get up to 500 random records per class
all_classes = data["Class"].unique()
class_frames = []
for class_name in all_classes:
    class_rows = data[data["Class"] == class_name]
    # min() keeps classes with fewer than SAMPLES_PER_CLASS rows intact
    # (same result as shuffling and slicing the first 500).
    class_frames.append(
        class_rows.sample(
            n=min(len(class_rows), SAMPLES_PER_CLASS),
            random_state=sampling_rng,
        )
    )

# Concatenate once instead of growing a DataFrame inside the loop; fall back to
# an empty frame with the same columns when there are no classes at all.
sample_data = pd.concat(class_frames) if class_frames else data.head(0)

# Reshuffle
sample_data = sample_data.sample(frac=1, random_state=sampling_rng).reset_index(drop=True)

# Insert to DB
# A single transaction avoids one commit per row and guarantees that either
# all samples and predictions are written or none are.
with db.atomic():
    for _, row in sample_data.iterrows():
        sample = Sample.create(
            fuel_mdot=row["Fuel_Mdot"],
            tair=row["Tair"],
            treturn=row["Treturn"],
            tsupply=row["Tsupply"],
            water_mdot=row["Water_Mdot"],
        )

        # NOTE(review): `predicted` is filled with the dataset's ground-truth
        # "Class", not with model.predict(...), although the row is attributed
        # to `model_record` -- confirm this is the intended seed data.
        prediction = Prediction.create(
            predicted=row["Class"],
            feedback=None,
            sample=sample,
            model=model_record,
        )

sample_data

Unnamed: 0,Fuel_Mdot,Tair,Treturn,Tsupply,Water_Mdot,Condition,Class
0,4,289,333.0,345.613500,3.5,F = 0.26,Fouling
1,3,285,333.0,343.196830,7.5,%=0.3,ExcessAir
2,2,297,333.0,335.719503,9.0,S = 0.36,Scaling
3,3,299,333.0,352.847864,3.5,%=0.45,ExcessAir
4,4,297,333.0,341.369913,5.0,S = 0.16,Scaling
...,...,...,...,...,...,...,...
2495,4,287,333.0,345.350208,7.5,%=0.05,Lean
2496,4,293,333.0,343.489976,8.5,%=0.1,Nominal
2497,2,297,333.0,342.718794,8.5,%=0.20,ExcessAir
2498,2,289,333.0,356.742791,3.0,%=0.40,ExcessAir


In [5]:
# Close the database connection that was opened in the first cell.
db.close()

True