## Initialize Fiddler Client


In [9]:
import fiddler as fdl
import logging

verbose = False

if verbose:
    logging.basicConfig(level=logging.DEBUG)

client = fdl.FiddlerApi(verbose=verbose)

## Load dataset


In [10]:
import pandas as pd
df = pd.read_csv('/app/fiddler_samples/samples/datasets/winequality/train.csv')
df_schema = fdl.DatasetInfo.from_dataframe(df, max_inferred_cardinality=1000)


In [None]:
df_schema

## Upload dataset

In [11]:
if 'wine_quality' not in client.list_datasets():
    upload_result = client.upload_dataset(
        dataset={'train': df}, 
        dataset_id='wine_quality')



## Create model schema

In [12]:
target = 'quality'
train_input = df.drop(columns=['row_id', 'quality'])
train_target = df[target]

feature_columns = list(train_input.columns)

model_info = fdl.ModelInfo.from_dataset_info(
    dataset_info=client.get_dataset_info('wine_quality'),
    target=target, 
    features=feature_columns,
    display_name='sklearn model',
    description='this is a sklearn model from tutorial'
)

## Train model

In [13]:
import sklearn.linear_model
import sklearn.pipeline
import sklearn.preprocessing


regressor = sklearn.linear_model.LinearRegression()

full_model = sklearn.pipeline.Pipeline(steps=[
        ('standard_scaling', sklearn.preprocessing.StandardScaler()),
        ('model_name', regressor),
    ])

full_model.fit(train_input, train_target)
full_model.predict(train_input)


array([6.50506782, 6.65427237, 5.35208865, ..., 5.27678525, 6.36560789,
       4.44488096])

## Save model and schema

In [14]:
import pathlib
import shutil
import pickle
import yaml

project_id = 'tutorial'
model_id = 'wine_quality_model'

model_dir = pathlib.Path(model_id)
shutil.rmtree(model_dir, ignore_errors=True)
model_dir.mkdir()

with open(model_dir / 'model.pkl', 'wb') as pkl_file:
    pickle.dump(full_model, pkl_file)

with open(model_dir / 'model.yaml', 'w') as yaml_file:
    yaml.dump({'model': model_info.to_dict()}, yaml_file)


## Write package.py wrapper

In [15]:
%%writefile wine_quality_model/package.py

import pickle
from pathlib import Path
import pandas as pd
import logging
from flask import Flask, request

PACKAGE_PATH = Path(__file__).parent

class SklearnModelPackage:
    is_classifier = False
    output_columns = ['predicted_quality']

    def __init__(self):
        with open(PACKAGE_PATH / 'model.pkl', 'rb') as infile:
            self.model = pickle.load(infile)

    def predict(self, input_df):
        #logging.info(f'log raw request {request.data}')
        #logging.info(f'log dataframe {input_df}')
        f = self.model.predict if not self.is_classifier else self.model.predict_proba
        return pd.DataFrame(f(input_df), columns=self.output_columns)
    
def get_model():
    return SklearnModelPackage()



Writing wine_quality_model/package.py


In [16]:
!ls -l wine_quality_model

total 12
-rw-r--r-- 1 jovyan users 1408 Oct  5 18:23 model.pkl
-rw-r--r-- 1 jovyan users 1778 Oct  5 18:23 model.yaml
-rw-r--r-- 1 jovyan users  731 Oct  5 18:25 package.py


## Test package.py locally before uploading

In [17]:
#
def my_import(name):
    components = name.split('.')
    mod = __import__(components[0])
    for comp in components[1:]:
        mod = getattr(mod, comp)
    return mod

from importlib import reload

model_class = my_import('wine_quality_model.package')
reload(model_class)

model_class.get_model().predict(train_input[0:5])

AttributeError: module 'wine_quality_model' has no attribute 'package'

## Upload model

In [18]:
client.delete_model(project_id, model_id)
client.upload_model_package(model_dir, project_id, model_id)

## Run model

In [19]:
import time

chunk = 2  #chunk row size
list_df = [train_input[i:i+chunk] for i in range(0, train_input.shape[0], chunk)]
for row in list_df:
    result = client.run_model(project_id, model_id, row, log_events=True)
    print(result)
    time.sleep(10)

   predicted_quality
0           6.505068
1           6.654272
   predicted_quality
0           5.352089
1           6.377989
   predicted_quality
0           5.834986
1           6.697744


KeyboardInterrupt: 

In [60]:
client.get_model_info(project_id, model_id)

ModelInfo:
  display_name: sklearn model
  description: this is a sklearn model from tutorial
  input_type: ModelInputType.TABULAR
  model_task: ModelTask.REGRESSION
  inputs:
                      column  dtype count(possible_values) is_nullable  \
    0          fixed acidity  FLOAT                              False   
    1       volatile acidity  FLOAT                              False   
    2            citric acid  FLOAT                              False   
    3         residual sugar  FLOAT                              False   
    4              chlorides  FLOAT                              False   
    5    free sulfur dioxide  FLOAT                              False   
    6   total sulfur dioxide  FLOAT                              False   
    7                density  FLOAT                              False   
    8                     pH  FLOAT                              False   
    9              sulphates  FLOAT                              False   
    10    

In [19]:
inp = train_input[1: 5]
inp.to_dict(orient='records')

d = [y.iloc[0,:].to_dict() for x , y in inp.groupby(level=0)]
d


[{'fixed acidity': 8.0,
  'volatile acidity': 0.18,
  'citric acid': 0.37,
  'residual sugar': 0.9,
  'chlorides': 0.049,
  'free sulfur dioxide': 36.0,
  'total sulfur dioxide': 109.0,
  'density': 0.9900700000000001,
  'pH': 2.89,
  'sulphates': 0.44,
  'alcohol': 12.7},
 {'fixed acidity': 6.8,
  'volatile acidity': 0.67,
  'citric acid': 0.0,
  'residual sugar': 1.9,
  'chlorides': 0.08,
  'free sulfur dioxide': 22.0,
  'total sulfur dioxide': 39.0,
  'density': 0.9970100000000001,
  'pH': 3.4,
  'sulphates': 0.74,
  'alcohol': 9.7},
 {'fixed acidity': 10.1,
  'volatile acidity': 0.31,
  'citric acid': 0.35,
  'residual sugar': 1.6,
  'chlorides': 0.075,
  'free sulfur dioxide': 9.0,
  'total sulfur dioxide': 28.0,
  'density': 0.99672,
  'pH': 3.24,
  'sulphates': 0.83,
  'alcohol': 11.2},
 {'fixed acidity': 7.3,
  'volatile acidity': 0.45,
  'citric acid': 0.36,
  'residual sugar': 5.9,
  'chlorides': 0.07400000000000001,
  'free sulfur dioxide': 12.0,
  'total sulfur dioxide': 87

In [88]:
import logging
logging.basicConfig(
    level=logging.DEBUG
)

/bin/sh: 1: docker: not found
