In [5]:
import fiddler as fdl
import pandas as pd

client = fdl.FiddlerApi()

If dataset is already in Fiddler, just use it. Otherwise upload it.

In [6]:
if 'wine_quality' in client.list_datasets():
    df_schema = client.get_dataset_info('wine_quality')
    df = client.get_dataset('wine_quality')['train']
else:
    df = pd.read_csv('/app/fiddler_samples/samples/datasets/winequality/train.csv')
    df_schema = fdl.DatasetInfo.from_dataframe(df, max_inferred_cardinality=1000)
    upload_result = client.upload_dataset(
        dataset={'train': df}, 
        dataset_id='wine_quality')

In [None]:
df_schema

In [7]:
target = 'quality'

feature_columns = df_schema.get_column_names()
feature_columns.remove('row_id')
feature_columns.remove(target)

model_info = fdl.ModelInfo.from_dataset_info(
    dataset_info=df_schema,
    target=target, 
    features=feature_columns,
    display_name='external model',
    description='this is an external model called from fiddler via rest API'
)
model_info

ModelInfo:
  display_name: external model
  description: this is an external model called from fiddler via rest API
  input_type: ModelInputType.TABULAR
  model_task: ModelTask.REGRESSION
  inputs:
                      column  dtype count(possible_values) is_nullable  \
    0          fixed acidity  FLOAT                              False   
    1       volatile acidity  FLOAT                              False   
    2            citric acid  FLOAT                              False   
    3         residual sugar  FLOAT                              False   
    4              chlorides  FLOAT                              False   
    5    free sulfur dioxide  FLOAT                              False   
    6   total sulfur dioxide  FLOAT                              False   
    7                density  FLOAT                              False   
    8                     pH  FLOAT                              False   
    9              sulphates  FLOAT                           

In [8]:
import pathlib
import shutil
import yaml

model_dir = pathlib.Path('external_model')
shutil.rmtree(model_dir, ignore_errors=True)
model_dir.mkdir()

with open(model_dir / 'model.yaml', 'w') as yaml_file:
    yaml.dump({'model': model_info.to_dict()}, yaml_file)


In [34]:
%%writefile external_model/package.py

from pathlib import Path
import pandas as pd

class SklearnModelPackage:
    is_classifier = False
    output_columns = ['predicted_quality']

    def predict(self, input_df):
        json_input = input_df.to_json(orient='records')
        print(json_input)
        return pd.DataFrame([1, 4, 5, 6], columns=self.output_columns)
    
def get_model():
    return SklearnModelPackage()


Overwriting external_model/package.py


In [41]:
def my_import(name):
    components = name.split('.')
    mod = __import__(components[0])
    for comp in components[1:]:
        mod = getattr(mod, comp)
    return mod

from importlib import reload

model_class = my_import('external_model.package')
reload(model_class)

test_input = df.drop(columns=['row_id', 'quality'])
model_class.get_model().predict(test_input[0:2])


[{"fixed acidity":8.6,"volatile acidity":0.49,"citric acid":0.29,"residual sugar":2.0,"chlorides":0.11,"free sulfur dioxide":19.0,"total sulfur dioxide":133.0,"density":0.9972,"pH":2.93,"sulphates":1.98,"alcohol":9.8},{"fixed acidity":8.0,"volatile acidity":0.18,"citric acid":0.37,"residual sugar":0.9,"chlorides":0.049,"free sulfur dioxide":36.0,"total sulfur dioxide":109.0,"density":0.99007,"pH":2.89,"sulphates":0.44,"alcohol":12.7}]


Unnamed: 0,predicted_quality
0,1
1,4
2,5
3,6


In [37]:
project_id = 'tutorial'
model_id = 'external_model'
client.delete_model(project_id, model_id)
client.upload_model_package(model_dir, project_id, model_id)

In [39]:
import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
client.verbose=True
client.run_model(project_id, model_id, test_input)


Unnamed: 0,predicted_quality
0,1
1,4
2,5
3,6
