## Initialize Fiddler Client


In [10]:
import fiddler as fdl
client = fdl.FiddlerApi()

## Load dataset


In [11]:
import pandas as pd
df = pd.read_csv('/app/fiddler_samples/samples/datasets/winequality/train.csv')
df_schema = fdl.DatasetInfo.from_dataframe(df, max_inferred_cardinality=1000)

## Upload dataset

In [12]:
if 'wine_quality' not in client.list_datasets():
    upload_result = client.upload_dataset(
        dataset={'train': df}, 
        dataset_id='wine_quality')


## Create model schema

In [13]:
target = 'quality'
train_input = df.drop(columns=['row_id', 'quality'])
train_target = df[target]

feature_columns = list(train_input.columns)

model_info = fdl.ModelInfo.from_dataset_info(
    dataset_info=client.get_dataset_info('wine_quality'),
    target=target, 
    features=feature_columns,
    display_name='sklearn model',
    description='this is a sklearn model from tutorial'
)

## Train model

In [14]:
import sklearn.linear_model
import sklearn.pipeline
import sklearn.preprocessing


regressor = sklearn.linear_model.LinearRegression()

full_model = sklearn.pipeline.Pipeline(steps=[
        ('standard_scaling', sklearn.preprocessing.StandardScaler()),
        ('model_name', regressor),
    ])

full_model.fit(train_input, train_target)
full_model.predict(train_input)

array([6.50506782, 6.65427237, 5.35208865, ..., 5.27678525, 6.36560789,
       4.44488096])

## Save model and schema

In [15]:
import pathlib
import shutil
import pickle
import yaml

project_id = 'tutorial'
model_id = 'wine_quality_model'

# create temp dir
model_dir = pathlib.Path(model_id)
shutil.rmtree(model_dir, ignore_errors=True)
model_dir.mkdir()

# save model
with open(model_dir / 'model.pkl', 'wb') as pkl_file:
    pickle.dump(full_model, pkl_file)

# save model schema
with open(model_dir / 'model.yaml', 'w') as yaml_file:
    yaml.dump({'model': model_info.to_dict()}, yaml_file)


## Write package.py wrapper

In [16]:
%%writefile wine_quality_model/package.py

import pickle
from pathlib import Path
import pandas as pd

PACKAGE_PATH = Path(__file__).parent

class SklearnModelPackage:
    is_classifier = False
    output_columns = ['predicted_quality']

    def __init__(self):
        with open(PACKAGE_PATH / 'model.pkl', 'rb') as infile:
            self.model = pickle.load(infile)

    def predict(self, input_df):
        f = self.model.predict if not self.is_classifier else self.model.predict_proba
        return pd.DataFrame(f(input_df), columns=self.output_columns)
    
def get_model():
    return SklearnModelPackage()


Writing wine_quality_model/package.py


## Validate model package

This verifies consistency between `df_schema`, `model_info`, and `package.py`; and performs local functional tests on the wrapped model.

In [17]:
from fiddler import PackageValidator
validator = PackageValidator(model_info, df_schema, model_dir)
passed, errors = validator.run_chain()

ValueError: operands could not be broadcast together with shapes (1,13) (11,) (1,13) 

## Upload model

In [18]:
if project_id not in client.list_projects():
    client.create_project(project_id)
client.delete_model(project_id, model_id)
client.upload_model_package(model_dir, project_id, model_id)

## Run model

In [25]:
prediction_input = train_input[0: 10]
result = client.run_model(project_id, model_id, prediction_input, log_events=True)
result
dir(prediction_input.dtypes)

prediction_input.dtypes.keys


<bound method Series.keys of fixed acidity           float64
volatile acidity        float64
citric acid             float64
residual sugar          float64
chlorides               float64
free sulfur dioxide     float64
total sulfur dioxide    float64
density                 float64
pH                      float64
sulphates               float64
alcohol                 float64
dtype: object>