# Intro
This notebook creates a simple Iris classification model to allow us to test multiclass classification predictions and explanations.

In [None]:
import pathlib
import pickle
import os
import shutil

import numpy as np
import pandas as pd
import sklearn.datasets
import sklearn.model_selection
import sklearn.linear_model

# import the Fiddler package
import fiddler as fdl

# Identifiers of the Fiddler dataset, project, and model used throughout this notebook.
DATASET_ID = 'iris'
PROJECT_ID = 'iris_classification'
MODEL_ID = 'logreg'

# When True, the local model assets directory is removed after the upload;
# when False, the generated files are kept on disk.
DELETE_ASSETS = True

In [None]:
!conda install --yes pyyaml
import yaml # may come included with Python, depending on your setup

**Create a temporary local directory**</br>
We will use this directory to store related model files so that we can upload them to the server.

In [None]:
# Local staging directory for the files that will be uploaded as a model package.
MODEL_DIR = os.path.join(os.getcwd(), 'model_assets')

# exist_ok=True makes this cell safe to re-run and avoids a check-then-create race.
# It still raises if the path already exists but is not a directory, which would
# otherwise only surface later when files are written into it.
os.makedirs(MODEL_DIR, exist_ok=True)

**Set up the Fiddler Client**</br>
This client will be used to access server side functionality such as fetching our dataset and uploading our model artifacts.


In [None]:
# Connection settings for the Fiddler client.

# Organization id, listed as "Organization Name" under <Fiddler URL>/settings/general.
org_id = 'onebox'

# API token; find, create, or change it under <Fiddler URL>/settings/credentials.
token = os.environ.get('FIDDLER_API_TOKEN')

# API url of the running Fiddler instance, typically "https://dev.fiddler.ai"
# (or "http://localhost:4100" for onebox).
url = os.environ.get('FIDDLER_URL')

fiddler_api = fdl.FiddlerApi(auth_token=token, org_id=org_id, url=url)

In [None]:
# Fetch only the two splits of the dataset that this notebook needs.
iris = fiddler_api.get_dataset(dataset_id=DATASET_ID, splits=["train", "test"])

train_df = iris["train"]
test_df = iris["test"]

print(train_df.shape, test_df.shape)
train_df.sample(n=5, random_state=0)

(120, 5) (30, 5)


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
114,5.8,2.8,5.1,2.4,2
62,6.0,2.2,4.0,1.0,1
33,5.5,4.2,1.4,0.2,0
107,7.3,2.9,6.3,1.8,2
7,5.0,3.4,1.5,0.2,0


<strong>Setting up the dataset</strong></br>
We want to ensure that the columns we wish to use as feature vectors are clearly set aside,
and that our target column is distinctly noted.

In [None]:
target = "species"  # prediction target

# Every column except the target is a model input. Build the list with a
# comprehension: list.remove() mutates in place and returns None, so assigning
# its result would leave `features` set to None.
features = [column for column in train_df.columns if column != target]

# Fit a multinomial logistic regression on the feature columns only.
cls = sklearn.linear_model.LogisticRegression(C=.1, solver='lbfgs', multi_class='multinomial', max_iter=9999)
cls.fit(train_df[features], train_df[target])

LogisticRegression(C=0.1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=9999, multi_class='multinomial',
          n_jobs=None, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)

<strong>ModelInfo</strong></br>
For Fiddler to properly run and explain your model, you need to provide some information about model inputs and outputs that is not captured by the sklearn object itself. Luckily the Dataset we created above has a DatasetInfo component that can help us infer the ModelInfo of models trained on that dataset.

In [None]:
# Build the model's schema description from the schema of the dataset stored
# in Fiddler, naming the target column and the input feature columns.
model_info = fdl.ModelInfo.from_dataset_info(
    display_name='Iris LogReg',
    description='A Logistic Regression model trained for predict the `species` feature of the iris dataset.',
    dataset_info=fiddler_api.get_dataset_info(DATASET_ID),
    features=features,
    target=target,
)
model_info

**ModelInfo to YAML**</br>
We need to convert the ModelInfo object to a corresponding dictionary, and set it up to be converted to YAML.

In [None]:
# Convert the ModelInfo object into the dictionary layout expected for a
# model.yaml file: the output of to_dict() nested under a single 'model' key.
#
# `model_info` is rebound from a ModelInfo object to a plain dict here, so the
# conversion is guarded: re-running this cell would otherwise call .to_dict()
# on a dict and raise AttributeError.
if not isinstance(model_info, dict):
    model_info = {
        'model': model_info.to_dict()
    }

<strong>Create a model.yaml file</strong> </br>
We want to convert the ModelInfo object into a YAML representation. </br>
This can be done by casting it to a dictionary and using PyYAML.

In [None]:
# Serialize the model description to model.yaml inside the assets directory.
YAML_PATH = os.path.join(MODEL_DIR, 'model.yaml')
with open(YAML_PATH, 'w+') as yaml_file:
    yaml.dump(model_info, stream=yaml_file)

<strong>Saving the model</strong> </br>
The model needs to be serialized (pickled) to a Python `.pkl` file.

In [None]:
# Pickle the fitted classifier to model.pkl inside the assets directory.
PKL_PATH = os.path.join(MODEL_DIR, 'model.pkl')
with open(PKL_PATH, 'wb+') as model_file:
    pickle.dump(cls, model_file)

<strong>Creating a package.py</strong> </br>
The `%%writefile` cell magic below writes the contents of its cell to a `package.py` file.

In [None]:
# Location of package.py inside the assets directory.
PKG_PATH = os.path.join(MODEL_DIR, 'package.py')

# Create (or truncate to) an empty package.py.
open(PKG_PATH, 'w+').close()

In [None]:
%%writefile $PKG_PATH
# package.py: exposes get_model(), which wraps the pickled model stored
# alongside this file.
from pathlib import Path

from sklearn_wrapper import SimpleSklearnModel


# Directory containing this file; the pickled model is resolved relative to it.
PACKAGE_PATH = Path(__file__).parent
MODEL_FILE_NAME = 'model.pkl'
# Names given to the prediction output columns.
# NOTE(review): presumably ordered to match the encoded class labels 0, 1, 2 - confirm.
PRED_COLUMN_NAMES = ['setosa', 'versicolor', 'virginica']


def get_model():
    """Return a SimpleSklearnModel for the model.pkl located next to this file.

    The wrapper is configured as a multiclass classifier whose prediction
    columns are named by PRED_COLUMN_NAMES.
    """
    return SimpleSklearnModel(PACKAGE_PATH / MODEL_FILE_NAME,
                              PRED_COLUMN_NAMES, is_classifier=True,
                              is_multiclass=True)

Overwriting /content/model_assets/package.py


**Upload the model assets**</br>
With all of our files in place, we will use the Fiddler client to upload them as a package.

In [None]:
# Upload the assets directory as the package for this project/model pair.
# The directory is expected to contain model.yaml, package.py, and model.pkl.
fiddler_api.upload_model_package(
    project_id=PROJECT_ID,
    model_id=MODEL_ID,
    artifact_path=MODEL_DIR,
)

**Testing the model**</br>
To be sure that the model was successfully uploaded, let's run some predictions on the first ten rows.

In [None]:
# Run the uploaded model on the first ten training rows and display the result.
predictions = fiddler_api.run_model(
    df=train_df.head(n=10),
    model_id=MODEL_ID,
    project_id=PROJECT_ID,
)
predictions

**Clean up assets folder**</br>
Since we successfully uploaded our model using the Fiddler client, we can remove the local directory storing our model assets.

In [None]:
# Remove the local assets directory when DELETE_ASSETS is set. The isdir guard
# keeps this cell re-runnable: without it, a second run raises FileNotFoundError
# because the directory has already been deleted.
if DELETE_ASSETS and os.path.isdir(MODEL_DIR):
    shutil.rmtree(MODEL_DIR)