In [1]:
# https://docs.arize.com/arize/quickstart
! pip install arize

Collecting arize
  Downloading arize-7.0.2-py2.py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.3/78.3 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting requests-futures==1.0.0 (from arize)
  Downloading requests_futures-1.0.0-py2.py3-none-any.whl (7.4 kB)
Collecting googleapis-common-protos~=1.51 (from arize)
  Downloading googleapis_common_protos-1.59.0-py2.py3-none-any.whl (223 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m223.6/223.6 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting protobuf~=3.12 (from arize)
  Downloading protobuf-3.20.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hCollecting pandas<2,>=0.25.3 (from arize)
  Downloading pandas-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 MB)
[2K     [90m━━━━━

In [2]:
import os

# Schema is imported once, from arize.utils.types (the binding that was
# effective before, since the later import shadowed the earlier one).
from arize.pandas.logger import Client
from arize.utils.types import ModelTypes, Environments, Schema, Metrics

# Credentials are read from the environment so real keys never need to be
# saved in the notebook; the placeholders are used only when the variables
# ARIZE_API_KEY / ARIZE_SPACE_KEY are not set.
API_KEY = os.environ.get('ARIZE_API_KEY', 'YOUR API KEY')
SPACE_KEY = os.environ.get('ARIZE_SPACE_KEY', 'YOUR SPACE KEY')

# Client used by the later cell that logs the dataframe.
arize_client = Client(space_key=SPACE_KEY, api_key=API_KEY)

In [3]:
# Load scikit-learn's bundled breast cancer dataset. The returned object is
# read by key ('data', 'feature_names', 'target', 'target_names') in the next cell.
from sklearn.datasets import load_breast_cancer
breast_cancer_dataset = load_breast_cancer()

In [4]:
# Pull the four pieces of the dataset used below, in this order:
#   'data'          -> feature data
#   'feature_names' -> feature names
#   'target'        -> actual data
#   'target_names'  -> actual labels
(
    breast_cancer_features,
    breast_cancer_feature_names,
    breast_cancer_targets,
    breast_cancer_target_names,
) = (
    breast_cancer_dataset[field]
    for field in ('data', 'feature_names', 'target', 'target_names')
)

In [5]:
# Build the list of actuals: each entry of breast_cancer_targets is used as an
# index into breast_cancer_target_names to look up the corresponding label.
target_name_transcription = [
    breast_cancer_target_names[target_code]
    for target_code in breast_cancer_targets
]

In [6]:
import pandas as pd

# One row per sample, one column per feature, plus two label columns.
# The predictions are simply the actuals in reverse row order; this is
# optional, but makes this example more interesting in the platform.
df = pd.DataFrame(
    breast_cancer_features,
    columns=breast_cancer_feature_names,
).assign(
    actual_label=target_name_transcription,
    prediction_label=target_name_transcription[::-1],
)

In [18]:
# Tell Arize which dataframe columns hold the actual label, the predicted
# label, and the features. The feature columns come from
# breast_cancer_feature_names (the same names used as the dataframe's
# columns) instead of a hand-copied list, so the two cannot drift apart.
schema = Schema(
    actual_label_column_name="actual_label",
    prediction_label_column_name="prediction_label",
    # str(...) yields plain Python strings for each feature name.
    feature_column_names=[str(name) for name in breast_cancer_feature_names],
)

In [19]:
# Send the dataframe to Arize as production data for a binary classification
# model, using the column mapping defined in `schema`.
response = arize_client.log(
    dataframe=df,
    schema=schema,
    model_id='breast_cancer_dataset',
    model_version='v1',
    model_type=ModelTypes.BINARY_CLASSIFICATION,
    metrics_validation=[Metrics.CLASSIFICATION],
    environment=Environments.PRODUCTION
)

# Report the outcome instead of discarding the response: per the Arize
# quickstart, a successful upload returns status code 200.
if response.status_code != 200:
    print(f"Logging failed with response code {response.status_code}: {response.text}")
else:
    print("Successfully logged production data to Arize")

[38;21m  arize.utils.logging | INFO | Success! Check out your data at https://app.arize.com/organizations/QWNjb3VudE9yZ2FuaXphdGlvbjoxNzYw/spaces/U3BhY2U6MTg2Nw==/models/modelName/breast_cancer_dataset?selectedTab=dataIngestion[0m
