In [2]:
import datetime
import json
import os

import altair as alt
import eland as ed
import matplotlib.pyplot as plt
import numpy as np
# Turn off Altair's max-rows check on the active data transformer so large datasets
# do not raise when a chart is built. The call returns the registry's enable() result,
# which is what the notebook echoes as this cell's output.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [4]:
# Elasticsearch index queried by every later cell (DataFrame, chart URL, saved visualization).
index_name = "cars_outlier"

In [5]:
# Connection string handed to eland, in 'user:password@host:port' form.
# Set the ES_URL environment variable to keep credentials out of the notebook.
# The literal default only preserves the original local-development behavior when
# ES_URL is unset; remove it before sharing or committing this notebook.
es_url = os.environ.get('ES_URL', 'elastic:mlqa_admin@localhost:9200')

# Elasticsearch-backed DataFrame over the index.
ed_df = ed.DataFrame(es_url, index_name)

# Preview the first rows as the cell output.
ed_df.head()

Unnamed: 0,Acceleration,Cylinders,Displacement,Horsepower,Miles_per_Gallon,Name,Origin,Weight_in_lbs,Year,ml.feature_influence.Acceleration,ml.feature_influence.Cylinders,ml.feature_influence.Displacement,ml.feature_influence.Horsepower,ml.feature_influence.Miles_per_Gallon,ml.feature_influence.Weight_in_lbs,ml.outlier_score,ml__id_copy
0,12.0,8,307.0,130.0,18.0,chevrolet chevelle malibu,USA,3504,1970-01-01,,,,,,,0.073549,0
1,11.5,8,350.0,165.0,15.0,buick skylark 320,USA,3693,1970-01-01,,,,,,,0.045366,1
100,14.5,8,318.0,150.0,14.0,plymouth fury gran sedan,USA,4237,1973-01-01,,,,,,,0.013962,100
101,11.0,8,440.0,215.0,13.0,chrysler new yorker brougham,USA,4735,1973-01-01,,,,,,,0.063068,101
102,11.0,8,455.0,225.0,12.0,buick electra 225 custom,USA,4951,1973-01-01,0.256998,0.18382,0.081211,0.145918,0.111639,0.220414,0.116306,102


In [9]:
# Search endpoint of the index on the local node, asking for up to 1000 hits.
url = 'http://localhost:9200/{}/_search?size=1000'.format(index_name)

# The chart loads its data from that URL; the records are read from the
# `hits.hits` property of the JSON response.
hits_format = alt.DataFormat(property='hits.hits', type='json')
url_data = alt.Data(url=url, format=hits_format)

# Document fields to expose to the chart under their plain names.
fields = [
    'Acceleration',
    'Cylinders',
    'Displacement',
    'Horsepower',
    'Miles_per_Gallon',
    'Name',
    'Origin',
    'Weight_in_lbs',
    'Year',
    'ml.outlier_score',
]

# Keyword arguments for transform_calculate: each output field name maps to the
# expression that reads the same field from the hit's `_source` object.
rename_dict = {name: 'datum._source.' + name for name in fields}

# Range input (0 to 1, step 0.01) shown with the label 'Outlier score Threshold:'.
slider = alt.binding_range(
    min=0,
    max=1,
    step=0.01,
    name='Outlier score Threshold:',
)

# Single selection bound to the slider; its `cutoff` field starts at 0.8.
selector = alt.selection_single(
    name="SelectorName",
    fields=['cutoff'],
    bind=slider,
    init={'cutoff': 0.8},
)

# Predicate shared by the colour, opacity and size encodings: true for points whose
# outlier score is at or above the slider's current cutoff.
is_outlier = alt.datum["ml.outlier_score"] >= selector.cutoff

# One scatter panel; the x and y fields are filled in by the repeat below.
# NOTE(review): the tooltip refers to 'ml.outlier_score' un-escaped. Vega-Lite normally
# reads a dot in a field name as nested access, while the calculate transform above
# creates a flat field with that literal name — confirm the tooltip shows the score.
scatter = (
    alt.Chart(url_data)
    .transform_calculate(**rename_dict)
    .mark_point()
    .encode(
        alt.X(alt.repeat("column"), type='quantitative'),
        alt.Y(alt.repeat("row"), type='quantitative'),
        color=alt.condition(is_outlier, alt.value('red'), alt.value('gray')),
        opacity=alt.condition(is_outlier, alt.value(.75), alt.value(.25)),
        size=alt.condition(is_outlier, alt.value(28), alt.value(2)),
        tooltip=[
            'Name:N',
            'ml.outlier_score:Q',
            'Horsepower:Q',
            'Acceleration:Q',
            'Miles_per_Gallon:Q',
        ],
    )
    .properties(width=150, height=150)
)

# 3x3 grid of panels over the three measures, made interactive and wired to the
# threshold slider.
chart = (
    scatter
    .repeat(
        row=['Horsepower', 'Acceleration', 'Miles_per_Gallon'],
        column=['Miles_per_Gallon', 'Acceleration', 'Horsepower'],
    )
    .interactive()
    .add_selection(selector)
)

chart

In [10]:
from kibana_vega_util import saveVegaVis
from elasticsearch import Elasticsearch 
# Client for the Elasticsearch node on localhost:9200.
es_hosts = [{'host': 'localhost', 'port': 9200}]
es = Elasticsearch(es_hosts)

# Hand the chart to the project helper under the id 'def-vega-cars-outlier-1'.
# Judging by the recorded response, this writes a visualization document into the
# .kibana index. The helper's return value is the cell output.
saveVegaVis(
    es,
    index_name,
    'def-vega-cars-outlier-1',
    chart,
    resultSize=1000,
)

{'_index': '.kibana_1',
 '_id': 'visualization:def-vega-cars-outlier-1',
 '_version': 9,
 'result': 'updated',
 '_shards': {'total': 1, 'successful': 1, 'failed': 0},
 '_seq_no': 368,
 '_primary_term': 1}