In [16]:
import datetime
import json
import os

import altair as alt
import eland as ed
import matplotlib.pyplot as plt
import numpy as np
# Switch off Altair's row-count limit on chart data. The registry repr shown
# under this cell is simply the return value of this call.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [17]:
# Load the iris sample dataset shipped with vega_datasets into a pandas
# DataFrame and print its column / dtype summary.
from vega_datasets import data
pd_df = data.iris()
pd_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
sepalLength    150 non-null float64
sepalWidth     150 non-null float64
petalLength    150 non-null float64
petalWidth     150 non-null float64
species        150 non-null object
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [18]:
# Upload the iris frame (rows with missing values dropped) to the
# 'eland_iris' index on the local cluster, then show the column index of the
# returned eland DataFrame.
# es_if_exists="replace" lets the cell be re-run against an index left over
# from a previous run; es_refresh=True requests an index refresh after the
# upload.
ed_df = ed.pandas_to_eland(pd_df.dropna(), 'localhost', 'eland_iris',
                           es_if_exists="replace", es_refresh=True)
ed_df.columns

Index(['petalLength', 'petalWidth', 'sepalLength', 'sepalWidth', 'species'], dtype='object')

In [31]:
# Open the index that holds the classification results and preview it.
#
# Credentials are deliberately not hard-coded in the notebook: the connection
# string is taken from the ELAND_ES_HOST environment variable (for example
# "user:password@localhost:9200"). When the variable is unset the cell falls
# back to an unauthenticated node on localhost:9200.
index_name = 'iris_classification_3'
es_host = os.environ.get('ELAND_ES_HOST', 'localhost:9200')
ed_df = ed.DataFrame(es_host, index_name)
ed_df.head()

Unnamed: 0,ml.feature_importance.feature_name,ml.feature_importance.importance,ml.feature_importance.setosa,ml.feature_importance.versicolor,ml.feature_importance.virginica,ml.is_training,ml.prediction_probability,ml.prediction_score,ml.species_prediction,ml.top_classes.class_name,ml.top_classes.class_probability,ml.top_classes.class_score,ml__id_copy,petalLength,petalWidth,sepalLength,sepalWidth,species
0,petalWidth,5.579236,2.789618,-1.259434,-1.530184,False,0.970936,0.915752,setosa,"[setosa, versicolor]","[0.9709358939219042, 0.021207332413162982]","[0.9157518171408758, 0.021207332413162982]",0,1.4,0.2,5.1,3.5,setosa
1,petalWidth,5.579236,2.789618,-1.259434,-1.530184,True,0.970936,0.915752,setosa,"[setosa, versicolor]","[0.9709358939219042, 0.021207332413162982]","[0.9157518171408758, 0.021207332413162982]",1,1.4,0.2,4.9,3.0,setosa
10,petalWidth,5.579236,2.789618,-1.259434,-1.530184,True,0.970936,0.915752,setosa,"[setosa, versicolor]","[0.9709358939219042, 0.021207332413162982]","[0.9157518171408758, 0.021207332413162982]",10,1.5,0.2,5.4,3.7,setosa
100,petalWidth,7.081256,-1.768982,-1.771646,3.540628,False,0.982049,0.897442,virginica,"[virginica, versicolor]","[0.9820485433071645, 0.009970069346977181]","[0.8974417978352741, 0.009970069346977181]",100,6.0,2.5,6.3,3.3,virginica
101,petalWidth,7.081256,-1.768982,-1.771646,3.540628,True,0.982049,0.897442,virginica,"[virginica, versicolor]","[0.9820485433071645, 0.009970069346977181]","[0.8974417978352741, 0.009970069346977181]",101,5.1,1.9,5.8,2.7,virginica


In [32]:
# The charts load their rows straight from the index's _search endpoint; the
# documents sit under hits.hits in the JSON response.
url = 'http://localhost:9200/' + index_name + '/_search?size=1000'
url_data = alt.Data(
    url=url,
    format=alt.DataFormat(property='hits.hits', type='json')
)

# Measurement columns to plot.
fields = ['petalLength', 'petalWidth', 'sepalLength', 'sepalWidth']

# Maps each flat column name to the expression that reads it out of a hit's
# _source, for use with transform_calculate(**rename_dict).
rename_dict = {name: 'datum._source.' + name for name in fields}

def small_multiples(split='species'):
    """Build a two-column grid of histograms, one panel per name in ``fields``.

    Every panel bins one measurement on the x axis, counts documents on the
    y axis and colours the bars by ``split``. The panels are ordered by
    sorted attribute name and each keeps its own axes and scales.

    Relies on the module-level ``url_data``, ``fields`` and ``rename_dict``.

    Parameters
    ----------
    split : str
        Name of the nominal field used for the bar colour. The calculate
        transform below exposes ``'species'`` and ``'species_prediction'``.

    Returns
    -------
    The concatenated Altair chart (not yet rendered or saved).
    """
    # Base layer: flatten the nested _source fields, fold the measurement
    # columns into (attribute, value) pairs, and draw a binned bar chart.
    url_chart = alt.Chart(url_data).transform_calculate(
        species_prediction='datum._source.ml.species_prediction',
        species='datum._source.species'
    ).transform_calculate(**rename_dict).transform_fold(
        fields,
        as_=['attribute', 'value']
    ).mark_bar().encode(
        alt.X('value:Q', title='', bin=True),
        alt.Y('count()', title=''),
        tooltip=[
            alt.Tooltip('value:Q', bin=True, title='x'),
            alt.Tooltip('count()', title='y')
        ],
        color=split+':N'
    ).properties(
        width=150,
        height=150
    )

    # One filtered copy of the base chart per attribute. Iterating over
    # `fields` (instead of a second hard-coded list) keeps the panels in step
    # with the fold above.
    url_charts = alt.ConcatChart(
        concat=[
            url_chart.transform_filter(alt.datum.attribute == attribute).properties(title=attribute)
            for attribute in sorted(fields)
        ],
        columns=2
    ).resolve_axis(
        x='independent',
        y='independent'
    ).resolve_scale(
        x='independent',
        y='independent'
    )

    return url_charts

# Histogram grid coloured by the `species` field of each document.
chart_raw = small_multiples('species')
chart_raw

In [33]:
# Same histogram grid, coloured by `species_prediction`
# (read from ml.species_prediction in each document).
chart_prediction = small_multiples('species_prediction')
chart_prediction

In [34]:
# Scatter plot of petal length against petal width, one circle per document,
# coloured by species with the species shown in the tooltip.
alt.Chart(url_data).transform_calculate(
    species_prediction='datum._source.ml.species_prediction',
    species='datum._source.species'
).transform_calculate(
    **rename_dict
).mark_circle(
    size=30
).encode(
    alt.X('petalLength:Q'),
    alt.Y('petalWidth:Q'),
    alt.Color('species:N'),
    tooltip=[alt.Tooltip('species:N')]
)

In [35]:
# Store both histogram grids as saved visualizations through the project
# helper saveVegaLiteVis. The dict displayed under this cell is the return
# value of the final call only, so the order of the two calls matters for
# what is shown.
from kibana_vega_util import saveVegaLiteVis
from elasticsearch import Elasticsearch 
es=Elasticsearch([{'host':'localhost','port':9200}])

# NOTE(review): resultSize=10000 here, while the notebook's search URL uses
# size=1000 — confirm the difference is intended.
saveVegaLiteVis(es, index_name, 'iris-histogram-raw', chart_raw, resultSize=10000, timeField=False)
saveVegaLiteVis(es, index_name, 'iris-histogram-prediction', chart_prediction, resultSize=10000, timeField=False)

{'_index': '.kibana_1',
 '_id': 'visualization:iris-histogram-prediction',
 '_version': 8,
 'result': 'updated',
 '_shards': {'total': 1, 'successful': 1, 'failed': 0},
 '_seq_no': 893,
 '_primary_term': 1}