In [None]:
import os
from pprint import pprint

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
import pandas as pd

from pandas.io.json import json_normalize

import seaborn as sns
# Apply seaborn's "darkgrid" style to the plots drawn in this notebook
sns.set(style="darkgrid")

# IPython magic: render matplotlib figures inline in the cell output
%matplotlib inline

In [None]:
# The Elasticsearch endpoint is read from the ELASTICSEARCH_URL environment
# variable; a missing variable raises KeyError right here rather than later.
elasticsearch_url = os.environ['ELASTICSEARCH_URL']

# Build a client that talks to that single host
client = Elasticsearch(hosts=[elasticsearch_url])

# Connectivity check: ask the instance for its info and display the response
client.info()

In [None]:
# Start a Search bound to the client; no index or query is specified here
search = Search(using=client)

In [None]:
# Ask Elasticsearch how many documents the search matches
total = search.count()

# Widen the result window from the first hit up to `total`, so one request
# returns every matching document.
# NOTE(review): Elasticsearch rejects from + size above index.max_result_window
# (10,000 unless reconfigured) -- confirm the index stays below that, or switch
# to search.scan() together with the cell that consumes `results`.
search = search[:total]

# Run the query
results = search.execute()

In [None]:
# Flatten the raw hit records into a table; nested fields become
# dot-separated column names (e.g. '_source.request_at').
results_df = json_normalize(data=results.hits.hits)

In [None]:
# Show every column name in the dataframe, one per line
for column in list(results_df.columns):
    print(column)

In [None]:
# Count the non-null values in each column, ordered from most to least populated.
# Series.items() is used instead of Series.iteritems(), which was deprecated in
# pandas 1.5 and removed in pandas 2.0; both yield lazy (label, value) pairs.
results_count_sorted = results_df.count().sort_values(ascending=False).items()

# Print each column name followed by its non-null count
for key, value in results_count_sorted:
    print(key, value)

In [None]:
# Derive a request_date column: interpret the values in _source.request_at
# as milliseconds since the epoch and convert them to datetimes.
results_df['request_date'] = pd.to_datetime(
    results_df['_source.request_at'],
    unit='ms',
)

In [None]:
# Use request_date for dataframe index.
# DataFrame.set_index returns a new frame rather than modifying in place, so the
# result must be assigned back for the index to actually change.
# drop=False keeps request_date available as a column as well, which lets this
# cell be re-run without raising KeyError.
results_df = results_df.set_index('request_date', drop=False)

# Preview the re-indexed frame
results_df.head()

# Visualize

## Status codes

In [None]:
# Vertical bar chart of how many results carry each response status value
status_code_counts_plot = sns.countplot(
    data=results_df,
    x='_source.response_status',
    orient='v',
    color='teal',
)

## Proxy overhead

Proxy overhead is a measurement of how much latency the proxy adds to a request.

In [None]:
# Histogram of the proxy overhead values, grouped into 15 bins
results_df.hist(column='_source.proxy_overhead', bins=15)