Experiment working with anonymized API log data.

In [None]:
import pandas as pd
%matplotlib inline

from bokeh.io import output_notebook
from bokeh.plotting import figure, show

# Configure Bokeh so that show() renders plots inline in the notebook output
output_notebook()

In [None]:
# Load the API logs; the first CSV column is used as the index and parsed as dates
api_logs = pd.read_csv(
    'api_logs.csv',
    parse_dates=[0],
    index_col=0,
)

# Display the column labels that are available
api_logs.columns

In [None]:
# Print the index to confirm that read_csv produced a DatetimeIndex
# (i.e. that the first CSV column was successfully parsed as dates)
print(api_logs.index)

In [None]:
def is_success_function(row):
    """
    Label a row (request) by the outcome of its response
    Successful means a response with 2xx status (200 <= status < 300)

    row: mapping-like object (e.g. a DataFrame row) with a 'Status' entry

    return 'Success' if the status is 2xx, 'Failure' otherwise
    """
    # Look the status up once, then use a single chained range check
    status = row['Status']

    return 'Success' if 200 <= status < 300 else 'Failure'
    

In [None]:
# Add an 'Outcome' column to the API logs, labelling every request row
# 'Success' (status code in the 200s) or 'Failure' (any other status code)
api_logs['Outcome'] = api_logs.apply(is_success_function, axis=1)
api_logs.head()

In [None]:
# Export the logs, including the Outcome column, to a new CSV file
# (to_csv writes the index as the first column by default)
api_logs.to_csv('api_logs_with_outcome.csv')

# Explore
Get a feel for the data.

In [None]:
# Check the distribution of statuses in the data:
# count the requests per status code and draw the counts as a bar chart.
# `kind` is passed by keyword because Series.plot() no longer accepts
# positional arguments in current pandas releases (it raises TypeError).
api_response_code_distributions_plot = api_logs['Status'].value_counts().plot(
    kind='bar',
    title='Response code counts',
)

In [None]:
# Compare number of successful versus unsuccessful requests:
# count the rows per Outcome label and draw the two counts as a bar chart.
# groupby(...).size() returns a Series, and Series.plot() no longer accepts
# positional arguments in current pandas releases, so `kind` is a keyword.
success_versus_failure = api_logs.groupby('Outcome').size().plot(
    kind='bar',
    title='Successful versus unsuccessful requests',
)

In [None]:
# Count the requests for every (calendar date, outcome) combination
daily_failure_success_groups = api_logs.groupby(
    by=[api_logs.index.date, 'Outcome'],
).size()

# Preview the first few grouped counts
daily_failure_success_groups.head()

In [None]:
# Pivot the innermost index level (the outcome) into columns, giving one row
# per date; combinations with no requests are filled with zero instead of NaN
success_and_failure_over_time = daily_failure_success_groups.unstack(
    level=-1,
    fill_value=0,
)

# Preview the first few rows of the unstacked table
success_and_failure_over_time.head()

In [None]:
# Plot success and failure counts over time (one line per column),
# rotating the x-axis tick labels by 45 degrees
success_and_failure_chart = success_and_failure_over_time.plot(rot=45)

In [None]:
# Create a new Bokeh plot with a datetime x-axis.
# `width`/`height` are used rather than `plot_width`/`plot_height`, which were
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=800, height=250, x_axis_type="datetime", title="Success and failure over time")

# Add a semi-transparent red line showing the daily failure counts
p.line(success_and_failure_over_time.index, success_and_failure_over_time['Failure'], color='red', alpha=0.5)

# Add a semi-transparent green line showing the daily success counts
p.line(success_and_failure_over_time.index, success_and_failure_over_time['Success'], color='green', alpha=0.5)


# Show the plot
show(p)