# Load Peak Data

In [1]:
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use("pgf")
matplotlib.rcParams['text.latex.preamble'] = [
    r'\usepackage{sansmath}',  # load up the sansmath so that math -> helvet
    r'\sansmath'               # <- tricky! -- gotta actually tell tex to use!
]
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    'font.family': 'sans-serif',
    'text.usetex': True,
    'pgf.rcfonts': False,
})
from pandas.plotting import register_matplotlib_converters
import numpy as np
import pandas as pd
import glob

# --------- Set peak data file here -----------
peakData = [{"name": "Hypixel", "data": pd.read_csv('peaks/hypixel_peaks.csv', parse_dates=["startdate", "enddate"])},
{"name": "Minehut", "data": pd.read_csv('peaks/minehut_peaks.csv', parse_dates=["startdate", "enddate"])},
{"name": "Cubecraft", "data": pd.read_csv('peaks/cubecraft_peaks.csv', parse_dates=["startdate", "enddate"])},
{"name": "The Hive", "data": pd.read_csv('peaks/hive_peaks.csv', parse_dates=["startdate", "enddate"])}
]
# ---------------------------------------------

for item in peakData:
    item["data"]['timespan'] = pd.to_timedelta(item["data"]['timespan'])

combinedPeaks = pd.concat([x["data"] for x in peakData])

#peakData['timespan'] = pd.to_timedelta(peakData['timespan'])
#peakData['startdate'] = pd.to_datetime(peakData['startdate'])
#peakData['enddate'] = pd.to_datetime(peakData['enddate'])

  matplotlib.rcParams['text.latex.preamble'] = [


In [27]:
#[x["data"].describe() for x in peakData]
#peakData.describe()
for item in peakData:
    data = item['data']
    print(item['name'])
    print(data.describe(percentiles=[0.25, 0.50, 0.75, 0.90, 0.95]))
    print("\n")

print(combinedPeaks.describe(percentiles=[0.25, 0.50, 0.75, 0.90, 0.95]))

Hypixel
       Unnamed: 0     group                   timespan     magnitude  \
count    91.00000  91.00000                         91     91.000000   
mean     45.00000  45.00000  0 days 00:20:35.604395604  24541.637885   
std      26.41338  26.41338  0 days 00:17:40.791328093  19077.137454   
min       0.00000   0.00000            0 days 00:04:00   2609.200000   
25%      22.50000  22.50000            0 days 00:10:00  10623.275000   
50%      45.00000  45.00000            0 days 00:16:00  16104.500000   
75%      67.50000  67.50000            0 days 00:24:00  31167.250000   
90%      81.00000  81.00000            0 days 00:42:00  59135.216216   
95%      85.50000  85.50000            0 days 00:55:00  67909.887500   
max      90.00000  90.00000            0 days 01:40:00  76589.250000   

             drop        shape  
count   91.000000    91.000000  
mean    26.285977  -681.201658  
std     18.593783  1500.440909  
min      1.949910 -3511.684375  
25%     14.179359 -1295.244507  
5

# Basic Stats

In [28]:
for item in peakData:
    print(item["name"] + ":")
    print("Total number of failures: " + str(len(item["data"])))
    print("Number of complete failures: " + str(len(item["data"].loc[item["data"]['drop'] > 95])))
    print("Avg. Interarrival Time: " + str(item["data"]['startdate'].diff().mean()))
    print("Max. Interarrival Time: " + str(item["data"]['startdate'].diff().max()))
    print("Average duration for non critical failures: " + str( item["data"].loc[item["data"]['drop'] < 25]['timespan'].mean() ))
    #print(item["data"]['startdate'].diff().sort_values(ascending=False))
    print("\n")



Hypixel:
Total number of failures: 91
Number of complete failures: 1
Avg. Interarrival Time: 3 days 02:23:30.666666666
Max. Interarrival Time: 15 days 08:16:00
Average duration for non critical failures: 0 days 00:14:04.528301886


Minehut:
Total number of failures: 275
Number of complete failures: 33
Avg. Interarrival Time: 1 days 00:56:03.065693430
Max. Interarrival Time: 13 days 10:14:00
Average duration for non critical failures: 0 days 00:12:41.503759398


Cubecraft:
Total number of failures: 52
Number of complete failures: 1
Avg. Interarrival Time: 5 days 11:39:17.647058823
Max. Interarrival Time: 31 days 22:16:00
Average duration for non critical failures: 0 days 00:20:42.352941176


The Hive:
Total number of failures: 73
Number of complete failures: 13
Avg. Interarrival Time: 4 days 00:06:03.333333333
Max. Interarrival Time: 40 days 05:56:00
Average duration for non critical failures: 0 days 00:09:45.714285714




# Distributions of the peak characteristics

In [29]:
%matplotlib widget

dataIndex = 1
binsize = 100

plt.subplot(221)
peakData[dataIndex]['data']['timespan'].astype("timedelta64[m]").plot.hist(bins=binsize, title="Durations")
plt.subplot(222)
peakData[dataIndex]['data']['magnitude'].plot.hist(bins=binsize, title="Magnitudes")
plt.subplot(223)
peakData[dataIndex]['data']['drop'].plot.hist(bins=binsize, title="Drop percentages")
plt.subplot(224)
peakData[dataIndex]['data']['shape'].plot.hist(bins=binsize, title="Shapes")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:title={'center':'Shapes'}, ylabel='Frequency'>

# ECDF of failure duration and interarrival time

In [5]:
%matplotlib widget
from statsmodels.distributions.empirical_distribution import ECDF

#peakData['startdate'].diff().astype("timedelta64[m]").plot.hist(bins=150)

for i, item in enumerate(peakData):
    data = item['data']
    ecdf = ECDF(data['startdate'].diff().astype("timedelta64[s]"))
    ecdf2 = ECDF(data['timespan'].astype("timedelta64[s]"))
    # plot the cdf
    plt.subplot(221 + i)

    plt.axvline(3600, color='gray', linestyle='--', linewidth="1", label='_nolegend_')
    plt.axvline(14400, color='gray', linestyle='--', linewidth="1", label='_nolegend_')
    plt.axvline(86400, color='gray', linestyle='--', linewidth="1", label='_nolegend_')
    plt.axvline(345600, color='gray', linestyle='--', linewidth="1", label='_nolegend_')

    plt.text(2200,0.08,'1 Hour',rotation=90, color='gray', size=9)
    plt.text(9000,0.08,'4 Hours',rotation=90, color='gray', size=9)
    plt.text(52000,0.08,'1 Day',rotation=90, color='gray', size=9)
    plt.text(210000,0.08,'4 Days',rotation=90, color='gray', size=9)

    plt.plot(ecdf.x, ecdf.y)
    plt.plot(ecdf2.x, ecdf2.y)
    if (i > 1):
        plt.xlabel("Time (s)")
    if (i % 2 == 0):    
        plt.ylabel("Cumulative Probability")
    ax = plt.gca()
    ax.set_xscale('log')
    ax.set_title(item["name"])

    plt.xlim(left=500)
    plt.show()
    

plt.tight_layout()
plt.subplots_adjust(top=0.87)
ax.legend(['Interarrival Time', 'Duration'], loc=(-0.62, 2.55), ncol=2)
fig = plt.gcf()
fig.set_size_inches(5.3,5.5)
plt.show()
plt.savefig('plots/mc_ecdf.pgf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [31]:
from scipy.stats import kstest


d1 = peakData[0]['data']['startdate'].diff().astype("timedelta64[s]")
d2 = peakData[1]['data']['startdate'].diff().astype("timedelta64[s]")
d3 = peakData[2]['data']['startdate'].diff().astype("timedelta64[s]")
d4 = peakData[3]['data']['startdate'].diff().astype("timedelta64[s]")
print(kstest(d1, d2))
print(kstest(d1, d3))
print(kstest(d1, d4))
print(kstest(d2, d3))
print(kstest(d2, d4))
print(kstest(d3, d4))

avg_sim = (kstest(d1, d3).statistic + kstest(d1, d4).statistic + kstest(d3, d4).statistic) / 3 
avg_diff = (kstest(d1, d2).statistic + kstest(d2, d3).statistic + kstest(d2, d4).statistic) / 3

print(avg_sim)
print(avg_diff)


KstestResult(statistic=0.4305694305694306, pvalue=6.5076832811428176e-12)
KstestResult(statistic=0.17032967032967034, pvalue=0.26197026585473937)
KstestResult(statistic=0.11681469215715791, pvalue=0.5839828935534844)
KstestResult(statistic=0.42475524475524473, pvalue=1.2227630730166084e-07)
KstestResult(statistic=0.4485678704856787, pvalue=4.919553653337516e-11)
KstestResult(statistic=0.17623814541622762, pvalue=0.263646585716242)
0.15446083596768528
0.43463084860345136


# Correlation plots for drop and duration

In [38]:
%matplotlib widget
from scipy import stats

for i, item in enumerate(peakData):
    data = item['data']
    # plot the cdf
    plt.subplot(221 + i)

    plt.scatter(data['drop'], data['timespan'].astype("timedelta64[m]"), s=4)

    m, b = np.polyfit(data['drop'], data['timespan'].astype("timedelta64[m]"), 1)
    plt.plot(data['drop'], m*data['drop'] + b, color="red")

    ax = plt.gca()
    fig = plt.gcf()
    fig.set_size_inches(4.5,4.5)

    if (i > 1):
        ax.set_xlabel("% of affected players")
    if (i % 2 == 0):    
        ax.set_ylabel("Failure duration (min.)")

    ax.set_title(item["name"])
    # ax.legend(['Interarrival Time', 'Duration'])
    plt.tight_layout()
    plt.show()
    plt.savefig('plots/mc_corr.pgf')

    cor = stats.spearmanr(data['drop'], data['timespan'].astype("timedelta64[m]"))
    print(item["name"] + str(cor))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

HypixelSpearmanrResult(correlation=0.5778994350577826, pvalue=1.9982788588574052e-09)
MinehutSpearmanrResult(correlation=0.4604685414699176, pvalue=7.727739801241559e-16)
CubecraftSpearmanrResult(correlation=0.5212435243260515, pvalue=7.429404520591825e-05)
The HiveSpearmanrResult(correlation=0.6014989705931603, pvalue=1.8262007448579775e-08)


# When do failures happen?
A heatmap that shows the amount of failures per hour, for every weekday

In [32]:
%matplotlib widget

import datetime
import calendar
import matplotlib.dates as mdates
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.ticker import FuncFormatter
import matplotlib.colors as colors
from mycolorpy import colorlist as mcp
import matplotlib.cm as cm

tsses = ["final_ts_hypixel.csv", "final_ts_minehut.csv", "final_ts_cubecraft.csv", "final_ts_hive.csv"]

for i, item in enumerate(peakData):
    data = item['data']


    # ------------------ TRAFFIC HEATMAP ----------------------------------
    plt.subplot(421 + i*2)

    df = pd.DataFrame(pd.read_csv(tsses[i], header=0, parse_dates=[0], index_col=0))
    df.insert(1, 'weekday', df.index.weekday)
    df.insert(2, 'time', df.index.to_series().apply(lambda x: x.replace(day=1, month=1, year=2020)))

    lst = []

    hours = ["00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12",
    "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"]
    weekdays = list(reversed(calendar.day_abbr))

    for wd in range(6, -1, -1):
        ls = []
        for tm in range(0,24):
            ls.append(df['0'].where(np.logical_and(df['time'].dt.hour == tm, df['weekday'] == wd)).mean())
        lst.append(ls)

    arr = np.array(lst)

    #fig, ax = plt.subplots(figsize=(10.5,5.5))
    fig = plt.gcf()
    ax = plt.gca()

    NUM_COLORS = 7
    cmap = matplotlib.colors.ListedColormap(mcp.gen_color(cmap="Oranges",n=NUM_COLORS))
    norm = matplotlib.colors.BoundaryNorm(np.linspace(np.amin(arr),np.amax(arr),NUM_COLORS), NUM_COLORS)

    im = ax.imshow(arr, interpolation="nearest", cmap=cmap, norm=norm)

    ax.set_xticks(np.arange(len(hours)))
    ax.set_yticks(np.arange(len(weekdays)))

    ax.set_xticklabels(hours)
    ax.set_yticklabels(weekdays)

    plt.ylabel("Day of Week")
    if (i == 3):
        plt.xlabel("Time of Day (Hour)")
    else:
        #ax.get_xaxis().set_visible(False)
        #ax.set_xticklabels([])
        pass

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
            rotation_mode="anchor")

    ax.set_title(item["name"] + " average online player counts")

    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.05)

    #cb = plt.colorbar(im, cax=cax)
    cb = plt.colorbar(im, cax=cax, format=FuncFormatter(lambda x, pos: '{:,.0f}'.format(x/1000) + 'K'))
    #cb.set_label('Average Player Count')
    #fig.tight_layout()
    fig = plt.gcf()  
    fig.set_size_inches(4,3)

    plt.show()


    # ------------------ FAILURES HEATMAP ----------------------------------
    plt.subplot(421 + i*2 + 1)

    df = data.copy()#pd.DataFrame(peakData)
    #df.insert(1, 'weekday', df['startdate'].values.weekday)
    df.insert(1, 'weekday', df['startdate'].apply(lambda x: x.weekday()))
    df.insert(2, 'time', df['startdate'].apply(lambda x: x.replace(day=1, month=1, year=2020)))

    lst = []

    #hours = range(0,24)
    hours = ["00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12",
    "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"]
    weekdays = list(reversed(calendar.day_abbr))

    for wd in range(6, -1, -1):
        ls = []
        for tm in range(0,24):
            #ls.append(df['0'].where(np.logical_and(df['time'].dt.hour == tm, df['weekday'] == wd)).mean())
            ls.append( df['weekday'].where(np.logical_and(df['time'].dt.hour == tm, df['weekday'] == wd)).count() )
        lst.append(ls)

    arr = np.array(lst)

    #fig, ax = plt.subplots(figsize=(10.5,5.5))
    fig = plt.gcf()
    ax = plt.gca()

    if (i==0):
        NUM_COLORS = 5
    elif (i==1):
        NUM_COLORS = 5
    elif (i==2):
        NUM_COLORS = 7
    elif (i==3):
        NUM_COLORS = 5

    cmap = matplotlib.colors.ListedColormap(mcp.gen_color(cmap="Oranges",n=NUM_COLORS))
    norm = matplotlib.colors.BoundaryNorm(np.linspace(np.amin(arr),np.amax(arr),NUM_COLORS), NUM_COLORS)

    im = ax.imshow(arr, interpolation="nearest", cmap=cmap, norm=norm)

    ax.set_xticks(np.arange(len(hours)))
    ax.set_yticks(np.arange(len(weekdays)))

    ax.set_xticklabels(hours)
    ax.set_yticklabels(weekdays)

    if (i == 3):
        plt.xlabel("Time of Day (Hour)")
    else:
        #ax.get_xaxis().set_visible(False)
        #ax.set_xticklabels([])
        pass

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
            rotation_mode="anchor")


    ax.set_title(item["name"] + " failure counts")

    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.05)


    def colorbar_index(ncolors, cmap):
        mappable = cm.ScalarMappable(cmap=cmap)
        mappable.set_array([])
        mappable.set_clim(-0.5, ncolors+0.5)
        colorbar = plt.colorbar(mappable, cax=cax)
        colorbar.set_ticks(np.linspace(0, ncolors, ncolors))
        colorbar.set_ticklabels(range(ncolors))

    def colorbar_index2(ncolors, cmap):
        mappable = cm.ScalarMappable(cmap=cmap)
        mappable.set_array([])
        mappable.set_clim(0, ncolors*2)
        colorbar = plt.colorbar(mappable, cax=cax, format=FuncFormatter(lambda x, pos: '{:,.0f}'.format(x)))
        colorbar.set_ticks(np.round(np.linspace(0, ncolors*2, ncolors+1)))
        #colorbar.set_ticklabels(range(ncolors*2))
        colorbar.set_ticklabels(np.round(np.linspace(0, ncolors*2, ncolors+1)))

    if (i==1):
        colorbar_index2(ncolors=NUM_COLORS, cmap=cmap)  
    else:
        colorbar_index(ncolors=NUM_COLORS, cmap=cmap)  
    # cb = plt.colorbar(im, cax=cax)

    # labels = np.linspace(0,(NUM_COLORS),NUM_COLORS+1)
    # print(labels)
    # loc    = labels + .4
    # cb.set_ticks(loc)
    # cb.set_ticklabels(labels)


    fig = plt.gcf()  
    fig.set_size_inches(4,3)
    #cb.set_label('Number of Failures')
    #fig.tight_layout()


fig = plt.gcf()  
fig.set_size_inches(9,6.5)
fig.tight_layout()
plt.show()
plt.savefig('plots/mc_heat.pgf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …