In [154]:
import numpy as np
from matplotlib import pyplot as plt
%matplotlib notebook

In [134]:
def readInFile(filename):
    """Read a per-day statistics file and group the parsed records by month.

    Every line is stripped of parentheses and surrounding whitespace, then
    split on commas. The first field is used as the record key and the
    remaining fields are handed to ``generatePercentileString``.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    dict
        ``{month: {key: parsed}}`` where ``month`` is ``key[5:7]`` and
        ``parsed`` is whatever ``generatePercentileString`` returned for that
        line. Keys are inserted in ascending order, so each inner dict (and
        the outer dict) iterates in sorted-key order.
        NOTE(review): slicing ``key[5:7]`` presumes keys shaped like
        ``YYYY-MM-DD`` -- confirm against the data file.
    """
    results = {}
    with open(filename) as f:
        for line in f:
            cleaned = line.replace("(", "").replace(")", "").strip()
            if not cleaned:
                # A blank (or parenthesis-only) line holds no record; parsing
                # it would index into an empty field list and crash.
                continue
            lineStats = cleaned.split(",")
            results[lineStats[0]] = generatePercentileString(lineStats[1:])

    # Walk the keys in sorted order so records are grouped chronologically.
    monthResults = {}
    for key in sorted(results):
        monthResults.setdefault(key[5:7], {})[key] = results[key]

    return monthResults

In [170]:
def generatePercentileString(lineStats):
    """Convert the statistic fields of one record into box-plot data.

    Despite its name, this function returns a nested list, not a string.

    Parameters
    ----------
    lineStats : sequence of str
        Fields in the order: min, max, median, Q1, Q3, average, count,
        threshold and, optionally, a whitespace-separated list of outliers.

    Returns
    -------
    list
        ``[[min, Q1, median, Q3, max, outliers], avg, count]``. ``max`` is
        capped at ``threshold``; ``outliers`` is a list of floats, or ``None``
        when the outlier field is missing or empty.

    Raises
    ------
    IndexError
        If fewer than eight fields are supplied.
    ValueError
        If a field cannot be converted to a number.
    """
    minV = float(lineStats[0])
    maxV = float(lineStats[1])
    median = float(lineStats[2])
    Q1 = float(lineStats[3])
    Q3 = float(lineStats[4])
    avg = float(lineStats[5])
    count = int(lineStats[6])

    # The upper whisker must not extend past the outlier threshold.
    threshold = float(lineStats[7])
    maxV = min(maxV, threshold)

    # split() with no argument tolerates leading/trailing/repeated whitespace,
    # which split(" ") turned into empty tokens (and thereby lost outliers).
    outlierTokens = lineStats[8].split() if len(lineStats) > 8 else []
    outliers = [float(token) for token in outlierTokens] or None

    return [[minV, Q1, median, Q3, maxV, outliers], avg, count]

def getPercentiles(stats):
    """Extract the box-plot value lists from month-grouped statistics.

    Parameters
    ----------
    stats : dict
        ``{month: {date: record}}`` as produced by ``readInFile``; only the
        first element of each record is used.

    Returns
    -------
    dict
        ``{month: [record[0], ...]}`` with the entries of each month in the
        iteration order of that month's inner dict.
    """
    # Read from the argument itself rather than a notebook-level global, so
    # the function works on a fresh kernel and for any input.
    return {
        month: [dayStats[0] for dayStats in days.values()]
        for month, days in stats.items()
    }

In [139]:
def customized_box_plot(percentiles, axes, redraw = True, *args, **kwargs):
    """
    Draw box plots from precomputed summary values instead of raw samples.

    Placeholder boxes are created with ``axes.boxplot`` and the y-data of
    their artists is then overwritten with the supplied values.
    Adapted from https://stackoverflow.com/questions/27214537/
    is-it-possible-to-draw-a-matplotlib-boxplot-given-the-percentile-values-instead

    Parameters
    ----------
    percentiles : sequence
        One ``(low_cap, q1, median, q3, high_cap, fliers)`` entry per box.
        ``fliers`` is a sequence of outlier y-values, or ``None``.
    axes : matplotlib Axes
        The axes to draw on.
    redraw : bool
        When true, the canvas of ``axes`` is redrawn before returning.
    *args, **kwargs
        Forwarded unchanged to ``axes.boxplot``.

    Returns
    -------
    dict
        The artist dictionary returned by ``axes.boxplot``.
    """
    n_box = len(percentiles)
    # Dummy data: only the artists matter, their y-values are replaced below.
    box_plot = axes.boxplot([[-9, -4, 2, 4, 9],]*n_box, *args, **kwargs)

    min_y, max_y = float('inf'), -float('inf')

    for box_no, (low_cap, q1, median, q3, high_cap, fliers) in enumerate(percentiles):
        # Caps and whiskers come in (lower, upper) pairs per box; their
        # x-data is left as boxplot computed it from the box width.
        box_plot['caps'][2*box_no].set_ydata([low_cap, low_cap])
        box_plot['whiskers'][2*box_no].set_ydata([low_cap, q1])
        box_plot['caps'][2*box_no + 1].set_ydata([high_cap, high_cap])
        box_plot['whiskers'][2*box_no + 1].set_ydata([q3, high_cap])

        # Closed rectangle outline from q1 to q3.
        box_plot['boxes'][box_no].set_ydata([q1, q1, q3, q3, q1])
        box_plot['medians'][box_no].set_ydata([median, median])

        min_y = min(low_cap, min_y)
        max_y = max(high_cap, max_y)

        if fliers is not None and len(fliers) != 0:
            # Outliers share the x position of their box (boxes are 1-based).
            flier_y = np.array(fliers)
            flier_x = np.full(len(flier_y), box_no + 1)
            box_plot['fliers'][box_no].set(xdata = flier_x, ydata = flier_y)
            min_y = min(min_y, flier_y.min())
            max_y = max(max_y, flier_y.max())

    if n_box:
        # Pad each end by 10% of its own magnitude. Subtracting/adding the
        # padding (rather than multiplying by 1.1) keeps a positive minimum
        # or a negative maximum inside the visible range.
        axes.set_ylim([min_y - 0.1*abs(min_y), max_y + 0.1*abs(max_y)])

    # If redraw is set to true, the canvas owning ``axes`` is updated.
    if redraw:
        axes.figure.canvas.draw()

    return box_plot

In [171]:
def plotMonth(filename, month):
    """Plot one month's box plots from a statistics file and save the figure.

    The file is parsed with ``readInFile``, reduced with ``getPercentiles``,
    and the entry for ``month`` is drawn with ``customized_box_plot``.
    The figure is written to ``"AtoBYear" + month + ".png"``.

    Parameters
    ----------
    filename : str
        Path of the statistics file.
    month : str
        Month key to plot, as produced by ``readInFile`` (e.g. ``'10'``).
    """
    monthly_boxes = getPercentiles(readInFile(filename))

    fig, ax = plt.subplots()
    customized_box_plot(
        monthly_boxes[month],
        ax,
        redraw=True,
        notch=0,
        sym='+',
        vert=1,
        whis=1.5,
    )

    # Titles and axis labels.
    ax.set_title(month + " 2018 Delay Times From Denver to Chicago")
    ax.set_xlabel("Date")
    ax.set_ylabel("Minutes Delayed")

    # Tick styling and grid.
    ax.tick_params(axis='x', pad=0.5, labelsize='small', labelrotation=30)
    ax.grid()

    output_name = "AtoBYear" + month + ".png"
    plt.savefig(output_name)

def plotYear(filename):
    """Plot every month of a statistics file on a 4x3 grid and save it.

    The file is parsed with ``readInFile`` and reduced with
    ``getPercentiles``; each month found is drawn on its own subplot with
    ``customized_box_plot``. The figure is written to ``"AtoBYear.png"``.

    Months are placed in the iteration order of the parsed data. If the file
    covers fewer than twelve months the leftover subplots are hidden; months
    beyond the twelfth are not drawn.

    Parameters
    ----------
    filename : str
        Path of the statistics file.
    """
    stats = readInFile(filename)
    percentiles = getPercentiles(stats)

    fig, axes = plt.subplots(4, 3, figsize=(25,25))
    fig.suptitle("2018 Delay Times From Denver to Chicago", fontsize=24)
    fig.subplots_adjust(top=0.95)

    months = list(percentiles.keys())
    panels = list(axes.flat)  # row-major order, same as nested row loops

    # zip stops at the shorter sequence, so a partial year no longer indexes
    # past the end of ``months``.
    for ax, month in zip(panels, months):
        customized_box_plot(percentiles[month], ax, redraw=True, notch=0, sym='+', vert=1, whis=1.5)
        ax.set_title("Month: {}".format(month))
        # Shared fixed range so the monthly panels are directly comparable.
        ax.set_ylim([0, 1100])
        ax.tick_params(axis='x', pad=0.5, labelsize='small', labelrotation=30)
        ax.grid()

    # Hide panels that have no month to show.
    for ax in panels[len(months):]:
        ax.set_visible(False)

    plt.savefig("AtoBYear.png")

In [175]:
# Plot the month keyed '10' from ug.txt; plotMonth saves the figure as "AtoBYear10.png".
plotMonth("ug.txt", '10')

<IPython.core.display.Javascript object>

In [None]:
#check outliers!!