In [1]:
import datetime
import itertools

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
%matplotlib inline

from ipynb.fs.full.log_entries import get_all_log_entries

In [2]:
def plot_log_files_starting_with(file_prefix):
    """Plot the number of log entries per calendar day as a connected line.

    Entries are obtained from ``get_all_log_entries(file_prefix)``; each entry
    is expected to expose a ``'log_date'`` value that supports ``.date()``
    (presumably a ``datetime.datetime`` -- confirm against the loader).

    The chart is drawn on the current pyplot axes with one marker per day,
    ``YYYY-MM-DD`` tick labels and a major tick every 7 days.

    Args:
        file_prefix: Passed through unchanged to ``get_all_log_entries``.
    """
    all_log_entries = get_all_log_entries(file_prefix)

    def calendar_day(timestamp):
        return timestamp.date()

    # itertools.groupby only merges *adjacent* items with equal keys, so the
    # timestamps have to be ordered by day first. Without this, entries that
    # arrive out of order would split one day into several partial counts.
    dates = sorted((x['log_date'] for x in all_log_entries), key=calendar_day)
    grouped = itertools.groupby(dates, key=calendar_day)

    # Groups now come out in ascending day order, so no further sort is needed.
    count_per_day = [to_dates_count(day, entries) for day, entries in grouped]
    aggregated_dates = [x["log_date"] for x in count_per_day]

    mat_dates = mdates.date2num(aggregated_dates)
    mat_values = [x["number_of"] for x in count_per_day]

    ax = plt.gca()
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
    # plot_date() is deprecated (Matplotlib 3.9); marking the axis as a date
    # axis and calling plot() is the documented equivalent.
    ax.xaxis_date()
    ax.plot(mat_dates, mat_values, '-o')
    
def to_dates_count(x, y):
    """Turn one ``(key, group)`` pair into a per-day count record.

    Args:
        x: The grouping key, stored under ``'log_date'``.
        y: Iterable of the items in the group; it is consumed to count them.

    Returns:
        dict with keys ``'log_date'`` (the key) and ``'number_of'`` (how many
        items the group contained).
    """
    entries_in_group = sum(1 for _ in y)
    return {"log_date": x, "number_of": entries_in_group}

In [3]:
def plot_log_files_histogram_starting_with(file_prefix):
    """Plot how many log entries fall into each half-hour slot of the day.

    Entries are obtained from ``get_all_log_entries(file_prefix)``; each
    entry's ``'log_date'`` must expose ``hour`` and ``minute`` attributes
    (presumably a ``datetime.datetime`` -- confirm against the loader). The
    date part is ignored, so all days are folded onto one 24-hour axis.

    Previously the per-slot counts were computed and then discarded, so the
    function produced no output; it now draws them as a bar chart on a new
    figure.

    Args:
        file_prefix: Passed through unchanged to ``get_all_log_entries``.
    """
    all_log_entries = get_all_log_entries(file_prefix)

    time_slot_minutes = 30
    # A day holds exactly 24 * 60 / 30 = 48 slots (indices 0..47); the former
    # range(0, 49) produced a 49th slot that can never contain an entry.
    slots_per_day = (24 * 60) // time_slot_minutes

    # Count straight into a fixed-size list instead of sort + groupby followed
    # by a linear search for every slot.
    mat_values = [0] * slots_per_day
    for entry in all_log_entries:
        slot = to_total_minutes_time_slot(
            entry['log_date'], time_slot_minutes=time_slot_minutes)
        mat_values[slot] += 1

    fig, ax = plt.subplots(figsize=(10, 6))
    # align='edge' makes each bar span [slot start, next slot start).
    ax.bar(range(slots_per_day), mat_values, width=1.0, align='edge')

    # Label every fourth slot boundary, i.e. every two hours (00:00 .. 24:00).
    tick_slots = list(range(0, slots_per_day + 1, 4))
    ax.set_xticks(tick_slots)
    ax.set_xticklabels(
        ['%02d:00' % (slot * time_slot_minutes // 60) for slot in tick_slots])
    ax.set_xlim(0, slots_per_day)
    
def to_total_minutes_time_slot(dt, time_slot_minutes=30):
    """Return the index of the time-of-day slot that ``dt`` falls into.

    The day is divided into consecutive slots of ``time_slot_minutes`` minutes
    starting at midnight; slot 0 begins at 00:00. Only ``dt.hour`` and
    ``dt.minute`` are read -- seconds and the date are ignored.

    Args:
        dt: Object with ``hour`` and ``minute`` attributes.
        time_slot_minutes: Slot length in minutes (default 30).

    Returns:
        int: Zero-based slot index (the quotient truncated to an integer).
    """
    minutes_since_midnight = 60 * dt.hour + dt.minute
    slot_index = int(minutes_since_midnight / time_slot_minutes)
    return slot_index
    
def to_time_slots(x, y):
    """Turn one ``(slot, group)`` pair into a per-slot count record.

    Args:
        x: The grouping key, stored under ``'time_slot'``.
        y: Iterable of the items in the group; it is consumed to count them.

    Returns:
        dict with keys ``'time_slot'`` (the key) and ``'number_of'`` (how many
        items the group contained).
    """
    members = tuple(y)
    return {"time_slot": x, "number_of": len(members)}

In [4]:
def plot_log_time_deltas_starting_with(file_prefix):
    """Box-plot the gaps (in seconds) between consecutive log entries.

    Entries are obtained from ``get_all_log_entries(file_prefix)`` and walked
    in the order they are returned. For every adjacent pair the gap is
    ``later['log_date'] - earlier['log_date']``; gaps of one millisecond or
    less (including negative ones) are discarded. Each remaining gap is
    attributed to the day of the *earlier* entry.

    Three boxes are drawn on a new 10x6 figure: all days, working days
    (ISO weekday 1-5) and weekend days (ISO weekday 6-7). Outlier markers are
    hidden.

    Args:
        file_prefix: Passed through unchanged to ``get_all_log_entries``.
    """
    # Materialise once so consecutive entries can be paired with zip().
    entries = list(get_all_log_entries(file_prefix))
    min_gap = datetime.timedelta(milliseconds=1)

    # The previous implementation sorted the gaps with a key that was always 0
    # (a slot length of a whole day); that was a no-op and a box plot does not
    # depend on order, so the sort is dropped.
    data = []
    week_days_data = []
    week_ends_data = []
    for earlier, later in zip(entries, entries[1:]):
        gap = later['log_date'] - earlier['log_date']
        if gap <= min_gap:
            continue
        seconds = gap.total_seconds()
        data.append(seconds)
        if earlier['log_date'].isoweekday() < 6:
            week_days_data.append(seconds)
        else:
            week_ends_data.append(seconds)

    fig, ax = plt.subplots(figsize=(10, 6))
    # notch=False: plain rectangular boxes; sym='': do not draw fliers.
    ax.boxplot([data, week_days_data, week_ends_data], notch=False, sym='')
    ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)
    ax.set_xticklabels(['Wszystkie dni', 'Dni robocze', 'Dni przypadające w weekend'], fontsize=10)

def deltas(x, y):
    """Convert the ``'delta'`` of each record in ``y`` to seconds.

    Args:
        x: Unused; accepted so the function fits a ``(key, group)`` call shape.
        y: Iterable of mappings, each with a ``'delta'`` ``timedelta``.

    Returns:
        list of float: Each delta divided by one second, in input order.
    """
    one_second = datetime.timedelta(seconds=1)
    seconds = []
    for record in y:
        seconds.append(record['delta'] / one_second)
    return seconds