In [None]:
import logging

from IPython.core.display import display, HTML

from scripts.main import *
from scripts.retrieve_data import *
from scripts.extract_data import *
from scripts.create_report import *

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.backends.backend_pdf import PdfPages

from pandas.plotting import register_matplotlib_converters
# Register pandas' date/time formatters and converters with matplotlib so that
# pandas datetime values can be plotted directly.
register_matplotlib_converters()

# Autoreload setup: %load_ext loads the extension, %reload_ext covers the case
# where this cell is re-run in a live kernel, and mode 2 reloads imported
# modules before each execution so edits to the `scripts` package are picked
# up without restarting the kernel.
%load_ext autoreload
%reload_ext autoreload
%autoreload 2

# set the width of the notebook
display(HTML("<style>.container { width:95% !important; }</style>"))

### Create the data (if needed)

In [None]:
# Load the project configuration, pin the reporting period, and hand the
# configuration to the data-preparation entry point.
config = load_config()
main_cfg = config['main']
main_cfg['start_date'] = '2019-01-07'
# Other end dates tried during development: '2019-01-11', '2019-04-26'
main_cfg['end_date'] = '2019-01-18'
load_transform_and_save_data_from_files(config)

### Load the data

In [None]:
# Rebuild the configuration for the reporting period whose data should be loaded.
config = load_config()
config['main']['start_date'] = '2019-01-07'
# Other end dates tried during development: '2019-01-11', '2019-04-26'
config['main']['end_date'] = '2019-01-18'

# create the path where the data for the current config would be stored
# (dates are embedded without dashes, e.g. studies_20190107_20190118.pkl)
day_period_str = '{}_{}'.format(config['main']['start_date'], config['main']['end_date']).replace('-', '')
studies_save_path = 'data/studies/studies_{}.pkl'.format(day_period_str)

# NOTE: unpickling can execute arbitrary code; only load files produced by
# this project's own data-preparation step.
df = pd.read_pickle(studies_save_path)

# exclude some machines and do some grouping up
# 'NoCT' is a literal substring, so regex matching is switched off explicitly
# (the default value of `regex` differs between pandas versions).
df['Machine'] = df['Machine Group'].str.replace('NoCT', '', regex=False)
# .copy() detaches the filtered frame from the original so that later column
# assignments do not trigger SettingWithCopyWarning.
df = df[df['Machine'] != 'mixed cases'].copy()

## Create the PDF using the API

In [None]:
# Build the PDF through the project's report API, using the configuration
# (reporting period) set up in the cells above.
create_report(config)

## Create the PDF manually (debug mode)

### Create the header

In [None]:
def _draw_image(figure, image_path, rect):
    """Read an image file and draw it, without axis decorations, in a new axes.

    Parameters:
        figure: the matplotlib figure to draw into.
        image_path: path of the image file to read.
        rect: [left, bottom, width, height] of the new axes in figure coordinates.

    Returns:
        (image, axes): the loaded image array and the axes it was drawn in.
    """
    # plt.imread opens and closes the file itself, so no file handle is leaked.
    image = plt.imread(image_path)
    image_ax = figure.add_axes(rect, anchor='NE', zorder=-1)
    image_ax.imshow(image)
    image_ax.axis('off')
    return image, image_ax


# create a matplotlib figure with the right aspect ratio (A4 portrait, in inches)
fig = plt.figure(figsize=[8.27, 11.69])

machine = 'Discovery'

# header
logging.info("Creating header section")

start_day = dt.strptime(config['main']['start_date'], '%Y-%m-%d')
end_day = dt.strptime(config['main']['end_date'], '%Y-%m-%d')

# analyse the week numbers (ISO week of the first and of the last day only)
week_numbers = sorted(list(set([start_day.strftime('%V'), end_day.strftime('%V')])))
week_numbers_str = '-'.join(week_numbers)
report_type = get_report_type(week_numbers)
logging.info(f"Header content: {report_type}, {week_numbers_str}")

# draw the header text with dates, etc.
# NOTE: this changes the font family globally for every later figure as well.
plt.rcParams["font.family"] = "monospace"
fig.text(0.62, 0.97, "Rapport {}".format(report_type), fontsize=17)
fig.text(0.62, 0.93, "Semaine{} {}".format('s' if len(week_numbers) > 1 else '', week_numbers_str), fontsize=27, fontweight='bold')
fig.text(0.63, 0.89, "du {}".format(start_day.strftime("%d/%m/%Y")), fontsize=22)
fig.text(0.63, 0.86, "au {}".format(end_day.strftime("%d/%m/%Y")), fontsize=22)

# machine name
fig.text(0.01, 0.82, 'Machine: ' + machine, fontsize=20)

# machine picture: images/<machine name lower-cased, spaces removed>.png
# The path is built with os.path.join so it is valid on every platform.
im_machine_path = os.path.join(os.getcwd(), 'images', '{}.png'.format(machine.lower().replace(' ', '')))
im_machine, im_machine_ax = _draw_image(fig, im_machine_path, [0.40, 0.81, 0.22, 0.05])

## draw the logo
im_logo_path = os.path.join(os.getcwd(), 'images', 'logo_transp.png')
im_log, im_logo_ax = _draw_image(fig, im_logo_path, [0.00, 0.86, 0.60, 0.13])

# The page size comes from the figure size above. `papertype` is a
# PostScript-only option that the PDF backend does not accept, so it is not passed.
fig.savefig('output_{}.pdf'.format(machine.lower().replace(' ', '')), orientation='portrait',
            format='pdf')
plt.show()

### Create the schedule

In [None]:
# Show the distinct machine names remaining in the filtered studies table;
# these are the values the schedule loop below can iterate over.
set(df['Machine'])

In [None]:
# Rectangle and random are not used by the active code in this cell; the imports
# are kept because other cells may rely on them (Rectangle is the plain
# alternative to FancyBboxPatch for the study boxes).
from matplotlib.patches import FancyBboxPatch, Rectangle
from random import random

# width of one study box, in x-axis units (one unit = one day)
study_box_w = 0.6

# Reporting period. It is the same for every machine, so it is computed once,
# from the config only (no dependency on variables left by the header cell).
start_date = dt.strptime(config['main']['start_date'], '%Y-%m-%d')
end_date = dt.strptime(config['main']['end_date'], '%Y-%m-%d')
days_range = pd.date_range(start_date, end_date)


def _decimal_hours(t):
    """Convert a time-of-day (object with hour/minute/second) to fractional hours."""
    return t.hour + t.minute / 60 + t.second / 3600


# Restricted to a single machine while debugging. Other values tried:
# 'Intevo', 'Millennium', 'PET GE', 'PET Siemens', or set(df['Machine']) for all.
for machine in ['Discovery']:

    # create a matplotlib figure with the right aspect ratio
    fig = plt.figure(figsize=[8.27, 11.69])

    logging.debug("Creating schedule section")

    logging.debug("Creating schedule plot")
    sched_ax = fig.add_axes([0.00, 0.81, 0.82, 0.40], anchor='NE')

    days_xticks, days_xtick_labels = [], []

    for day in days_range:
        day_str = day.strftime('%Y%m%d')
        logging.debug('Plotting {}'.format(day_str))

        df_day = df.query('Date == "{}" & Machine == "{}"'.format(day_str, machine))
        # 1-based position of the day within the period; used as x coordinate
        i_day = (day - start_date).days + 1
        logging.debug('Found {} studies in day {} (day number: {})'.format(len(df_day), day_str, i_day))
        if len(df_day) == 0:
            continue

        days_xticks.append(i_day)
        days_xtick_labels.append(day.strftime('%d/%m'))

        i_study = 0
        # iterrows() yields exactly one row per study even if index labels repeat
        for study_id, study in df_day.iterrows():
            start_hours = _decimal_hours(pd.to_datetime(study['Start Time'], format='%H%M%S'))
            end_hours = _decimal_hours(pd.to_datetime(study['End Time'], format='%H%M%S'))
            duration_hours = end_hours - start_hours

            if duration_hours <= 0:
                logging.warning('Problem with study {} on day {}: end time is earlier than start time'.format(study_id, day_str))
                continue

            # start / end / duration expressed in fractional hours of the day
            logging.debug('day {}, start {:5.2f} -> end {:5.2f}, duration: {:4.2f}'.format(day_str, start_hours, end_hours, duration_hours))

            # Boxes are centred on the day; consecutive studies are shifted
            # alternately left/right so adjacent boxes remain distinguishable.
            x = i_day - study_box_w * 0.5 + 0.05 * (-1 if (i_study % 2 == 0) else 1)
            y = start_hours
            w = study_box_w
            h = duration_hours

            rounded_rect = FancyBboxPatch((x, y), w, h, boxstyle="round,pad=-0.0040,rounding_size=0.155", fc="red", ec='black', mutation_aspect=0.4)
            sched_ax.add_patch(rounded_rect)

            i_study += 1

    # One x unit per day of the period (12 days -> 0.5 .. 12.5 for the current config)
    sched_ax.set_xlim((0.5, len(days_range) + 0.5))
    sched_ax.set_ylim((7.5, 19.5))
    sched_ax.set_xticks(days_xticks)
    sched_ax.set_xticklabels(days_xtick_labels)
    sched_ax.set_yticks(range(8, 20))
    sched_ax.set_yticklabels(['{:02d}h'.format(i) for i in range(8, 20)])

    logging.debug("Creating distribution plot")

    # Per-minute count of running studies, pooled over all days of the period.
    df_period = df.query('Date >= "{}" & Date <= "{}" & Machine == "{}"'.format(start_date.strftime('%Y%m%d'), end_date.strftime('%Y%m%d'), machine))
    freq = '1min'
    start_hour = '070000'
    end_hour = '200000'
    FMT = '%H%M%S'
    time_range = pd.date_range(dt.strptime(start_hour, FMT), dt.strptime(end_hour, FMT), freq=freq)
    df_counts = pd.DataFrame(0, index=time_range, columns=['count'])
    for start_str, end_str in zip(df_period['Start Time'], df_period['End Time']):
        start = dt.strptime(start_str, FMT)
        end = dt.strptime(end_str, FMT)
        # Label slicing on the sorted DatetimeIndex includes both end points, so
        # this increments every minute t with start <= t <= end in one step.
        df_counts.loc[start:end, 'count'] += 1
    # smooth the per-minute counts into 10-minute averages
    df_counts_resample = df_counts.resample('10min').mean()

    # Narrow axes to the right of the schedule sharing its vertical (hour) range
    distr_ax = fig.add_axes([0.83, 0.81, 0.12, 0.40], anchor='NE')
    counts_y_values = [_decimal_hours(t) for t in df_counts_resample.index]
    distr_ax.plot(df_counts_resample['count'], counts_y_values)
    distr_ax.set_ylim((7.5, 19.5))
    distr_ax.set_xticks([])
    distr_ax.set_yticks([])

    #fig.savefig('output.pdf', orientation='portrait', format='pdf')
    plt.show()

### Create a time-based distribution

In [None]:
# Count, for every minute between start_hour and end_hour, how many studies of
# the selected machine were running, pooled over all days of the period.
# Relies on `start_date`, `end_date` and `machine` left by the schedule cell.
df_period = df.query('Date >= "{}" & Date <= "{}" & Machine == "{}"'.format(start_date.strftime('%Y%m%d'), end_date.strftime('%Y%m%d'), machine))
freq = '1min'
start_hour = '080000'
end_hour = '200000'
FMT = '%H%M%S'
# strptime without a date yields 1900-01-01, so all times share one dummy day
time_range = pd.date_range(dt.strptime(start_hour, FMT), dt.strptime(end_hour, FMT), freq=freq)
df_counts = pd.DataFrame(0, index=time_range, columns=['count'])
for start_str, end_str in zip(df_period['Start Time'], df_period['End Time']):
    start = dt.strptime(start_str, FMT)
    end = dt.strptime(end_str, FMT)
    # Label slicing on the sorted DatetimeIndex includes both end points, so this
    # increments every minute t with start <= t <= end without a Python loop.
    df_counts.loc[start:end, 'count'] += 1
# smooth the per-minute counts into 10-minute averages
df_counts_resample = df_counts.resample('10min').mean()

In [None]:
# Draw the 10-minute averaged study counts in a narrow axes on the figure left
# by the schedule cell (`fig`), with the time of day on the vertical axis.
distr_ax = fig.add_axes([0.85, 0.81, 0.12, 0.40], anchor='NE')
counts_y_values = [t.hour + t.minute / 60 + t.second / 3600 for t in df_counts_resample.index]
# Plot on the axes explicitly: pyplot's "current axes" is not guaranteed to be
# distr_ax once the earlier figure has been shown in a previous cell.
distr_ax.plot(df_counts_resample['count'], counts_y_values)
# The figure was created in an earlier cell, so display it explicitly.
display(fig)