### Imports

In [None]:
import logging
import time
from copy import deepcopy

from IPython.core.display import display, HTML

import os
os.chdir('H:/Mes Documents/ServiceCivil2019/schedvisu')
import sys
sys.path.append('scripts')

from scripts.main import load_config, get_day_range
from retrieve_data import retrieve_and_save_data_from_PACS
from extract_data import load_transform_and_save_data_from_files
from create_report import create_report, get_report_type

%load_ext autoreload
%reload_ext autoreload
%autoreload 2

# set the width of the notebook
display(HTML("<style>.container { width:95% !important; }</style>"))

###  Run the whole pipeline

In [None]:
# Launch the pipeline through `run`.
# NOTE(review): `run` is not among the names imported in the imports cell of
# this notebook — confirm where it is defined before executing this cell.
run()
# Flush and close the logging handlers once the run has finished.
logging.shutdown()

### Run the whole pipeline for 4 years, starting with 2019

In [None]:
# Date ranges to process, one pipeline run per entry, most recent year first.
date_ranges = [
    {'start': '2019-01-01', 'end': '2019-12-06'},
    {'start': '2018-01-01', 'end': '2018-12-31'},
    {'start': '2017-01-01', 'end': '2017-12-31'},
    {'start': '2016-01-01', 'end': '2016-12-31'},
]

config = load_config()
for date_range in date_ranges:
    try:
        # presumably re-creates the handlers closed by logging.shutdown() at
        # the end of the previous iteration — TODO confirm
        create_logger()
        # Work on a deep copy so the date overrides of one run never leak
        # into the shared base configuration.
        local_config = deepcopy(config)
        local_config['main']['start_date'] = date_range['start']
        local_config['main']['end_date'] = date_range['end']
        run_pipeline(local_config)

    except Exception as e:
        # A failure for one date range is logged (with traceback) and the
        # loop moves on to the next range.
        logging.error('Error while running workflow')
        logging.error("-"*60)
        logging.error(e, exc_info=True)
        logging.error("-"*60)

    except KeyboardInterrupt:
        logging.error('Interrupted by user')
        # Stop processing the remaining date ranges: without this, a single
        # interrupt would only abort the current range and the loop would
        # start the next one. The `finally` clause below still runs.
        break

    finally:
        # Flush/close the log handlers after every run, then pause briefly
        # before the next iteration.
        logging.shutdown()
        time.sleep(1)

### Load the configuration

In [None]:
# Load the configuration into `config`, which the cells below pass to the
# pipeline functions.
config = load_config()

### Get the series

In [None]:
# Build the series DataFrame `df` used by the following cells.
# NOTE(review): the imports cell only brings in
# `load_transform_and_save_data_from_files` from `extract_data`; the name
# called here is not imported in this notebook and may be stale — confirm.
df = extract_transform_and_save_data_from_files(config)

### Do the grouping by

In [None]:
# Group the series and get back the updated DataFrame together with the
# per-series and per-study count tables.
df, df_count_series, df_count_studies = do_series_groupby(config, df)

# Show both count tables as returned ...
for count_table in (df_count_series, df_count_studies):
    display(count_table)

# ... then the same tables summed per 'Machine Group'.
for count_table in (df_count_series, df_count_studies):
    display(count_table.groupby('Machine Group').sum())

### Clean up the data (temporary workaround while the API is being fixed so that these bugs/errors no longer appear)

In [None]:
# Drop every study that contains at least one series matching an unwanted
# (column, value) pair. The filters are applied one after the other, each on
# the result of the previous one.
unwanted_values = (
    ('Institution Name', 'Hopital neuchatelois'),
    ('Institution Name', 'MEDECINE NUCLEAIRE'),
    ('Machine', ''),
)
for column, value in unwanted_values:
    uids_to_drop = df.loc[df[column] == value, 'Study Instance UID']
    df = df[~df['Study Instance UID'].isin(uids_to_drop)]

### Check why some invalid institution names got through the filter

In [None]:
# Collect every series belonging to a study that has at least one series
# whose 'Machine' is 'Ingenuity TF PET/CT'.
df_bad_series = df[df['Study Instance UID'].isin(df[df['Machine'] == 'Ingenuity TF PET/CT']['Study Instance UID'])]

if not df_bad_series.empty:
    # Inspect the first offending row, so that the study UID, institution
    # name and date logged below are deterministic and all describe the same
    # series.
    first_bad_row = df_bad_series.iloc[0]

    bad_study_UID = first_bad_row['Study Instance UID']
    logging.info('bad_study_UID: %s', bad_study_UID)

    # Collapse double spaces before comparing against the accepted names.
    inst_name = first_bad_row['Institution Name'].replace('  ', ' ')
    logging.info('inst_name: %s', inst_name)
    # NOTE(review): `accepted_inst_names` is not defined in the visible part
    # of this notebook — confirm it is set before running this cell.
    logging.info('accepted_inst_names: %s', accepted_inst_names)
    logging.info('inst_name is in accepted_inst_names? %s',
                 inst_name.lower().replace(' ', '') in accepted_inst_names)
    logging.info('date: %s', first_bad_row['Series Date'])

    # Re-query the studies of one day and fetch the series of the offending
    # study.
    # NOTE(review): the day is hard-coded — confirm it matches the 'date'
    # logged above.
    df_studies = find_studies_for_day(config, '20191029', ['PT', 'NM'])
    df_bad_study = df_studies[df_studies['Study Instance UID'] == bad_study_UID]
    df_series = find_series_for_studies(config, df_bad_study)
    # A bare expression nested inside an `if` body is not rendered by the
    # notebook, so the result has to be displayed explicitly.
    display(df_series)

### Solve the problem of having some studies with mixed machine names

In [None]:
# Look for studies acquired on both the 'MILLENNIUM MPR' and another machine
# ('BrightSpeed').

# Step 1: keep all series of the studies that have at least one
# 'MILLENNIUM MPR' series.
millennium_study_uids = df.loc[df['Machine'] == 'MILLENNIUM MPR', 'Study Instance UID']
df_series_for_study = df[df['Study Instance UID'].isin(millennium_study_uids)]

# Step 2: among those, show the series of the studies that also have at least
# one 'BrightSpeed' series (last expression of the cell, hence rendered).
brightspeed_study_uids = df_series_for_study.loc[
    df_series_for_study['Machine'] == 'BrightSpeed', 'Study Instance UID'
]
df_series_for_study[df_series_for_study['Study Instance UID'].isin(brightspeed_study_uids)]

### Debug the processing of some unfetchable rows

### Close the logging

In [None]:
# Flush and close all logging handlers at the end of the notebook session.
logging.shutdown()