### Imports

In [None]:
import logging
from IPython.core import display as ICD
from scripts.main import *
from scripts.retrieve_data_from_PACS import *
#pd.set_option('display.max_colwidth', -1)
#pd.set_option('display.max_rows', None)
# set the level of pynetdicom module's logger to ERROR, to avoid any logs
logging.getLogger('pynetdicom').setLevel(logging.ERROR)
%load_ext autoreload
%reload_ext autoreload
%autoreload 2

### Initialize the "config" object

In [None]:
# Build the shared configuration object used by every cell below.
# run_all() is not defined in this notebook (presumably it comes from the star-imports);
# later cells index its result like a nested mapping, e.g. config['main']['start_date'].
config = run_all()

## Tests for retrieving data from PACS

In [None]:
from datetime import datetime

# Parse both configured bounds (stored as 'YYYYMMDD' strings) in one pass
start_date, end_date = (
    datetime.strptime(config['main'][bound], '%Y%m%d')
    for bound in ('start_date', 'end_date')
)

# One entry per calendar day, both bounds included
daterange = pd.date_range(start_date, end_date)

# Log every day of the range as 'YYYY-MM-DD'
for single_date in daterange:
    logging.info(single_date.strftime("%Y-%m-%d"))

### Find all 'PT' and 'NM' studies for a day

In [None]:
# Query the studies of the configured start date for the 'PT' and 'NM' modalities.
# find_studies_for_day is not defined in this notebook (presumably it comes from the star-imports).
df_studies = find_studies_for_day(config, config['main']['start_date'], ['PT', 'NM'])
# display only the first five columns
df_studies.iloc[:, [0,1,2,3,4]]

### Get all series for the found studies

In [None]:
# Look up the series belonging to the studies found above.
# find_series_for_studies is a project helper defined outside this notebook.
df_series = find_series_for_studies(config, df_studies)
# display a hand-picked subset of columns (positions 0-5 and 10)
df_series.iloc[:, [0,1,2,3,4,5,10]]

### Go through each series and find information about them

In [None]:
# Rebind df_series to the result of the project helper fetch_info_for_series.
# Later cells read the 'start_time' and 'end_time' columns — presumably they are added here; confirm.
df_series = fetch_info_for_series(config, df_series)

### Get some statistics on the success / failure rates of fetching info for SERIES

In [None]:
# Report how the info fetching went for the series
# (project helper defined outside this notebook; its return value is not stored)
show_stats_for_fetching_series_info(df_series)

### Exclude series where no information could be gathered

In [None]:
# Split the series into those that do and those that do not have information:
# a series counts as "without info" when its 'end_time' is missing
has_end_time = df_series['end_time'].notnull()
df_with_info = df_series[has_end_time]
df_no_info = df_series[~has_end_time]

# count rows and log the proportions
n, n_info, n_noinfo = len(df_series), len(df_with_info), len(df_no_info)
if n > 0:
    logging.info('{:3d}/{:3d} rows with    info ({:.1f} %)'.format(n_info, n, 100 * n_info / n))
    logging.info('{:3d}/{:3d} rows without info ({:.1f} %)'.format(n_noinfo, n, 100 * n_noinfo / n))
else:
    # an empty DataFrame would otherwise raise ZeroDivisionError in the percentages above
    logging.warning('No series to count: df_series is empty')

# filter out series where there is no information
# NOTE(review): the counts above are based on 'end_time' while this filter uses 'start_time' —
# confirm that both columns are always missing together
df_series = df_series[~df_series.start_time.isnull()]

### Get a summary of what machines are used in which institution names and modality

In [None]:
# Summarise the series with the project helper do_series_groupby (defined outside this notebook)
df_groupby = do_series_groupby(config, df_series)
# last expression of the cell: the notebook renders the summary
df_groupby

### Mark the series as being a first or a second take

In [None]:
# Rebind df_series to the result of the project helper mark_second_takes.
# The next cell reads an 'i_take' column — presumably it is added here; confirm.
df_series = mark_second_takes(config, df_series)

### Show the series that have a second take

In [None]:
# rows whose take number is not 1 (presumably second or later takes — confirm against mark_second_takes)
is_retake = df_series['i_take'] != 1
# UIDs of the studies that contain at least one such row
retake_study_uids = df_series.loc[is_retake, 'Study Instance UID']

# keep every series of those studies, so that first and later takes appear together
df = df_series[df_series['Study Instance UID'].isin(retake_study_uids)]
df.iloc[:, [0, 1, 2, 5, 11, 12, 13, 17]]

### Create a unique ID taking the second takes into account

In [None]:
# keep only the relevant rows, ordered by patient and then by series time
# NOTE(review): `indices_to_keep` is not defined in any cell shown here — confirm that it is
# provided by an earlier cell or by the star-imports before running this one
df_series_pruned = df_series.loc[indices_to_keep].sort_values(['Patient ID', 'Series Time'])

# create a column of unique ID of the form '<Series Date>-<Patient ID>-<running number>-A',
# where the running number follows the sorted row order and is zero-padded to 4 digits.
# The two columns are walked positionally instead of being looked up row by row with
# .loc[label], so the IDs stay correct even when the index contains duplicate labels.
df_series_pruned['UID'] = [
    '{}-{}-{:04d}-A'.format(series_date, patient_id, i_row)
    for i_row, (series_date, patient_id)
    in enumerate(zip(df_series_pruned['Series Date'], df_series_pruned['Patient ID']))
]

df_series_pruned.iloc[:,[0,1,2,5,12,13,14,16,17]]

### Rename the machines to have some consensus

In [None]:
# Canonical machine names; any 'Machine' value containing one of them (case-insensitive)
# is replaced by the canonical spelling
machine_names = ['Vision 600', 'Discovery 690', 'Millennium MPR', 'Intevo 16', 'Discovery 670']

# Machine / modality notes:
#   Biograph64/vision   PT
#   discovery 690       PT
#   *discovery 670      SPECT
#   Millennium          SPECT without CT
#   Encore2/Intevo      SPECT

# NOTE(review): `df` is whatever an earlier cell bound last (in the cells shown here, the
# subset of series with a second take) — confirm this is the DataFrame meant to be renamed
for machine_name in machine_names:
    # na=False: a missing machine name counts as "no match"; without it the mask contains
    # NaN and cannot be used for boolean indexing
    matching_rows = df['Machine'].str.match('.*' + machine_name + '.*', case=False, na=False)
    n_matching = matching_rows.sum()
    if n_matching > 0:
        logging.info('Found {} rows matching the name "{}":'.format(n_matching, machine_name))
        df.loc[matching_rows, 'Machine'] = machine_name

# replace the "Encore2" machine name to "Intevo", since it is the same machine
#df.loc[df['Machine'] == 'Encore2', 'Machine'] = 'Intevo 16'
#machine_names.remove('Encore2')

df

### Rename the descriptions to have some consensus

In [None]:
# Canonical description -> pattern to look for in the normalised description
# (normalised = lower-cased, with '-', '_', '^', ' ', '(' and ')' removed)
description_patterns = {
    'FDG Corps Entier': 'fdgcorpsentier',
    'FDG Tronc': 'fdgtronc',
    'Rb82 Coeur': 'rb82coeur',
    'FDG Abdomen TAP Veineux Corps Entier': 'abdomen1fdgtapveineuxpetcorpsentierflowadult',
    'Scintigraphie OctreoScan': 'scintioctreoscan',
    'FDG WB Child': 'pet1petfdgwbflowchild',
}

for descr, pattern in description_patterns.items():
    # Re-normalise on every pass because 'descr' is rewritten inside the loop.
    # regex=True is required: from pandas 2.0 str.replace treats the pattern literally by
    # default, in which case the character class would strip nothing.
    normalized = df['descr'].str.lower().str.replace(r'[-_^ ()]', '', regex=True)
    # na=False: a missing description counts as "no match" instead of putting NaN in the mask
    matching_rows = normalized.str.match('.*' + pattern + '.*', case=False, na=False)
    n_matching = matching_rows.sum()
    if n_matching > 0:
        logging.info('Found {} rows matching the name "{}":'.format(n_matching, descr))
        df.loc[matching_rows, 'descr'] = descr

df