Tests for retrieving data from PACS
--

Imports
--

In [None]:
import logging
import configparser
import pandas as pd
from collections import namedtuple

from datetime import datetime as dt

from IPython.core import display as ICD

from pydicom.dataset import Dataset

from scripts.run_all import run_all
from scripts.retrieve_data_from_PACS import *

#from pynetdicom import debug_logger
#debug_logger()

# Show the full content of every DataFrame cell instead of truncating it.
# `None` is the supported way to disable the column-width limit; the old `-1`
# sentinel was deprecated in pandas 1.0 and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)
# set the level of pynetdicom module's logger to ERROR, to avoid any logs
logging.getLogger('pynetdicom').setLevel(logging.ERROR)

# Load the autoreload extension, then reload it so that re-running this cell in a
# live kernel also works.
%load_ext autoreload
%reload_ext autoreload
# NOTE(review): per the IPython documentation, mode 2 re-imports all modules before
# each execution, so edits to the imported `scripts.*` modules should be picked up
# without restarting the kernel - confirm.
%autoreload 2

Initialize the "config" object
--

In [None]:
# run_all() comes from scripts.run_all; its return value is kept as `config` and is
# passed as the first argument to every PACS query helper called in the cells below.
config = run_all()

Get all PT studies for a day
--

In [None]:
# Query the studies for '20190806' (presumably a YYYYMMDD date - confirm against
# find_studies_for_day) using 'PT' as the modality argument.
df_PT_studies = find_studies_for_day(config, '20190806', 'PT')
# Bare last expression so the notebook renders the result.
df_PT_studies

Get all PT and CT series for the found PT studies and get their time ranges
--

In [None]:
# For every PT study: fetch its series, collect the timing and machine of each
# series, prune redundant series, then write a summary back onto the study row.

# Columns that are filled in per series from the fetched information.
INFO_COLUMNS = ['start_time', 'end_time', 'machine']

# Iterate over the actual index labels rather than range(len(...)), so the loop
# does not depend on the frame carrying a 0..n-1 index.
for i_study in df_PT_studies.index:
    study_row = df_PT_studies.loc[i_study, :]
    # lazy %-style arguments: the row is only rendered when DEBUG is enabled
    logging.debug('DataFrame row:\n%s', study_row)
    df_series = find_series_for_study(config, study_row)

    for i_series in df_series.index:
        series_row = df_series.loc[i_series, :]
        logging.debug('Series: %s', series_row)
        row_info = fetch_info_for_series(config, series_row, 'PT')

        if row_info is None:
            logging.error('Skipping series %s: no data found.', series_row['Series Instance UID'])
            continue

        # copy the relevant parameters into the series DataFrame
        df_series.loc[i_series, 'start_time'] = row_info['start_time']
        df_series.loc[i_series, 'end_time'] = row_info['end_time']
        df_series.loc[i_series, 'machine'] = row_info['machine']

    if set(INFO_COLUMNS).issubset(df_series.columns):
        # remove redundant series
        df_series = prunes_series_by_time_overlap(df_series)

        # Series skipped above have no values in the info columns. Leaving them in
        # would put a float NaN into the ','.join() below and raise a TypeError.
        df_known = df_series.dropna(subset=INFO_COLUMNS)

        # sorted() keeps the joined string stable between runs (set order is arbitrary)
        machines = sorted(set(df_known['machine']))

        # create time ranges from the start/end times; zip() is positional, so this
        # works whether or not the pruning step kept a contiguous index
        time_ranges = ['{}-{}'.format(start, end)
                       for start, end in zip(df_known['start_time'], df_known['end_time'])]
    else:
        # No series at all, or every series was skipped: the info columns were never
        # created, so there is nothing to prune or to summarise for this study.
        logging.error('No series information found for study at row %s.', i_study)
        machines, time_ranges = [], []

    # propagate back the time range and machine name information
    df_PT_studies.loc[i_study, 'machines'] = ','.join(machines)
    df_PT_studies.loc[i_study, 'time_ranges'] = ','.join(time_ranges)
  

There was a problem with: 20190806|133816|PT|PID:3124294|1.2.840.....214

In [None]:
# Display the PT studies again, now with the 'machines' and 'time_ranges' columns
# written by the loop in the previous code cell.
df_PT_studies

In [None]:
# Unique machine names over all PT studies: every 'machines' entry is a
# comma-separated string, so split each one and flatten the pieces into a set.
{
    machine_name
    for machines_of_study in df_PT_studies['machines'].str.split(',')
    for machine_name in machines_of_study
}

Get all NM studies for a single day
--

In [None]:
# Fetch the NM studies for the day and keep only the rows of interest.
# Written as a single chain ending in a non-inplace reset_index(): the result is a
# fresh frame with a 0..n-1 index, not a filtered frame mutated in place (which can
# trigger SettingWithCopyWarning when columns are assigned to it later).
df_NM_studies = (
    find_studies_for_day(config, '20190806', 'NM')
    # keep rows whose Patient ID consists solely of digits; the pattern is a raw
    # string because '\d' is an invalid escape sequence in a normal string literal
    .loc[lambda df: df['Patient ID'].str.match(r'^\d+$')]
    # drop the studies described as 'EXTRINSEQUE'
    .loc[lambda df: ~df['Study Description'].isin(['EXTRINSEQUE'])]
    .reset_index(drop=True)
)
df_NM_studies

Get all NM and CT series for the found NM studies and get their time ranges (currently limited to the first study)
--

In [None]:
# For the selected NM studies: fetch their series, drop irrelevant ones, collect the
# timing and machine of each series, then write a summary back onto the study row.

# How many NM studies to process. Kept at 1 (only the first study), as before;
# set to None to process every study.
MAX_NM_STUDIES = 1

# Columns that are filled in per series from the fetched information.
INFO_COLUMNS = ['start_time', 'end_time', 'machine']

# Slicing the index (instead of range(1)) also copes with an empty studies frame.
for i_study in df_NM_studies.index[:MAX_NM_STUDIES]:
    study_row = df_NM_studies.loc[i_study, :]
    # lazy %-style arguments: the row is only rendered when DEBUG is enabled
    logging.debug('DataFrame row:\n%s', study_row)

    # fetch the series and filter out irrelevant ones; the non-inplace reset_index()
    # yields a fresh frame with a 0..n-1 index that is safe to assign into below
    df_series = (
        find_series_for_study(config, study_row)
        .loc[lambda df: ~df['Protocol Name'].isin(['SCREENCAPTURE'])]
        .loc[lambda df: ~df['Series Description'].isin(['Renal_Results'])]
        .reset_index(drop=True)
    )

    for i_series in df_series.index:
        series_row = df_series.loc[i_series, :]
        logging.debug('Series: %s', series_row)
        row_info = fetch_info_for_series(config, series_row, 'NM')

        if row_info is None:
            logging.error('Skipping series %s: no data found.', series_row['Series Instance UID'])
            continue

        # copy the relevant parameters into the series DataFrame
        df_series.loc[i_series, 'start_time'] = row_info['start_time']
        df_series.loc[i_series, 'end_time'] = row_info['end_time']
        df_series.loc[i_series, 'machine'] = row_info['machine']

    if set(INFO_COLUMNS).issubset(df_series.columns):
        # Series skipped above have no values in the info columns. Leaving them in
        # would put a float NaN into the ','.join() below and raise a TypeError.
        df_known = df_series.dropna(subset=INFO_COLUMNS)

        # sorted() keeps the joined string stable between runs (set order is arbitrary)
        machines = sorted(set(df_known['machine']))

        # create time ranges from the start/end times
        time_ranges = ['{}-{}'.format(start, end)
                       for start, end in zip(df_known['start_time'], df_known['end_time'])]
    else:
        # No series at all, or every series was skipped: the info columns were never
        # created, so there is nothing to summarise for this study.
        logging.error('No series information found for study at row %s.', i_study)
        machines, time_ranges = [], []

    # propagate back the time range and machine name information
    df_NM_studies.loc[i_study, 'machines'] = ','.join(machines)
    df_NM_studies.loc[i_study, 'time_ranges'] = ','.join(time_ranges)

df_NM_studies

Find all 'PT' and 'NM' studies for a day
--

In [None]:
# Fetch the PT and NM studies for the day and keep only the rows of interest.
# Written as a single chain ending in a non-inplace reset_index(): the result is a
# fresh frame with a 0..n-1 index, not a filtered frame mutated in place (which can
# trigger SettingWithCopyWarning when columns are assigned to it later).
df_studies = (
    find_studies_for_day(config, '20190806', ['PT', 'NM'])
    # keep rows whose Patient ID consists solely of digits; the pattern is a raw
    # string because '\d' is an invalid escape sequence in a normal string literal
    .loc[lambda df: df['Patient ID'].str.match(r'^\d+$')]
    # drop the studies described as 'EXTRINSEQUE'
    .loc[lambda df: ~df['Study Description'].isin(['EXTRINSEQUE'])]
    .reset_index(drop=True)
)
df_studies

Get all series for the found studies and get their time ranges
--

In [None]:
# For every study: fetch its series, collect the timing and machine of each series,
# prune redundant series, then write a summary (machines, individual time ranges and
# the overall time range) back onto the study row.

# Columns that are filled in per series from the fetched information.
INFO_COLUMNS = ['start_time', 'end_time', 'machine']

# Iterate over the actual index labels rather than range(len(...)), so the loop
# does not depend on the frame carrying a 0..n-1 index.
for i_study in df_studies.index:
    study_row = df_studies.loc[i_study, :]
    # lazy %-style arguments: the row is only rendered when DEBUG is enabled
    logging.debug('DataFrame row:\n%s', study_row)
    df_series = find_series_for_study(config, study_row)

    for i_series in df_series.index:
        series_row = df_series.loc[i_series, :]
        logging.debug('Series: %s', series_row)
        row_info = fetch_info_for_series(config, series_row)

        if row_info is None:
            logging.error('Skipping series %s: no data found.', series_row['Series Instance UID'])
            continue

        # copy the relevant parameters into the series DataFrame
        df_series.loc[i_series, 'start_time'] = row_info['start_time']
        df_series.loc[i_series, 'end_time'] = row_info['end_time']
        df_series.loc[i_series, 'machine'] = row_info['machine']

    machines, time_ranges, overall_time_range = [], [], ''
    if set(INFO_COLUMNS).issubset(df_series.columns):
        # remove redundant series
        df_series = prunes_series_by_time_overlap(df_series)

        # Series skipped above have no values in the info columns. Leaving them in
        # would put a float NaN into the ','.join() below and raise a TypeError.
        df_known = df_series.dropna(subset=INFO_COLUMNS)

        # sorted() keeps the joined string stable between runs (set order is arbitrary)
        machines = sorted(set(df_known['machine']))

        # create time ranges from the start/end times; zip() is positional, so this
        # works whether or not the pruning step kept a contiguous index
        time_ranges = ['{}-{}'.format(start, end)
                       for start, end in zip(df_known['start_time'], df_known['end_time'])]

        # start of the first remaining series to the end of the last one; guarded
        # because .iloc[0] raises IndexError when no series is left
        if len(df_known) > 0:
            overall_time_range = '{}-{}'.format(
                df_known.iloc[0]['start_time'], df_known.iloc[-1]['end_time'])
    else:
        # No series at all, or every series was skipped: the info columns were never
        # created, so there is nothing to prune or to summarise for this study.
        logging.error('No series information found for study at row %s.', i_study)

    # propagate back the time range and machine name information
    df_studies.loc[i_study, 'machines'] = ','.join(machines)
    df_studies.loc[i_study, 'time_ranges'] = ','.join(time_ranges)
    df_studies.loc[i_study, 'overall_time_range'] = overall_time_range

# label-based slice: .loc includes both end points, so this shows rows 0 through 11
df_studies.loc[0:11, :]