### Imports

In [None]:
import logging
import datetime
import numpy as np

from datetime import datetime as dt
import pandas as pd

from statistics import mean

from IPython.core.display import display, HTML

import os
os.chdir('C:/TEMP/pySchedVisu/code')
import sys
sys.path.append('src')

from main import run, load_config
from utils import get_day_range
from retrieve_data import *
from extract_data import load_transform_and_save_data_from_files, add_preparation_times
from create_report import create_report, get_report_type

# Enable the autoreload extension in mode 2 so that edited project modules are
# re-imported before each cell is executed.
%load_ext autoreload
%reload_ext autoreload
%autoreload 2

# set the width of the notebook
display(HTML("<style>.container { width:95% !important; }</style>"))

### Displays for documentation

In [None]:
# Load the studies and series tables for 2016-01-01 .. 2019-12-27.
config = load_config()
config['main'].update({'start_date': '20160101', 'end_date': '20191227'})
df_studies, df_series = load_transform_and_save_data_from_files(config)

In [None]:
# List the column names of the studies table.
df_studies.columns

In [None]:
# Subset of the studies table shown in the documentation.
studies_doc_columns = [
    'Date', 'Patient ID', 'Machine', 'Machine Group', 'Modality', 'Description',
    'Study Description', 'Start Time', 'End Time', 'Start Time Prep', 'End Time Prep',
]
df_studies[studies_doc_columns]

In [None]:
# List the column names of the series table.
df_series.columns

In [None]:
# Subset of the series table shown in the documentation; cell contents are
# truncated to 29 characters while displaying.
series_doc_columns = [
    'Date', 'Patient ID', 'Machine', 'Machine Group List', 'Machine Group', 'Modality',
    'Institution Name', 'Series Time', 'Start Time', 'End Time', 'Protocol Name',
    'Study Description', 'i_take', 'SUID', 'Study Instance UID', 'Series Instance UID',
]
with pd.option_context('max_colwidth', 29):
    display(df_series[series_doc_columns])

### Tests for "Exclude bad series descriptions #66"

In [None]:
descr_list = """<ALPHA Range[1]>"
"<ALPHA Range[2]>"
"<ALPHA Range>"
"<MIP Range>"
"1.25 BONE"
"177-Lu CONTROLE POST-TRAITEMENT [R"
"177-Lu CONTROLE POST-TRAITEMENT [Recon Flash3D - AC - AC ]"
"177-Lu CONTROLE POST-TRAITEMENT [Recon Flash3D - AC ]"
"177-Lu CONTROLE POST-TRAITEMENT [Recon Flash3D - NoAC - NoAC ]"
"177-Lu CONTROLE POST-TRAITEMENT [Recon Flash3D - NoAC ]"
"177-Lu CONTROLE POST-TRAITEMENT [Recon xSPECT - AC ]"
"1MIP"
"1Processed Images LUNG"
"1Processed Images"
"2 CT XBONE  1.5  B31s STD"
"2 CT XBONE  1.5  B70s OS"
"2 CT XBONE  1.5  eFoV OS"
"2Processed Images LUNG"
"3D Saved State - AutoSave"
"Abdomen LD  2.0  B31s STD"
"AC  2 CT XBONE  2.0  B31s"
"AC  3 CT XQUANT  2.0  B31s"
"AC  Abdomen LD  2.0  B31s"
"AC  Abdomen LD  3.0  I31s"
"AC  Bassin xB 2.0  B31s"
"AC  CT Cerveau  2.0  HD_FoV"
"AC  CT Foie  2.0  HD_FoV"
"AC  CT LU177  3.0  I31s"
"AC  CT Thorax  2.0  HD_FoV"
"AC  CT Thorax  2.0 BH HD_FoV"
"AC  CT Tronc  2.0  HD_FoV"
"AC  CT TRONC  2.0  HD_FoV"
"AC  CT Tronc 2.0  HD_FoV"
"AC  CT Tronc low  2.0  HD_FoV"
"AC  CT Tronc Low  2.0  HD_FoV"
"AC  CT WB  3.0  HD_FoV"
"AC  CT WB low  2.0  HD_FoV"
"AC  HCHprot xbone  2.0  B31s"
"AC  HCHprot xbone  2.0  I31s"
"AC  TetecouF50xB  2.0  B31s"
"AC  Thorax LD  3.0  I31s"
"AC CT  2.0  HD_FoV"
"AC CT 2.0  HD_FoV"
"AC CT Cardiac  3.0   HD_FoV"
"AC CT Tronc  2.0   HD_FoV"
"AC CT Tronc  2.0  HD_FoV"
"AC CT WB  2.0  HD_FoV"
"AC CT WB low  2.0   HD_FoV"
"AW electronic pages"
"AX"
"AXIAL FD MAA"
"AXIAL FUSE"
"Axial Fused"
"AXIAL FUSED"
"Axial Fusion-"
"AXIAL MAA"
"AXIAL"
"axial2"
"AXIALE"
"AXIALES"
"AxialShuntIntestinale"
"Bassin xB 1.0  B31s STD"
"Bassin xB 1.0  B70s BONE"
"Body  3.0  I31s AC"
"COR MN"
"COR"
"coro"
"CORONAL FD MAA"
"coronal"
"CORONAL"
"CORONALE"
"Coronales FUSION 2"
"CORONALS maa"
"COU STD ceCT"
"CT  RECON  OS 1.25"
"CT 0.625 Pied Drt"
"CT 1.25 OS"
"CT 1.25 RECON BONE"
"CT 2.0  I30f  3"
"CT ABDO-PELV 2.5 STD"
"CT ABDOPELV 70S 2.5"
"CT Bassin Bone 1.25"
"CT BASSIN REC1.25 STD ASIR40"
"CT Bone 1.25 ASIR40"
"CT BONE RECON 1.25"
"CT Bone+"
"CT CENTRAGE 3.75 STD ASIR40"
"CT CENTRAGE L4-L5"
"CT CENTRAGE ORL 1.25 mm"
"CT CENTRAGE ORL"
"CT Cerveau 1.25 STD ASIR40"
"CT Cerveau 1.25mm DETAIL"
"CT Cerveau 1.2mm BONE CORR"
"CT Cerveau 3.75 STD ASIR40"
"CT Cerveau 5mm IV ASIR40 CORR"
"CT Chev Drt Cor"
"CT Chev Drt Sag"
"Ct Chev Drt Trans"
"CT COU RECON"
"CT COU-ORL 1.25 STD"
"CT FCH 10MIN LUNG 1.25"
"CT Foie  2.0  I30f"
"CT GA68 TRONC 60MIN recon os"
"CT Lowdose 3.75 STD ASIR40"
"CT LU177  3.0  I31s STD"
"CT lung  1.0  I70f  2"
"CT lung  1.0  I70f low dose"
"CT lung  1.0  I70f"
"CT Lung 1.0  I70f  2"
"CT Lung 1.0  I70f"
"CT LUNG 1.25 CORR"
"CT Lung 1.25 Inspi"
"CT LUNG 1.25 LOW DOSE"
"CT Lung 1.25 RECON"
"CT lung HFNIV 1.0  I50f"
"CT LUNG LOW DOSE 1.25"
"CT LUNG RECON 1.25"
"CT LUNG RECON MAN"
"CT LUNG RECON"
"CT LUNG"
"CT Lung1.5  I70f"
"CT LUNH 1.25 RECON"
"CT ORL RECON  1.25 STD ASIR40"
"CT ORL RECON 1"
"CT ORL RECON 1.25 STD ASIR40"
"CT ORL RECON 2"
"CT ORL RECON STD ASIR40"
"CT ORL RECON"
"CT ORL STD"
"CT ORL"
"CT OS 1.25 RECON"
"CT OS RECON 1.25 2"
"CT Pied Drt Cor"
"CT Pied Drt Sag"
"CT Pied Drt TRANS"
"CT PIED RECON 1.25 BONE"
"CT POUMON RECON"
"CT poumons NUC  std"
"CT PS 1.25 RECON"
"CT PULMONAIRE RESCON 1"
"CT REC MAND 1.25 STD ASIR40"
"CT RECON 1.25 BONE"
"CT RECON 1.25 OS"
"CT recon 1.25 STD ASIR40"
"CT Recon 1.25"
"CT RECON ABDO1.25 STD ASIR40"
"CT RECON BASSIN OS 1.25"
"CT RECON BONE + FOV 50"
"CT RECON BONE 1.25 mm"
"CT RECON BONE 1.25 STD ASIR40"
"CT RECON BONE 1.25"
"CT RECON BONE 1.25*1"
"CT RECON BONE 1.25/1"
"CT RECON BONE JAMBES"
"CT RECON BONE PLUS 1.25"
"CT Recon bone Tronc  1.0  I70f"
"CT Recon Bone Tronc 1.0   I70f"
"CT RECON BONE"
"CT Recon Brain 2"
"CT Recon Brain"
"CT RECON CERVEAU"
"CT Recon Lung  1.0  I70f  2"
"CT Recon Lung 1.0  I70f  2"
"CT Recon Lung 1.25 fov14 nodu"
"CT RECON LUNG 1.25 STD ASIR40"
"CT RECON LUNG"
"CT Recon ORL 1.25"
"CT RECON ORL 2.5 STD ASIR40"
"CT RECON ORL STANDARD"
"CT RECON ORL"
"CT RECON OS 1.25 OMOPLATE"
"CT RECON OS 1.25"
"CT RECON OS BASSIN"
"CT RECON OS FEMUR D"
"CT RECON OS WB 5.0 STD ASIR40"
"CT RECON OS"
"CT RECON OSSEUX"
"CT RECON PULM 1.25"
"CT RECON PULM FOV10"
"CT RECON PULM"
"CT Recon Spianl fov16"
"CT RECON"
"CT TAP 1.25/0.6 STD ASIR40"
"CT TAP 2.5 STD ASIR40 RECON2"
"CT TAP 2.5 STD ASIR40"
"CT TAP RECON 2.5 STD"
"CT TAP RECON OS 1.25"
"CT TETE/THO/ABDO 1.25 BONE+"
"CT THO 2.5 STD"
"CT THO Recon OS 1.25"
"CT THO/ABDO 2.5 STD ASIR40"
"CT ThoAbd Arteriel  1.0  I70f  3"
"CT Thorax  1.0  I30 mou"
"CT Thorax  1.0  I30f"
"CT Thorax  1.0 50 lung"
"CT THORAX 3.75 STD ASIR40"
"CT THORAX ART 2.5"
"CT Thorax BH 1.0  I30 mou"
"CT Thorax BH 1.0 50 lung"
"CT Thorax Lung 1.0  I50f Breathold"
"CT Thorax Lung 1.25"
"CT THORAX RECON IODE"
"CT THORAX RECON PULM 1.25"
"CT Thorax Std mou 1mm  I30f"
"CT THX 1.25 ASIR40"
"CT Tronc  2.0   I30f  3"
"CT Tronc  2.0  I30f"
"CT Tronc 1.25 OS ASIR40"
"CT Tronc 1.25 OSS ASIR40"
"CT Tronc 1.25 RECON BONE SS40"
"CT Tronc 1.25 RECON BONE"
"CT Tronc 1.25 RECON LUNG"
"CT Tronc 1.25 RECON ORL"
"CT Tronc 1.25 RETRO ASIR40"
"CT Tronc 1.25*1 CERVEAU"
"CT Tronc 2.5 STD ASIR40"
"CT Tronc 3.75 BONE"
"CT Tronc 3.75 STD ASIR40"
"CT TRONC 3.75 STD ASIR40"
"CT Tronc BONE 1.25 STD ASIR40"
"CT Tronc low  2.0   I30f  3"
"CT Tronc low  2.0  I30f"
"CT Tronc Low dose"
"CT WB  1.25 ORL STD ASIR40"
"CT WB  2.0   I30f"
"CT WB  2.0  B30f"
"CT WB 1.25 ORL STD ASIR40"
"CT WB 2.5 STD ASIR40"
"CT WB 5.0 RECON LUNG"
"CT WB 5.0 STD ASIR40"
"CT WB 5.0 STD RECON OS 1.25"
"CT WB RECON 1.25 BONE"
"CT WB RECON ORL"
"CTAC COEUR"
"CTAC Rest RECON PULM"
"CTAC Rest"
"e+1 Pet FDG Tronc 3D MAC"
"e+1 Pet FDG Tronc MAC EANM"
"e+2 Pet FDG WB 3D MAC"
"ECG CaScSeq  3.0  HD_FoV  63%"
"ECG CaScSeq  3.0  I30f  3  63% fov 50"
"EPAULE xB  1.0  B70s BONE"
"ETUDE 3BNC117 [Recon xSPECT - AC ]"
"Exported MM Oncology Reading"
"Fdg transax reorient"
"fFUSION AX AC"
"fFUSION COR AC"
"fFUSION SAG AC"
"FISION AX AC"
"FIUSION COR  AC"
"FOIE Y90 MAC SS TOF"
"FSION COR AC"
"FSUION COR XBONE"
"FUCION SAG AC"
"FUION AX AC"
"FUION AXIALE"
"FUISION AX AC"
"FUISION COR AC"
"FUISION SAG AC"
"fuision sagittals"
"FUISON COR MAA"
"FUISON PET/MR SAGITTALES"
"Fuison SAG AC"
"FUSAION AX AC"
"FUSAION SAGITALE"
"FUSIION AX AC"
"FUSIO AX AC"
"FUSIO COR AC BASSIN"
"FUSIO COR AC"
"FUSIOM AX AC"
"FUSOIN AX AC"
"Fuson Cor AC"
"Fuxion AX AC CHEVILLES"
"Fuxion AX AC"
"Fuxion COR AC CHEVILLES"
"Fuxion COR AC"
"Fuxion SAG AC CHEVILLES"
"Fuxion SAG AC"
"GENOUX  LD 1.0  I80s BONE"
"Genxproth_IRACRR_Trans vue post"
"GGL Ax G Sentinelle"
"Hanches xB 1.0  B70s BONE"
"IMAGES CLE"
"KEY IMAGES"
"LEUCO [Recon xSPECT - AC ]"
"Lu-177 [Recon xSPECT - AC ]"
"LUNG RET"
"MAA FOIE [Recon Flash3D - AC ]"
"MAA FOIE [Recon xSPECT - AC ] FUSED"
"MAA FOIE [Recon xSPECT - AC ]"
"MAA FOIE FUSED"
"MAA FOIE FUSION HD"
"MFSC FUSION AX AC"
"MFSC FUSION AX XBONE"
"MFSC FUSION COR AC"
"MFSC FUSION COR XBONE"
"MFSC FUSION SAG AC"
"MFSC FUSION SAG XBONE"
"MIBG [Recon xSPECT - AC ]"
"MixedPatient"
"Os_1Phase [Recon Flash3D - NoAC ]"
"Os_1Phase [Recon xSPECT - AC ]"
"Os_1Phase fusi [Recon xSPECT - AC ]"
"Os_3Phases [Recon Flash3D - NoAC - NoAC ]"
"Os_3Phases [Recon xSPECT - AC ]"
"Perf axiale"
"PerfTransaxial"
"Perfu coronal"
"PerfusionTransaxial"
"PET Cardiac FUSION GLG"
"PET Cardiac Static"
"PET CardioFreeze dual 8 Gates"
"Pet Centrage EANM"
"PET CT FDG Tronc 256 matrix"
"PET FDG Att Tronc 3h"
"Pet FDG Centrage MAC"
"Pet FDG Cerv Dyn 20min MAC"
"Pet FDG Cerv fusion 10min"
"Pet FDG Cerv fusion 11-20min"
"Pet FDG Coeur Stat 10min MAC"
"Pet FDG EANM Tronc-ORL MAC"
"PET FDG HFNIV Flow"
"Pet FDG MAC Tronc-ORL 3D coro"
"Pet FDG MAC Tronc-ORL 3D sag"
"Pet FDG MAC Tronc-ORL 3D"
"PET FDG Tho HFNIV Flow"
"PET FDG Tho HFNIV NAC Flow"
"PET FDG Thorax  Hold AC LM 256"
"PET FDG Thorax  Hold AC LM jop"
"PET FDG Thorax  Hold AC LM"
"Pet FDG Thorax MAC"
"PET FDG Tronc 220 matrix"
"PET FDG Tronc 256 matrix"
"Pet FDG Tronc 3D MAC FUSION AXIALE"
"Pet FDG Tronc 3D MAC fusion pet mr"
"Pet FDG Tronc 3D MAC ok"
"Pet FDG Tronc 3D MAC"
"PET FDG Tronc AC Flow 220 matrix"
"PET FDG Tronc AC Flow 256 matrix"
"PET FDG Tronc AC Flow 3min"
"PET FDG Tronc AC Flow 4min"
"PET FDG Tronc EANM"
"PET FDG Tronc Flow 220 matrix"
"PET FDG Tronc Flow 256 matrix"
"PET FDG Tronc Flow ceCT axial"
"PET FDG Tronc Flow EANM"
"PET FDG Tronc Flow recon corrige"
"PET FDG Tronc Flow"
"PET FDG Tronc MAC 1min30"
"PET FDG Tronc MAC 2min"
"Pet FDG Tronc MAC EANM"
"Pet FDG Tronc MAC"
"PET FDG Tronc OncoFreeze 256 matrix"
"PET FDG Tronc OncoFreeze"
"PET FDG Tronc"
"Pet FDG WB 3D MAC"
"Pet FDG WB EANM"
"PET FDG WB Flow EANM"
"PET FDG WB Flow"
"PET FET Cerveau 40-50 min"
"Pet FET Cerveau 40-50min MAC"
"Pet FET Cerveau MAC 50min"
"Pet FET fusion MAC 50min"
"PET Folate Tronc Flow"
"PET FUSI CFsingle 8 Gates"
"Pet GA68 DOTA Coeur 10min MAC"
"PET Ga68 DOTA Thorax Flow"
"Pet GA68 DOTA Thorax MAC"
"PET Ga68 DOTA Tronc EANM"
"PET Ga68 Dota Tronc Flow"
"Pet GA68 DOTA Tronc MAC"
"PET Ga68 Dotatate Thorax EANM"
"PET Ga68 Dotatate Thorax"
"PET Ga68 Dotatate Tronc"
"PET Ga68 Dotatate WB Flow 220 matrix"
"PET Ga68 DotatateTronc Flow EANM"
"PET Ga68 Dotatoc Tronc Flow"
"PET Ga68 DOTATOC Tronc Flow"
"Pet Ga68 EANM DOTA Tronc"
"Pet Ga68 MAC DOTA Tronc"
"Pet Ga68 MAC PSMA Tronc 2.128"
"Pet Ga68 MAC PSMA Tronc 3MIN"
"Pet Ga68 MAC PSMA Tronc"
"PET Ga68 PSMA Tronc AC"
"PET Ga68 PSMA Tronc Flow"
"PET Ga68 PSMA Tronc"
"PET Ga68 RGD Nodaga Tronc"
"PET Gallium Dotatate Tronc 4i5s5f 220"
"Pet GDOT Coeur Stat 10min MAC fusio"
"Pet GTATE  Thorax MAC fusion rate"
"Pet GTATE  Thorax MAC"
"Pet GTATE Coeur 10min MAC"
"Pet RM"
"Pet Tronc 3D MAC"
"PET TRONC 3D MAC"
"PET Tronc Flow"
"PET Tronc ORL Flow sag"
"PET Tronc ORL Flow"
"PET Tronc ORLFlow 220 matrix"
"Pet Tronc-ORL 3D MAC"
"PET WB 220 matrix"
"PET WB 30sec_bed"
"PET WB Dynamic"
"PET WB Flow 220 matrix"
"PET WB Flow 256 matrix"
"PET WB Flow EANM"
"PET WB Flow"
"Pet WB Ga68 Dota MAC"
"PET WB OncoFreeze"
"PET Y90 30min TOF 2i5s Absolut"
"PET Y90 Foie HD 30min"
"PET Y90 Foie NAC"
"PET Y90 Foie Oncofreeze 20min"
"PET Y90 Fused"
"pet"
"PET/MR CORONALES"
"PET/MR SAGITTALES"
"Pet-CT  FDG Tronc MAC"
"PET-CT FDG Tronc MAC"
"Pet-CT Tronc 3D MAC"
"Pet-CT Tronc EANM"
"Pet-CT Tronc Ga68Dota EANM"
"Pet-CT Tronc Ga68Dota MAC"
"Pet-CT WB 3D MAC"
"Pet-CT WB EANM"
"Petites articulat  1.0  I80s BONE"
"poumons"
"PreMonitoring  10.0  B30s"
"Quant. Perf Analysis: Posterior"
"Range-AC  CT Thorax  2.0  HD_FoV   iMAR-Tra-<ALPHA Range>"
"Range-AC  CT TRONC  2.0  HD_FoV   iMAR-Tra-<ALPHA Range>"
"Range-CT Tronc  2.0  I30f   iMAR-Tra-<ALPHA Range>"
"REC OS DORSO LOMB"
"RECON 1.25 THORAX"
"recon 1.25 tissu nou"
"recon 1.25"
"RECON 1.25"
"recon 1.25mm"
"RECON 1.25mm"
"RECON 1.25ORL"
"recon 2.5mm"
"RECON ABDO BONE 1.25/1"
"RECON BASSIN 1.25"
"Recon Bone 1,25mm"
"RECON BONE 1.25 mm"
"Recon bone 1.25"
"RECON BONE CRANE"
"RECON BONE CT 1.25"
"recon bone tibia"
"RECON BONE"
"RECON CENTREE LUNG"
"RECON CT 1.25*1"
"RECON CT BONE 1.25"
"RECON CT BONE 1.25*1"
"RECON CT BONE 1.25/1"
"RECON CT LUNG"
"RECON CT OS"
"RECON FOV 40"
"recon lung"
"Recon Lung"
"RECON LUNG"
"RECON ORL 1.25"
"RECON ORL 1.25*1"
"RECON ORL 1.25MM"
"RECON ORL"
"RECON OS 1.25"
"RECON OS 1.25MM"
"RECON OS FINE CRANE"
"RECON OS"
"recon oss"
"recon osseuse"
"RECON POUMON"
"RECON POUMONS"
"RECON PROTHESE 1.25 CORRE"
"recon pulm"
"Recon Pulm"
"RECON PULM"
"RECON PULMOAIRE 1.25"
"RECON PULMONAIRE 1.25"
"recon pulmonaire"
"RECON STD CRANE"
"RECON THORAX"
"RECON TRONC BONE 1.25*1"
"recon"
"RECON"
"recons os"
"RECONST PULMO"
"RECONT CT BONE 1.25*1"
"Reformatted"
"Rest  PET CardioFreeze dual 8 Gates"
"Reults"
"Rois01"
"Rois2"
"SAG AC"
"SAG MN"
"SAG"
"SAGITAL FD MAA"
"SAGITAL MAA"
"SAGITAL"
"SAGITALS MAA"
"sagitals"
"SAGITTAL"
"SAGITTALS maa"
"SAVED IMAGES"
"SCINTI OSSEUSE 1 PHASE [Recon xSPECT - AC ]"
"SCINTI OSSEUSE 1 PHASE [Recon xSPECT ABDO - AC ]"
"SCINTI OSSEUSE 3 PHASES [Recon xSPECT - AC ]"
"SCINTI PULM PERFUSEE [Recon xSPECT"
"shunt 1.30%"
"SIG SERIES"
"SIRT MAA [Recon xSPECT - AC ]"
"SIRT MAA SAG"
"Spect  Col Dor_IRAC_Transaxials"
"SPECT [Recon xSPECT - AC ]"
"SPECT ABDO_EM_IRAC_Transaxials"
"SPECT CT PERF PULM AX"
"SPECT CT PERF PULM CORO"
"SPECT CT PERF PULM SAG"
"Spect ct sag"
"Spect hanchepro_IRACRR_Transaxials"
"SPECT HANCHES [Recon xSPECT - AC ]"
"SPECT MAA Foie FUSION AXIAL"
"SPECT MAA Foie FUSION CORONAL"
"SPECT MAA Foie_EM_IRNC_Transaxials"
"SPECT VENT TC_IRNC_Transaxials"
"SPECT x1 Lu-177_EM_IRAC_Transaxials"
"SPECT x1 Lu-177_EM_IRNC_Transaxials"
"SPECT-CT PERF TC_EM_IRAC_Transaxials"
"SPECT-CT PERF TC_EM_IRNC_Transaxials"
"State - +SRest Static 6min Recon MAC"
"State - +SStress Static 6min Recon MAC"
"Static"
"TAP veineux  3.0  I30f  3"
"ThoAbd Arteriel lung  1.0  I70f  3"
"Thorax LD  1.5  I31s Std"
"Thorax LD  1.5  I80s Lung"
"TRANS MN"
"transaxial perf"
"transaxial PERF"
"transaxial vent"
"transaxial"
"TRANSAXIAL"
"TRANSAXIALS maa"
"transaxials"
"TRANSAXIALS"
"TRECON OS"
"TTT poumon"
"Ventil coronal"
"Ventil transaxial"
"VENTILATION TC [Recon xSPECT - AC ]"
"ventilation transax"
"VENTILATION"
"VentilTransaxial"
"Y90 Microspheres [Recon Flash3D - AC ]"
"Y90 Microspheres [Recon Flash3D -"
"Y90 Microspheres Axial Fused"
"Y90 Microspheres Fusion Axial"
"Y90 SPECT/ceCT"
"Y90 SPECT/CT AXIAL"
"Y90 SPECT/CT"""

In [None]:
# Load the series table (second returned value) for 2016-01-01 .. 2019-12-27.
config = load_config()
config['main'].update({'start_date': '20160101', 'end_date': '20191227'})

_, df = load_transform_and_save_data_from_files(config)

In [None]:
# Split the reference text into individual descriptions (entries are separated by
# a closing quote, a newline and an opening quote), flag every series whose
# description is in that list, and count the flagged series per description.
d = descr_list.split('"\n"')
df['in'] = df['Series Description'].isin(d)
# Only the flag column is summed: summing every column is slow and breaks on
# non-numeric columns with recent pandas versions.
c = df.groupby('Series Description')[['in']].sum()
c[c['in'] > 0].sort_values('in')

In [None]:
# Print, in sorted order, an anchored lower-case pattern for every reference
# description that does not occur in the loaded series.
# The patterns are built before sorting; the previous version sorted the `None`
# values returned by `print`, so the output order was arbitrary.
# NOTE(review): descriptions containing characters such as `[`, `*` or `+` are
# printed unescaped - confirm how the consumer of these patterns interprets them.
missing_descriptions = set(d).difference(set(df['Series Description']))
for pattern in sorted('^' + descr.lower() + '$' for descr in missing_descriptions):
    print(pattern)

In [None]:
# Series descriptions found in the data that are absent from the reference list.
set(df['Series Description']) - set(d)

### Tests for "Schedule visuals #63"

In [None]:
# Single-machine report for "PET GE", 2019-12-15 .. 2019-12-20, saved as an
# image at 200 dpi.
machine = 'PET GE'
config = load_config()
config['main'].update({
    'start_date': '20191215',
    'end_date': '20191220',
    'mode': 'single',
})
config['draw'].update({
    'debug_single_machine': machine,
    'debug_save_as_image': 'True',
    'dpi': '200',
})
create_report(config)

### Tests for "Number of slots per day #48"

In [None]:
# Single-machine report for "PET GE", 2019-12-15 .. 2019-12-20, saved as an
# image at 100 dpi.
machine = 'PET GE'
config = load_config()
config['main'].update({
    'start_date': '20191215',
    'end_date': '20191220',
    'mode': 'single',
})
config['draw'].update({
    'debug_single_machine': machine,
    'debug_save_as_image': 'True',
    'dpi': '100',
})
create_report(config)

In [None]:
# Load both tables for the configuration defined in the previous cell.
df, df_series_for_day = load_transform_and_save_data_from_files(config)

In [None]:
# Show the rows whose description is "VENT PERF.".
df.query('Description == "VENT PERF."')

In [None]:
# Collapse the "VENT PERF." rows that share patient, date, description and machine
# into a single row (earliest start, latest end), then put the merged rows back
# among the remaining studies.
df_vent_perf = (
    df.reset_index()
    .query('Description == "VENT PERF."')
    .groupby(['Patient ID', 'Date', 'Description', 'Machine'])
    .agg({
        'SUID': lambda x: x.iloc[0],
        'Patient ID': lambda x: x.iloc[0],
        'Date': lambda x: x.iloc[0],
        'Description': lambda x: x.iloc[0],
        'Machine': lambda x: x.iloc[0],
        # sorted() makes the joined text independent of set iteration order
        'Study Description': lambda x: ' / '.join(sorted(set(x))),
        'Modality': lambda x: '/'.join(sorted(set(x))).replace('CT/CT', 'CT').replace('NM/NM', 'NM'),
        # Take the second row when the group has one, otherwise the only row.
        # A plain `iloc[1]` raises IndexError on single-row groups, which is what
        # every group becomes once this cell has already been run.
        'Machine Group': lambda x: x.iloc[min(1, len(x) - 1)],
        'Start Time': 'min',
        'End Time': 'max',
        'Start Time Prep': 'min',
        'End Time Prep': 'max',
    })
    .set_index('SUID')
)
df = df[df['Description'] != 'VENT PERF.']
df = pd.concat([df, df_vent_perf], sort=False).sort_values(['Start Time', 'Machine Group', 'SUID'])
df

In [None]:
# Weekday number of every study date, assigned positionally.
df['Date_weekday'] = pd.to_datetime(df['Date']).apply(lambda day: day.weekday()).tolist()
# Slots per weekday are read from the comma-separated option of the current machine.
slots_option = 'n_study_per_day_' + machine.lower().replace(' ', '')
n_slots_per_day = pd.Series(config['draw'][slots_option].split(',')).astype(int)
# Total number of slots over all rows.
df['Date_weekday'].apply(lambda weekday: n_slots_per_day[weekday]).sum()

In [None]:
# Number of studies per machine and weekday, number of distinct dates per machine,
# and their ratio scaled by 5.
studies_per_weekday = df.groupby(['Machine', 'Date_weekday'])['Date'].count()
days_per_machine = df.groupby(['Machine', 'Date'])['Date'].count().groupby(['Machine']).count()
with pd.option_context('display.max_rows', 25):
    display(studies_per_weekday)
    display(days_per_machine)
    display(studies_per_weekday / days_per_machine * 5)

### Tests for "Split OS3PHASES into two parts #27"

In [None]:
# Load the 2019 data (2019-01-01 .. 2019-12-13) with the single-machine debug
# option set to "Discovery 670 NM", then show the studies table.
config = load_config()
config['main'].update({
    'start_date': '20190101',
    'end_date': '20191213',
    'mode': 'single',
})
config['draw'].update({'debug_single_machine': 'Discovery 670 NM'})
df_studies, df_series = load_transform_and_save_data_from_files(config)
df_studies

In [None]:
# Parse the 'HHMMSS' start/end times and store their difference as the study duration.
start_times, end_times = (
    pd.to_datetime(df_studies[column], format='%H%M%S')
    for column in ('Start Time', 'End Time')
)
df_studies['duration'] = end_times - start_times

In [None]:
# The take number is the text after the last underscore of the SUID.
df_studies['i_take'] = [suid.split('_')[-1] for suid in df_studies.reset_index()['SUID']]

In [None]:
# Number of non-null values per column for each (description, take) pair.
df_studies.groupby(['Description', 'i_take']).count()

In [None]:
# Show the current local date and time.
# NOTE(review): looks like a leftover scratch cell - confirm it can be deleted.
dt.today()

In [None]:
# Mean study duration for each (description, take) pair.
df_studies.groupby(['Description', 'i_take'])['duration'].agg(duration=pd.Series.mean)

### Tests for gaps

In [None]:
# Single-machine report for "Discovery 670 NM", 2019-12-09 .. 2019-12-13.
config = load_config()
config['main'].update({
    'start_date': '20191209',
    'end_date': '20191213',
    'mode': 'single',
})
config['draw'].update({'debug_single_machine': 'Discovery 670 NM'})
create_report(config)

### Tests for "Add grid lines for each hour behind the schedule plot #35"

In [None]:
# Report in "single" mode for 2019-09-02 .. 2019-12-13.
config = load_config()
config['main'].update({
    'start_date': '20190902',
    'end_date': '20191213',
    'mode': 'single',
})
create_report(config)

### Tests for weird overlappings

In [None]:
# Single-machine report for "Discovery 670 NM", 2019-12-02 .. 2019-12-13, at
# 150 dpi with the listed debug options switched off.
config = load_config()
config['main'].update({
    'start_date': '20191202',
    'end_date': '20191213',
    'mode': 'single',
})
config['draw'].update({
    'debug_single_machine': 'Discovery 670 NM',
    'debug_schedule_show_IPP_string': 'False',
    'debug_save_as_image': 'False',
    'dpi': '150',
})
config['extract'].update({'debug_force_extract_studies': 'False'})
create_report(config)

In [None]:
from extract_data import mark_retakes

# Reload the tables and show the series acquired on "Discovery 670 NM" on 2019-12-12.
df_studies, df_series = load_transform_and_save_data_from_files(config)
df = df_series.query('`Machine Group` == "Discovery 670 NM" and Date == "20191212"')
# `None` switches column-width truncation off (requires pandas >= 1.0); the former
# `-1` sentinel is deprecated and rejected by current pandas versions.
with pd.option_context('display.max_colwidth', None):
    display(df[['Patient ID', 'Modality', 'Start Time', 'End Time', 'Series Description', 'ImageType', 'Study Instance UID', 'i_take']])

In [None]:
# Studies of one patient on "Discovery 670 NM" on 2019-12-12.
single_patient_filter = 'Machine == "Discovery 670 NM" and Date == "20191212" and `Patient ID` == "2948602"'
df = df_studies.query(single_patient_filter)
display(df[['Modality', 'Start Time', 'End Time', 'Description']])

### Tests for "Calculate inter study time #49"

In [None]:
# Load the first returned table for 2019-12-09 .. 2019-12-13.
config = load_config()
config['main'].update({
    'start_date': '20191209',
    'end_date': '20191213',
    'mode': 'single',
})
df_all, _ = load_transform_and_save_data_from_files(config)

In [None]:
# Work on a copy of the "Discovery 670 NM" rows of 2019-12-09 .. 2019-12-13.
df = df_all.query('Machine == "Discovery 670 NM" and Date >= "20191209" and Date <= "20191213"').copy()

In [None]:
# Compute, for each row, the absolute time distance to the previous and to the next
# row (with and without preparation times), then average per machine and description.
# NOTE(review): the shifts compare neighbouring rows, so `df` is presumably expected
# to be ordered by date and start time - confirm.
FMT = '%H%M%S'
for time_column in ['Start Time', 'End Time', 'Start Time Prep', 'End Time Prep']:
    df[time_column] = pd.to_datetime(df[time_column], format=FMT)

# rows whose neighbour belongs to the same date
same_day_as_prev = df['Date'] == df['Date'].shift()
same_day_as_next = df['Date'] == df['Date'].shift(-1)

# compare the start time of a row with the end time of the previous row
# (`.abs()` flips negative differences without needing `datetime.timedelta`,
# which this notebook never imports explicitly)
df['time_to_prev'] = (df['End Time'].shift() - df['Start Time']).abs()
df['time_to_prev_prep'] = (df['End Time Prep'].shift() - df['Start Time Prep']).abs()
# compare the end time of a row with the start time of the next row
df['time_to_next'] = (df['Start Time'].shift(-1) - df['End Time']).abs()
df['time_to_next_prep'] = (df['Start Time Prep'].shift(-1) - df['End Time Prep']).abs()
# get the fully contained rows
df['fully_contained'] = (
    (df['End Time'] < df['End Time'].shift())
    & (df['Start Time'] > df['Start Time'].shift())
    & same_day_as_prev
    & same_day_as_next
)
# make sure that we only keep values where the dates are identical
df.loc[~same_day_as_prev, ['time_to_prev', 'time_to_prev_prep']] = pd.NaT
df.loc[~same_day_as_next, ['time_to_next', 'time_to_next_prep']] = pd.NaT
# get the average times
average_times = df.groupby(['Machine', 'Description'])[['time_to_prev', 'time_to_prev_prep', 'time_to_next', 'time_to_next_prep']].agg(pd.Series.mean)
average_times

### Tests for "Add preparation times around study blocks #21"

In [None]:
# Load the first returned table for the single day 2019-11-25.
config = load_config()
config['main'].update({
    'start_date': '20191125',
    'end_date': '20191125',
    'mode': 'single',
})
df_all, _ = load_transform_and_save_data_from_files(config)

In [None]:
# Display the "PET GE" rows of 2019-11-25 before and after `add_preparation_times`
# is applied to them.
df = df_all.query('Machine == "PET GE" and Date == "20191125"').copy()
display(df)
df = add_preparation_times(config, df)
display(df)

In [None]:
# Single-machine report for "PET GE" on 2019-11-25.
config = load_config()
config['main'].update({
    'start_date': '20191125',
    'end_date': '20191125',
    'mode': 'single',
})
config['draw'].update({'debug_single_machine': 'PET GE'})
create_report(config)

### Tests for "Add bookmarks to PDF #53"

In [None]:
# Create a report with the default configuration and keep the path of the PDF
# together with the bookmarks returned for it.
from PyPDF2 import PdfFileWriter, PdfFileReader
config = load_config()
pdf_output_path, bookmarks = create_report(config)

In [None]:
# Inspect the bookmarks returned by `create_report`.
bookmarks

In [None]:
# Copy the report to "<name>2.pdf" page by page while attaching the bookmarks.
# Each bookmark is expected to expose `.title`, `.page` and `.parent` (the title
# of its parent bookmark, or None).

# store the handles of the created bookmarks, keyed by bookmark title
bookmark_handles = {}

# read in the input file and copy it to output while adding bookmarks
with open(pdf_output_path, 'rb') as input_file:
    # define the input and output objects
    reader = PdfFileReader(input_file)
    writer = PdfFileWriter()
    # copy meta data (a PDF without an info dictionary yields None)
    metadata = reader.getDocumentInfo()
    if metadata is not None:
        writer.addMetadata(metadata)

    n_pages = reader.getNumPages()
    i_page = 0
    for bookmark in bookmarks:
        # copy every page up to and including the bookmark's target, so that pages
        # without a bookmark of their own are not lost
        while i_page <= bookmark.page and i_page < n_pages:
            writer.addPage(reader.getPage(i_page))
            i_page += 1
        # attach the bookmark to its parent when that parent was already created
        if bookmark.parent is not None and bookmark.parent in bookmark_handles:
            bookmark_handles[bookmark.title] = \
                writer.addBookmark(bookmark.title, bookmark.page, bookmark_handles[bookmark.parent])
        else:
            bookmark_handles[bookmark.title] = writer.addBookmark(bookmark.title, bookmark.page)

    # copy the pages located after the last bookmark
    while i_page < n_pages:
        writer.addPage(reader.getPage(i_page))
        i_page += 1

    # write out the file while the input is still open: the writer resolves the
    # page contents from the reader's stream only at this point
    with open(pdf_output_path.replace('.pdf', '2.pdf'), 'wb') as out:
        writer.write(out)

### Tests for "Make report send email #54"

In [None]:
# Imports for the e-mail test and a report path built from the configured output
# folder and the current timestamp (no file is created here).
import smtplib
from email.mime.text import MIMEText

config = load_config()
now_str = dt.now().strftime('%Y-%m-%d_%Hh%Mm%Ss')
pdf_output_path = '{}/report_{}.pdf'.format(config['path']['output_dir'], now_str)

In [None]:
# Build the report e-mail from the configuration, print it, and optionally send it.

# get the relevant part of the config
email = config['email']

# get the body and the subject of the mail
body = email['body']
subject = email['subject']

# split the path into its folder part and its file name
report_file_name = pdf_output_path.split('/')[-1]
report_folder_path = '/'.join(pdf_output_path.split('/')[:-1])

# replace each placeholder with its own value
body = body.replace('{__REPORT_PATH__}', pdf_output_path)
body = body.replace('{__REPORT_FOLDER_PATH__}', report_folder_path)
body = body.replace('{__REPORT_FILE_NAME__}', report_file_name)

print(subject)
print(body)

# Dry run by default: a bare `return` is a syntax error at cell level, so the
# sending part is guarded by a flag instead. Set it to True to really send.
send_email = False
if send_email:
    # create the body and populate it with headers
    msg = MIMEText(body, 'html')
    msg['Subject'] = subject
    msg['From'] = email['sender_name'] + '<' + email['sender_email'] + '>'
    msg['To'] = email['recipients_email']

    # create the connection to the server and send the mail; the context manager
    # closes the session even when sending fails
    recipients = [recipient.strip() for recipient in email['recipients_email'].split(',')]
    with smtplib.SMTP(email['smtp_server']) as s:
        s.sendmail(email['sender_email'], recipients, msg.as_string())

### Bookmarks

In [None]:
# Copy the first three pages of an existing report into a test PDF with one parent
# bookmark and one child bookmark per page.
from PyPDF2 import PdfFileWriter, PdfFileReader

output = PdfFileWriter()
# the input stays open until the output is written and is closed afterwards
with open('C:/TEMP/SchedVisu/outputs/report_2019-12-18_10h47m03s.pdf', 'rb') as input_file:
    input1 = PdfFileReader(input_file)
    for i_page in range(3):
        output.addPage(input1.getPage(i_page))
    parent = output.addBookmark('TestParent', 0)  # add parent bookmark
    for i_page in range(3):
        # add child bookmarks TestChild1..TestChild3 pointing at pages 0..2
        output.addBookmark('TestChild{}'.format(i_page + 1), i_page, parent)
    with open('C:/TEMP/SchedVisu/outputs/test.pdf', 'wb') as out:
        output.write(out)

### Tests for comparing to VENUS

In [None]:
# Draw the schedule of a single day (2019-12-05) with one column per machine and
# save it as 'schedvisu_20191205.png'.
import matplotlib.pyplot as plt
from create_report import _set_schedule_y_lims
from extract_data import load_transform_and_save_data_from_files
from matplotlib.patches import FancyBboxPatch
import matplotlib.colors as mc
import colorsys


def _time_to_decimal_hours(time_str):
    """Convert an 'HHMMSS' time string into hours with decimals."""
    parsed = pd.to_datetime(time_str, format='%H%M%S')
    return parsed.hour + parsed.minute / 60 + parsed.second / 3600


config = load_config()
config['main']['start_date'] = '20191205'
config['main']['end_date'] = '20191205'

# either go through all available machines, or use the list specified by the config
machines_list = sorted({machine for machine in config['machines'].keys() if 'NoCT' not in machine})
if config['draw']['debug_single_machine'] != '*':
    machines_list = config['draw']['debug_single_machine'].split(',')

# get the starting and ending dates, and the days range from the config
# (`get_day_range` is imported from `utils`; `main` itself is never bound as a name)
start_date, end_date, days_range = get_day_range(config)

# create a matplotlib figure with the right aspect ratio
fig = plt.figure(figsize=[8.27, 11.69])

# create the new axes
sched_ax = fig.add_axes([0.06, 0.42, 0.80, 0.39], anchor='NE')
sched_ax.invert_yaxis()

df, _ = load_transform_and_save_data_from_files(config)
# exclude some machines and do some grouping up
df['Machine'] = df['Machine Group'].str.replace('NoCT', '')
df = df[df['Machine'] != 'mixed cases']

# drawing options that do not depend on the machine or the study
box_w = config['draw'].getfloat('study_box_w')
x_shift = config['draw'].getfloat('study_x_shift')
colors = config['draw']['colors'].split(',')
show_ipp_string = config['draw'].getboolean('debug_schedule_show_IPP_string')

# go through each machine; the x position of a column is the index of its machine
for i_machine, machine in enumerate(machines_list):

    # get the data for the current machine
    df_day = df.query('Machine == "{}"'.format(machine))
    if len(df_day) == 0:
        continue

    # per-machine options; a separate name is used for the description list so that
    # the notebook-level `descr_list` text is not overwritten
    machine_key = machine.lower().replace(' ', '')
    gap_threshold = config['draw'].getfloat('gap_dur_minutes_' + machine_key)
    machine_descr_list = list(config['description_' + machine_key].keys()) + ['OTHER']

    # go through each study found for this machine
    for i_study in range(len(df_day)):
        study = df_day.iloc[i_study, :]

        # get the start time, end time and duration as hours with decimals
        start_hour = _time_to_decimal_hours(study['Start Time'])
        end_hour = _time_to_decimal_hours(study['End Time'])
        duration_hours = end_hour - start_hour

        # skip studies with a zero or negative duration
        if duration_hours <= 0:
            continue

        # get the coordinates where the rounded rectangle for this study should be
        # plotted; consecutive studies are shifted to alternating sides
        x = i_machine - (box_w * 0.5) + (x_shift * (-1 if (i_study % 2 == 0) else 1))
        y, w, h = start_hour, box_w, duration_hours

        # check how long the gap was with the previous study
        if i_study > 0:
            end_prev_hour = _time_to_decimal_hours(df_day.iloc[i_study - 1, :]['End Time'])
            gap_duration_hour = start_hour - end_prev_hour
            if gap_duration_hour * 60 >= gap_threshold:
                # plot a black line to show gaps
                sched_ax.plot([i_machine, i_machine], [start_hour - 0.15, end_prev_hour + 0.15],
                    color='black', linestyle='dashed', linewidth=2)

        # the color of a study is given by the position of its description
        i_descr = machine_descr_list.index(study['Description'])

        # check if the current study is a retake
        try:
            i_take = int(study.name.split('_')[-1])
        except ValueError:
            logging.warning('Problem with study ...{} on {}: got a weird retake number: "{}"'
                .format('.'.join(study.name.split('.')[-2:]), machine,  study.name))
            i_take = 1

        hatch, edge_color = '', 'black'
        if i_take != 1:
            logging.debug(study.name + ' is a retake (reprise)')
            hatch = '/'
            edge_color = 'red'
            # mark the already drawn takes of the same study as well
            sibling_studies_patches = [
                    p for p in sched_ax.patches
                    if p.get_label().split('_')[0] == study.name.split('_')[0]
                ]
            for p in sibling_studies_patches:
                p.set_hatch('\\')
                p.set_edgecolor('red')

        # if we are displaying more than ~4 months, the inside of the blocks is not visible anymore.
        #   Therefore, we need to use the edge to show the colors
        if len(days_range) > 95:
            edge_color = colors[i_descr]
            hatch = ''

        # create the shape and plot it
        rounded_rect = FancyBboxPatch((x, y), w, h, boxstyle="round,pad=-0.0040,rounding_size=0.155",
            fc=colors[i_descr], ec=edge_color, mutation_aspect=0.3, hatch=hatch, label=study.name)
        sched_ax.add_patch(rounded_rect)

        # DEBUG show information string
        if show_ipp_string:
            sched_ax.text(x + w * 0.1, y + 0.9 * h, study['Patient ID'], fontsize=8)

n_machines = len(machines_list)
sched_ax.set_xlim([-0.5, n_machines - 0.5])
sched_ax.set_xticks(range(n_machines))
sched_ax.set_xticklabels(machines_list)
# set the y limits
_set_schedule_y_lims(config, df)

# `papertype` only ever applied to the PostScript backend and is no longer accepted
# by recent matplotlib versions, so it is not passed for this PNG
fig.savefig('schedvisu_20191205.png', orientation='portrait', format='png')

### Tests for CTDIvol

In [None]:
# Split the rows dated after 2019-11-01 into those with and without a CTDIvol
# value, and add a rounded copy of CTDIvol to the former.
# NOTE(review): `df_all` must contain a 'CTDIvol' column; a previously used
# alternative source was pd.read_pickle('C:/TEMP/SchedVisu/data/series.pkl') -
# confirm which table is expected to be bound here.
df_all2019 = df_all.query('Date > "20191101"').copy()
has_no_ctdivol = df_all2019['CTDIvol'].isnull()
df_noctdi = df_all2019[has_no_ctdivol].copy()
df = df_all2019[~has_no_ctdivol].copy()
print(f'Found {len(df)} series with CTDIvol information out of {len(df_all2019)} series ({len(df_noctdi)} have no information)')
# keep the original value next to the rounded one
df = df.rename(columns={'CTDIvol':'CTDIvol_ori'})
df['CTDIvol'] = df['CTDIvol_ori'].apply(round)

In [None]:
# NOTE(review): this import lives in the middle of the notebook; consider moving it to the imports cell.
from extract_data import create_description_consensus
# replace missing study descriptions with empty strings before calling create_description_consensus
df['Study Description'] = df['Study Description'].replace(np.nan, '')
df = create_description_consensus(config, df)
# same treatment for the frame that also contains the series without CTDIvol
df_all2019['Study Description'] = df_all2019['Study Description'].replace(np.nan, '')
df_all2019 = create_description_consensus(config, df_all2019)

In [None]:
print(df.columns)
# columns shared by both exports; the CTDIvol-only frame additionally carries 'CTDIvol_ori'
ctdi_export_cols = ['Patient ID', 'Date', 'Modality', 'Start Time', 'End Time', 'Description',
    'Study Description', 'Series Description', 'Machine', 'CTDIvol']
# NOTE(review): writing the legacy '.xls' format depends on the installed pandas/Excel engine - confirm it is still supported
df[ctdi_export_cols + ['CTDIvol_ori', 'ImageType']].to_excel('CTDI_vol.xls')
df_all2019[ctdi_export_cols + ['ImageType']].to_excel('CTDI_vol_all.xls')

In [None]:
# for each description column, count the rows per (CTDIvol, description) pair and save the
# counts to CSV, most frequent first
for descr_col, csv_name in [
        ('Description', 'ctdivol_count_description.csv'),
        ('Series Description', 'ctdivol_count_series_description.csv')]:
    counts = df.groupby(['CTDIvol', descr_col])['Patient ID'].count().sort_values(ascending=False)
    counts.to_csv(csv_name, header=True)

In [None]:
# Show every row of the counts per (CTDIvol, Series Description), most frequent first.
# 'display.max_rows' is spelled out in full (the short pattern 'max_rows' can match several
# options) and set to None, which is the documented way to remove the row limit.
with pd.option_context('display.max_rows', None):
    display(df.groupby(['CTDIvol', 'Series Description'])['Patient ID'].count().sort_values(ascending=False))

In [None]:
# Show every row of the counts per (CTDIvol, Description).
# 'display.max_rows' is spelled out in full (the short pattern 'max_rows' can match several
# options) and set to None, which is the documented way to remove the row limit.
with pd.option_context('display.max_rows', None):
    display(df.groupby(['CTDIvol', 'Description'])['Patient ID'].count())

### Tests for other exam types

In [None]:
# NOTE(review): hard-coded absolute path; this also overwrites the notebook-wide `df`
df = pd.read_pickle('C:/TEMP/SchedVisu/data/studies.pkl')
# studies classified as "OTHER" on the "Millennium" machine
is_other_on_millennium = (df['Description'] == 'OTHER') & (df['Machine'] == 'Millennium')
# number of such studies per study description, in ascending order
descr = df[is_other_on_millennium].groupby('Study Description')['Date'].count().sort_values()
descr

### Tests for creating a multi-page PDF

In [None]:
# `run` is imported directly in the imports cell (`from main import run, load_config`);
# the module name `main` is not bound there, so `main.run()` fails on a fresh kernel.
run()

### Tests for "Report Type classification wrong #36"

In [None]:
config = load_config()
config['main']['start_date'] = '20191125'
config['main']['end_date'] = '20191219'
# `get_day_range` is imported in the imports cell (`from utils import get_day_range`);
# the module name `main` is not bound there, so `main.get_day_range` fails on a fresh kernel.
start_date, end_date, _ = get_day_range(config)
# show the length of the range, both as a number of days and as the raw difference
print((end_date - start_date).days)
print(end_date - start_date)
get_report_type(start_date, end_date)

### Tests for "Dose report #5"

###### Prepare the studies and series

In [None]:
# NOTE(review): hard-coded absolute path
df_series = pd.read_pickle('C:/TEMP/SchedVisu_data/series.pkl')
print(df_series.columns)

# series that carry a value in 'CTDIvol' or in 'CTDIvol_start'
has_ctdivol = df_series['CTDIvol'].notnull() | df_series['CTDIvol_start'].notnull()
df_ctdivol = df_series.loc[has_ctdivol, ['Date', 'CTDIvol_start', 'Series Description', 'Study Description',
    'Protocol Name', 'Modality', 'Patient ID', 'ImageType']]

# distinct values of the descriptive columns
display(set(df_ctdivol['Series Description']))
display(set(df_ctdivol['Protocol Name']))
display(set(df_ctdivol['Study Description']))

# Series whose description matches neither "low" nor "Topogram".
# na=False treats a missing description as "no match"; without it the mask contains NaN
# and cannot be inverted with `~`.
is_low = df_ctdivol['Series Description'].str.match('.*low.*', na=False)
is_topogram = df_ctdivol['Series Description'].str.match('.*Topogram.*', na=False)
display(df_ctdivol[(~is_low) & (~is_topogram)])

df_studies = pd.read_pickle('C:/TEMP/SchedVisu_data/studies.pkl')

### Tests for "Fetch more fields from the DICOMs #38"

###### Prepare the studies and series

In [None]:
day = dt(2019, 2, 27)
day_str = day.strftime('%Y%m%d')
patientID = ['2026682', '138821']

# restrict the configured date range to that single day
config = load_config()
for date_key in ('start_date', 'end_date'):
    config['main'][date_key] = day_str

# find the PT studies of that day and keep only the selected patients
df_studies = find_studies_for_day(config, day_str, ['PT'])
df_studies = df_studies[df_studies['Patient ID'].isin(patientID)]

df_series = find_series_for_studies(config, df_studies)

###### Manually fetch the info

In [None]:
# keep an untouched copy of the series, so that the fetching below can be repeated from the same input
df_series_save = df_series.copy()

In [None]:
# fetch the info on a copy of the saved series, so that the saved frame itself is not handed to the function
df_series, df_series_failed = fetch_info_for_series_with_batches(config, df_series_save.copy())

In [None]:
# inspect all fields of the first series row
df_series.iloc[0]

## Tests for retrieving all data for calculating correct End Times

### Modify the fetching so that it keeps all info for calculating end times

In [None]:
# NOTE(review): relies on a `config` created by a previously executed cell
retrieve_and_save_single_day_data_from_PACS(config, datetime.datetime.strptime('20190111', '%Y%m%d'))

### Run the pipeline for a single day

In [None]:
create_logger()
config = load_config()
# NOTE(review): other cells of this notebook set these dates as 'YYYYMMDD' (e.g. '20191125') -
#   confirm which format the pipeline expects
for date_key in ('start_date', 'end_date'):
    config['main'][date_key] = '2019-01-10'

# run the three pipeline steps one after the other: retrieve, load/transform, report
retrieve_and_save_data_from_PACS(config)
load_transform_and_save_data_from_files(config)
create_report(config)

### Prepare for tests

In [None]:
create_logger()
config = load_config()
# NOTE(review): other cells of this notebook set these dates as 'YYYYMMDD' (e.g. '20191125') -
#   confirm which format the pipeline expects
for date_key in ('start_date', 'end_date'):
    config['main'][date_key] = '2019-09-05'

In [None]:
# retrieve and save the data for 2019-09-05 only
retrieve_and_save_single_day_data_from_PACS(config, datetime.datetime(2019, 9, 5))

In [None]:
# NOTE(review): the imports cell explicitly imports `load_transform_and_save_data_from_files`;
#   confirm that `extract_transform_and_save_data_from_files` still exists under this name
df = extract_transform_and_save_data_from_files(config)
# show up to 1000 rows instead of the default truncated view
with pd.option_context("display.max_rows", 1000): display(df)

In [None]:
# NOTE(review): `df_failed` is not defined in any cell visible here - it relies on kernel state
df_failed_with_info = fetch_info_for_series(config, df_failed)

In [None]:
# append the series for which the info was fetched again
df2 = pd.concat([df, df_failed_with_info], sort=True)
# only the displayed result is de-duplicated; `df2` itself keeps the duplicates
# NOTE(review): assign the result if the de-duplicated frame is what later cells should use
df2.drop_duplicates('Series Instance UID')

In [None]:
df_rescued_series = df_failed_with_info.copy()
# series where some information could still not be gathered (e.g. no end time or no machine)
df_failed_series = df_rescued_series[
            (df_rescued_series['Start Time'].isnull())
            | (df_rescued_series['End Time'].isnull())
            | (df_rescued_series['Machine'] == '')
            | (df_rescued_series['Institution Name'] == '')]
# exclude those series from the rescued ones
df_rescued_series = df_rescued_series.loc[~df_rescued_series.index.isin(df_failed_series.index), :]
df2 = pd.concat([df, df_rescued_series], sort=True)
# a bare expression is only rendered when it is the last statement of the cell,
# so the first frame has to be displayed explicitly
display(df_failed_series)
df2


### Find all studies and series

In [None]:
config = load_config()
for date_key in ('start_date', 'end_date'):
    config['main'][date_key] = '2019-01-10'
# find the PT and NM studies of that day and keep a single patient
df_studies = find_studies_for_day(config, '20190110', ['PT', 'NM'])
is_selected_patient = df_studies['Patient ID'].eq('2370187')
df_studies = df_studies[is_selected_patient]
df_all_series = find_series_for_studies(config, df_studies)

In [None]:
# modalities that have at least one series with more than one instance
has_multiple_instances = df_all_series['Number of Series Related Instances'].astype(int) > 1
set(df_all_series.loc[has_multiple_instances, 'Modality'])

In [None]:
# Keep at most the first 30 series of every modality, ordered by series time.
# `groupby(...).head(30)` selects the rows in the frame's own order (no dependence on the
# iteration order of a set) and also works when `df_all_series` is empty.
df_series_subset = (
    df_all_series.groupby('Modality', sort=False).head(30)
    .sort_values('Series Time')
    .reset_index(drop=True)
)
# work on a copy, so that the subset itself stays available unchanged
df_series = df_series_subset.copy()
df_series

In [None]:
# NOTE(review): overwrites its own input, so re-running this cell feeds the previous result back in
df_series = fetch_info_for_series(config, df_series)

In [None]:
# inspect the series after the info has been fetched
df_series

### Step by step before turning it into an API

In [None]:
# list of field names to extract for each modality
to_fetch_fields_ctpt = ['SeriesInstanceUID', 'PatientID', 'InstanceNumber', 'ManufacturerModelName',
    'AcquisitionTime', 'Modality', 'ActualFrameDuration']
to_fetch_fields_nm = ['SeriesInstanceUID', 'PatientID', 'InstanceNumber', 'ManufacturerModelName',
    'AcquisitionTime', 'Modality', 'ActualFrameDuration', 'NumberOfFrames', '0x00540032', '0x00540052']

# create modality specific masks of the DataFrame
df_series_ctpt = df_series[df_series['Modality'].isin(['PT', 'CT'])]
df_series_nm = df_series[df_series['Modality'] == 'NM']
display(df_series_ctpt)
display(df_series_nm)

# prepare the CT/PT queries for the first instance (first image)
query_dicts_ctpt = list(df_series_ctpt.apply(lambda row: {
    'SeriesDate': row['Series Date'],
    'PatientID': row['Patient ID'],
    'SeriesInstanceUID': row['Series Instance UID'],
    'InstanceNumber': '1'
}, axis=1))
# prepare the CT/PT queries for the last instance (last image)
df_last_frames = df_series_ctpt[df_series_ctpt['Number of Series Related Instances'] != '1']
if len(df_last_frames) > 0:
    query_dicts_ctpt.extend(
        df_last_frames.apply(lambda row: {
            'SeriesDate': row['Series Date'],
            'PatientID': row['Patient ID'],
            'SeriesInstanceUID': row['Series Instance UID'],
            'InstanceNumber': row['Number of Series Related Instances']
        }, axis=1))
# fetch the CT/PT data
logging.info('Getting CT/PT data ({} queries)'.format(len(query_dicts_ctpt)))
df_info_ctpt = get_data(config, query_dicts_ctpt, to_fetch_fields_ctpt)

# prepare the NM queries for the first instance (first image)
query_dicts_nm = list(df_series_nm.apply(lambda row: {
    'SeriesDate': row['Series Date'],
    'PatientID': row['Patient ID'],
    'SeriesInstanceUID': row['Series Instance UID']
}, axis=1))
# fetch the NM data
logging.info('Getting NM data ({} queries)'.format(len(query_dicts_nm)))
df_info_nm = get_data(config, query_dicts_nm, to_fetch_fields_nm)

In [None]:
# show up to 1000 rows of both info frames, sorted so that the rows of one series sit together
with pd.option_context("display.max_rows", 1000):
    display(df_info_ctpt.sort_values(
        ['ManufacturerModelName', 'PatientID','SeriesInstanceUID', 'AcquisitionTime', 'InstanceNumber']))
    display(df_info_nm.sort_values(
        ['ManufacturerModelName', 'PatientID', 'SeriesInstanceUID', 'Modality', 'AcquisitionTime', 'InstanceNumber']))

### Save the retrieved info DataFrames

# keep copies of the series and of the fetched info frames, so that the processing can be repeated
df_series_save = df_series.copy()
df_info_ctpt_save = df_info_ctpt.copy()
df_info_nm_save = df_info_nm.copy()

### Manually process the info and merge it back to the series DataFrame

In [None]:
# Restore the working frames from their saved copies, so that this cell can be re-run from the
# same inputs (the NM section below modifies `df_info_nm` in place).
df_series = df_series_save.copy()
df_info_ctpt = df_info_ctpt_save.copy()
df_info_nm = df_info_nm_save.copy()

# Process PT/CT images
if len(df_info_ctpt) > 0:

    # get the images with a single instance
    single_instances_UIDs = df_series.loc[
        (df_series['Series Instance UID'].isin(df_info_ctpt['SeriesInstanceUID']))\
        & (df_series['Number of Series Related Instances'] == '1'), 'Series Instance UID']
    logging.info('single_instances_UIDs')
    display(single_instances_UIDs)
    # duplicate them in the info DataFrame under a large dummy instance number, so that they can
    #   be merged like the other series, as if there were two frames
    df_info_ctpt_single_inst = df_info_ctpt[df_info_ctpt['SeriesInstanceUID'].isin(single_instances_UIDs)].copy()
    df_info_ctpt_single_inst['InstanceNumber'] = 999999
    df_info_ctpt_extended = pd.concat([df_info_ctpt, df_info_ctpt_single_inst], sort=True)
    logging.info('df_info_ctpt_extended')
    display(df_info_ctpt_extended)

    # clean up the start times (drop everything after the first '.')
    df_info_ctpt_extended.loc[:, 'AcquisitionTime'] = df_info_ctpt_extended.loc[:, 'AcquisitionTime']\
        .apply(lambda t: str(t).split('.')[0])

    # regroup the first and last instance rows on a single row
    df_info_ctpt_merged = df_info_ctpt_extended[df_info_ctpt_extended['InstanceNumber'] == 1]\
        .merge(df_info_ctpt_extended[df_info_ctpt_extended['InstanceNumber'] > 1],
               on=['SeriesInstanceUID', 'PatientID', 'ManufacturerModelName', 'Modality'],
               suffixes=['_start', '_end'])
    logging.info('df_info_ctpt_merged')
    display(df_info_ctpt_merged)

    # rename the columns and keep the appropriate ones
    df_info_ctpt_clean = df_info_ctpt_merged.rename(columns={
            'SeriesInstanceUID': 'Series Instance UID',
            'PatientID': 'Patient ID',
            'ManufacturerModelName': 'Machine',
            'AcquisitionTime_start': 'Start Time',
            'AcquisitionTime_end': 'End Time'})\
        .drop(columns=['InstanceNumber_start', 'InstanceNumber_end'])
    logging.info('df_info_ctpt_clean')
    display(df_info_ctpt_clean)

    # swap start and end times on the rows where the start is later than the end
    s = pd.to_datetime(df_info_ctpt_clean['Start Time'], format='%H%M%S')
    e = pd.to_datetime(df_info_ctpt_clean['End Time'], format='%H%M%S')
    df_inv = df_info_ctpt_clean[s > e].copy()
    df_inv[['Start Time','End Time']] = df_inv[['End Time','Start Time']]
    df_info_ctpt_clean[s > e] = df_inv
    logging.info('df_info_ctpt_clean 2')
    display(df_info_ctpt_clean)

    # merge the info into the series DataFrame
    df_series = df_series.merge(df_info_ctpt_clean, on=['Patient ID', 'Series Instance UID', 'Modality'], how='outer')
    logging.info('df_series 1')
    display(df_series)

    # keep only the relevant columns: prefer the freshly fetched value ('_y') and fall back
    #   to the value that was already in the series ('_x')
    for f in ['Start Time', 'End Time', 'Machine']:
        df_series[f] = df_series[f + '_y'].where(df_series[f + '_y'].notnull(), df_series[f + '_x'])
        df_series.drop(columns=[f + '_y', f + '_x'], inplace=True)
    logging.info('df_series post-CT')
    display(df_series)

# Process NM images
if len(df_info_nm) > 0:
    # clean up the start times (drop everything after the first '.')
    df_info_nm.loc[:, 'AcquisitionTime'] = df_info_nm.loc[:, 'AcquisitionTime']\
        .apply(lambda t: str(t).split('.')[0])
    # use the AcquisitionTime as Start Time
    df_info_nm['Start Time'] = df_info_nm['AcquisitionTime']
    # call a function to calculate the End Times
    df_info_nm['End Time'] = df_info_nm.apply(get_NM_series_end_time, axis=1)
    # rename the columns and select the appropriate ones
    df_info_nm_clean = df_info_nm.rename(columns={
            'SeriesInstanceUID': 'Series Instance UID',
            'PatientID': 'Patient ID',
            'ManufacturerModelName': 'Machine'})
    # merge the info into the series DataFrame
    df_series = df_series.merge(df_info_nm_clean, on=['Patient ID', 'Series Instance UID', 'Modality'],
        how='outer')
    # keep only the relevant columns: prefer the freshly fetched value ('_y') and fall back
    #   to the value that was already in the series ('_x')
    for f in ['Start Time', 'End Time', 'Machine']:
        df_series[f] = df_series[f + '_y'].where(df_series[f + '_y'].notnull(), df_series[f + '_x'])
        df_series.drop(columns=[f + '_y', f + '_x'], inplace=True)
    logging.info('df_series post-NM')
    display(df_series)

# remove duplicates
df_series = df_series.drop_duplicates('Series Instance UID')

In [None]:
# a bare expression is only rendered when it is the last statement of the cell,
# so the overview has to be displayed explicitly
display(df_series[['Patient ID', 'Modality', 'Start Time', 'End Time']])

display(df_series)
# NOTE(review): hard-coded relative path - this overwrites the saved data of 2019-01-10
df_series.to_pickle('data/2019/2019-01/2019-01-10.pkl')

### Finish the exploration after all this manual processing

In [None]:
# the series could alternatively be loaded from the files:
#df_series = load_data_from_files(config)
# mark the retakes and the machine group for each series
df_series = mark_retakes(config, df_series)
df_series = mark_machine_group(config, df_series)
display(df_series[df_series['Patient ID'] == '2370187'])


def _join_unique(values):
    """Join the distinct values of a group with '/' (the order of the values is not guaranteed)."""
    return '/'.join(set(values))


# how the series of one 'SUID' group are combined into a single study row
study_aggregations = {
    'Series Date': _join_unique,
    'Start Time': 'min',
    'End Time': 'max',
    'Study Description': _join_unique,
    'Patient ID': _join_unique,
    'Machine Group': _join_unique,
    'Modality': _join_unique,
    'Protocol Name': _join_unique,
}
df_studies = (
    df_series.replace(np.nan, '')
    .groupby('SUID')
    .agg(study_aggregations)
    .sort_values(['Series Date', 'Start Time', 'Machine Group', 'SUID'])
    .rename(columns={'Series Date': 'Date'})
)

df = df_studies[df_studies['Patient ID'] == '2370187'].copy()
display(df)

# derive the machine from the machine group and drop the 'mixed cases' rows
df['Machine'] = df['Machine Group'].str.replace('NoCT', '')
df = df[df['Machine'] != 'mixed cases'].drop(columns='Machine Group')
df.sort_values('Machine')[['Patient ID', 'Machine', 'Modality', 'Start Time', 'End Time', 'Study Description']]

# Test to query all images in one query data set

### Fetch info for all "first" CT/PT images

In [None]:
# NOTE(review): `df_ctpt` is not defined in any cell visible here - it relies on kernel state
df_ctpt

In [None]:
# upper bound on the number of identical queries sent before giving up; the original
# `while len(...) == 0` loop never terminated when the query legitimately returned nothing
MAX_FETCH_ATTEMPTS = 5

# fields to fetch from the DICOM header
to_fetch_fields = ['SeriesInstanceUID', 'PatientID', 'InstanceNumber', 'ManufacturerModelName', 'AcquisitionTime',
    'Modality', 'ImageType', 'ActualFrameDuration', 'NumberOfFrames', '0x00540032', '0x00540052']

df_info_ctpt_first = []
for _attempt in range(MAX_FETCH_ATTEMPTS):
    # build a fresh query data set for every attempt: first instance of all CT/PT series
    ds = Dataset()
    ds.QueryRetrieveLevel = 'IMAGE'
    ds.SeriesDate = '20191021'
    ds.SeriesInstanceUID = list(set(df_ctpt['Series Instance UID']))
    ds.PatientID =  list(set(df_ctpt['Patient ID']))
    ds.InstanceNumber = '1'
    ds.Modality = ['CT','PT']

    # find information about this series by fetching some images
    df_info_ctpt_first = get_data(config, [ds], to_fetch_fields)
    if len(df_info_ctpt_first) > 0:
        break
else:
    raise RuntimeError('No CT/PT first-instance info returned after {} attempts'.format(MAX_FETCH_ATTEMPTS))

In [None]:
# inspect the info fetched for the first instances
df_info_ctpt_first

### Fetch info for all "last" CT/PT images

In [None]:
# upper bound on the number of identical queries sent before giving up; the original
# `while len(...) == 0` loop never terminated when the query legitimately returned nothing
MAX_FETCH_ATTEMPTS = 5

# fields to fetch from the DICOM header
to_fetch_fields = ['SeriesInstanceUID', 'PatientID', 'InstanceNumber', 'ManufacturerModelName', 'AcquisitionTime',
    'Modality', 'ImageType', 'ActualFrameDuration', 'NumberOfFrames', '0x00540032', '0x00540052']

df_info_last = []
for _attempt in range(MAX_FETCH_ATTEMPTS):
    # build a fresh query data set for every attempt; the instance numbers queried are all the
    # distinct 'Number of Series Related Instances' values of the CT/PT series
    ds = Dataset()
    ds.QueryRetrieveLevel = 'IMAGE'
    ds.SeriesDate = '20191021'
    ds.SeriesInstanceUID = list(set(df_ctpt['Series Instance UID']))
    ds.PatientID =  list(set(df_ctpt['Patient ID']))
    ds.InstanceNumber = list(set(df_ctpt['Number of Series Related Instances']))
    ds.Modality = ['CT','PT']

    # find information about this series by fetching some images
    df_info_last = get_data(config, [ds], to_fetch_fields)
    if len(df_info_last) > 0:
        break
else:
    raise RuntimeError('No CT/PT last-instance info returned after {} attempts'.format(MAX_FETCH_ATTEMPTS))

In [None]:
# For every series keep only the last row in 'InstanceNumber' sort order.
# The result is assigned to `df_grouped`: the original discarded it, so the lines below
# referred to a name that this cell never defined.
df_grouped = df_info_last.sort_values(by="InstanceNumber").drop_duplicates(subset=["SeriesInstanceUID"], keep="last").reset_index()
# number the rows starting from 1
df_grouped.index += 1
df_grouped

### Fetch info for all NM images

In [None]:
# upper bound on the number of identical queries sent before giving up; the original
# `while len(...) == 0` loop never terminated when the query legitimately returned nothing
MAX_FETCH_ATTEMPTS = 5

# fields to fetch from the DICOM header
to_fetch_fields = ['SeriesInstanceUID', 'PatientID', 'ManufacturerModelName', 'AcquisitionTime',
    'Modality', 'ImageType', 'ActualFrameDuration', 'NumberOfFrames', '0x00540032', '0x00540052']

df_info_nm = []
for _attempt in range(MAX_FETCH_ATTEMPTS):
    # build a fresh query data set for every attempt; only the first five rows of `df_nm` are queried
    ds = Dataset()
    ds.QueryRetrieveLevel = 'IMAGE'
    ds.SeriesDate = '20191021'
    ds.SeriesInstanceUID = list(set(df_nm['Series Instance UID'][0:5]))
    ds.PatientID =  list(set(df_nm['Patient ID'][0:5]))
    ds.Modality = 'NM'

    # find information about this series by fetching some images
    df_info_nm = get_data(config, [ds], to_fetch_fields)
    if len(df_info_nm) > 0:
        break
else:
    raise RuntimeError('No NM info returned after {} attempts'.format(MAX_FETCH_ATTEMPTS))

In [None]:
# inspect the info fetched for the NM series
df_info_nm

### Merge the results

In [None]:
# NOTE(review): `df_info` is only assigned in a cell further down - this cell relies on execution order
# align the info frame with the series frame: drop its 'Modality' column and rename the UID column
df_info_renamed = df_info.drop(columns='Modality').rename(columns={'SeriesInstanceUID': 'Series Instance UID'})
# outer merge, so that series without info and info without series are both kept
df_merged = df_series.merge(df_info_renamed, on='Series Instance UID', how='outer')
# narrower alternative view:
#df_merged[['Series Date', 'Series Time', 'AcquisitionTime']]
df_merged

In [None]:
df_info = []
while len(df_info) == 0:
    ds = Dataset()
    ds.QueryRetrieveLevel = 'IMAGE'
    ds.SeriesInstanceUID = df_series['Series Instance UID']
    ds.PatientID =  df_series['Patient ID']
    ds.Modality = 'NM'

    # fields to fetch from the DICOM header
    to_fetch_fields = ['SeriesInstanceUID', 'PatientID', 'InstanceNumber', 'ManufacturerModelName', 'AcquisitionTime',
        'Modality', 'ImageType', 'ActualFrameDuration', 'NumberOfFrames', '0x00540032', '0x00540052']

    # find information about this series by fetching some images
    df_info = get_data(config, [ds], to_fetch_fields)