# Steps:
1. change the notebook title and save it according to the type of data being generated (e.g. halfdose vs. 25% dose)
2. create the corresponding patient series folder (e.g. halfdose)
3. just before the final images are saved, change the folder name (in the 2nd-to-last line)

# Import packages

In [None]:
import os
import sys
import glob
import matplotlib as plt
import matplotlib.pyplot as plt 
import matplotlib.image as mpimage
import pydicom
from pydicom import dcmread
import numpy as np
import scipy
import shutil
#from tqdm import notebook tqdm as tqdm
import tqdm

In [None]:
# Build the list of patient accessions: the names of the folders found
# directly under rootDir (each folder name is an accession).
rootDir = '/mnt/data2/dect/data/dect_pull/Pull/'
patients = []
# The first item yielded by os.walk describes rootDir itself; element [1] is
# the list of its immediate sub-directory names.
patients.extend(next(os.walk(rootDir))[1])

In [None]:
# First identify accession folders that do not have all 4 series subfolders
# (e.g. 100kvp, iodine, vue, 50kev).
#
# Only the direct children of `path` are inspected. Walking the whole tree
# could also flag `path` itself (when it holds fewer than 4 accessions) or any
# deeper directory, and every entry of remove_list is deleted further down.
# An accession folder with no subfolders at all is flagged as incomplete too.
import os

path = '/mnt/data2/dect/data/dect_pull/Pull/'
remove_list = []
# next(..., default) keeps the cell a no-op when `path` does not exist.
for accession in next(os.walk(path), (path, [], []))[1]:
    accession_dir = os.path.join(path, accession)
    listing = next(os.walk(accession_dir), None)
    if listing is None:
        # The folder could not be listed; do not flag what was not inspected.
        continue
    subdir = listing[1]
    if len(subdir) < 4:
        print("{}".format(accession_dir) + " is incomplete with, " + "total subfolders of: " + str(len(subdir)))
        remove_list.append(accession_dir)

In [None]:
# Display the folders collected in remove_list so they can be reviewed
# before the next cell deletes them:
remove_list

In [None]:
# *** DELETES EVERY FOLDER LISTED IN remove_list (recursive, cannot be undone) ***
import shutil
for incomplete_folder in remove_list:
    shutil.rmtree(incomplete_folder)

In [None]:
# Identify exams where the number of DICOMs in the 100 kVp series does not
# match the number in the iodine series or in the VUE series.
mismatch = []
kvp100 = '2.5MM_100_kVp_Like_Stnd'
iodine = '2.5MM_Iodine(Water)_Stnd_AR50'
vue = 'VUE_2.5MM_VUE_Stnd_AR50'
pull_root = '/mnt/data2/dect/data/dect_pull/Pull/'
for accession in patients:
    # Count the .dcm files directly inside each series folder. The counts are
    # kept as integers, and the whole-accession recursive glob was dropped
    # because its result was never used.
    size_100, size_iodine, size_vue = [
        len(glob.glob(pull_root + str(accession) + '/' + series + '/*.dcm'))
        for series in (kvp100, iodine, vue)
    ]

    # 100 should equal iodine and 100 should equal vue; otherwise report the
    # accession and remember it (at most once per accession).
    if size_100 != size_iodine:
        print('{} has a mismatch b/ 100 and iodine with 100kvp of {} and iodine of {}'.format(
            accession, size_100, size_iodine))
        mismatch.append(accession)
    elif size_100 != size_vue:
        print('{} has a mismatch b/ 100 and vue with 100kvp of {} and vue of {}'.format(
            accession, size_100, size_vue))
        mismatch.append(accession)

In [None]:
# Show how many accessions were collected in mismatch:
len(mismatch)

In [None]:
# *** DELETES THE FOLDER OF EVERY ACCESSION LISTED IN mismatch (cannot be undone) ***
import shutil
for mismatched_accession in mismatch:
    shutil.rmtree('/mnt/data2/dect/data/dect_pull/Pull/' + str(mismatched_accession))

In [None]:
# Left-pad the name of every entry in the pull folder with zeros (str.zfill)
# so that it is at least 8 characters long; longer names are left unchanged.
path = '/mnt/data2/dect/data/dect_pull/Pull/'
for entry in os.listdir(path):
    source = os.path.join(path, entry)
    target = os.path.join(path, entry.zfill(8))
    os.rename(source, target)

In [None]:
# Rebuild the list of patient accessions now that mismatched exams have been
# removed: the names of the folders found directly under rootDir.
rootDir = '/mnt/data2/dect/data/dect_pull/Pull/'
updated_patients = []
# Element [1] of the first os.walk item is the list of immediate
# sub-directory names of rootDir.
updated_patients.extend(next(os.walk(rootDir))[1])

In [None]:
# Show how many accessions are in the refreshed list:
len(updated_patients)

In [None]:
# Create the output subfolder(s) inside every patient folder.
# Folder names listed by the original author for the other dose levels:
# 'halfdose', '25dose', '15dose', '10dose', '5dose' (just use VUE for 0).
# NOTE(review): processing() below writes into a '20dose' folder, while this
# cell creates '25dose' -- keep the two names in sync.
import os
root_path = '/mnt/data2/dect/data/dect_pull/Pull/'
subfolders = ['25dose']
for patient_id in updated_patients:
    for subfolder in subfolders:
        # exist_ok=True lets the cell be re-run without raising
        # FileExistsError for folders created by an earlier run.
        os.makedirs(os.path.join(root_path, patient_id, subfolder), exist_ok=True)

## Pre-processing function

In [None]:
#walk a single file or fname through the process, in this case 100kVp file
def processing(file, *, dose_fraction=0.20, output_folder='20dose', verbose=False):
    """Synthesize a reduced-iodine image for one 100 kVp slice and save it.

    The matching iodine and VUE DICOMs are located by swapping the 100 kVp
    series folder name inside ``file`` for the iodine / VUE series folder
    names, so the three series must use identical file names.  The 100 kVp
    dataset is reused as the container: its PixelData is replaced with the
    synthesized image and it is saved under ``output_folder``.

    Args:
        file: Path (str) of a DICOM inside a ``2.5MM_100_kVp_Like_Stnd``
            folder.
        dose_fraction: Factor applied to the iodine image before it is
            recombined with the water image (0.20 by default; e.g. 0.50 for
            half dose).
        output_folder: Folder name that replaces the 100 kVp series folder
            name in the output path ('20dose' by default).  The folder is
            created when it does not exist yet.
        verbose: When True, print the path of the file that was saved.

    Returns:
        None.

    Raises:
        ValueError: If ``file`` does not contain the 100 kVp series folder
            name.  Without it the derived iodine, VUE and output paths would
            all equal ``file`` and the source DICOM would be overwritten.
    """
    kvp100_series = '2.5MM_100_kVp_Like_Stnd'
    if kvp100_series not in file:
        raise ValueError(
            "expected a path inside '{}', got: {}".format(kvp100_series, file)
        )

    # Derive the sibling input paths and the output path from the 100 kVp path.
    iodine_fname = file.replace(kvp100_series, '2.5MM_Iodine(Water)_Stnd_AR50')
    vue_fname = file.replace(kvp100_series, 'VUE_2.5MM_VUE_Stnd_AR50')
    new_fname = file.replace(kvp100_series, output_folder)

    # Load the three DICOMs.
    ordered_dicom = pydicom.dcmread(file)
    ordered_iodine_dicom = pydicom.dcmread(iodine_fname)
    ordered_vue_dicom = pydicom.dcmread(vue_fname)

    # Work in float64 so integer pixel arrays cannot overflow in the
    # arithmetic below.
    iodine2d = ordered_iodine_dicom.pixel_array.astype(np.float64)
    vue2d = ordered_vue_dicom.pixel_array.astype(np.float64)

    # NOTE(review): the scaling below presumably assumes VUE pixels in HU and
    # iodine pixels in 1/10000 of a density unit; pixel_array returns stored
    # values without applying any rescale slope/intercept -- confirm.
    pwater = (vue2d + 1000) / 1000
    piodine = iodine2d / 10000

    # NOTE(review): presumably mass attenuation coefficients of iodine and
    # water at 70 keV (cf. the name lin_atten_70) -- confirm the source.
    mass_atten_iodine = 5.0150
    mass_atten_vue = 0.1929

    # Scale the iodine contribution, then recombine it with the water part.
    mass_density_percent_iodine = piodine * dose_fraction * mass_atten_iodine
    mass_density_vue = pwater * mass_atten_vue
    lin_atten_70 = mass_density_percent_iodine + mass_density_vue

    # Express the result relative to the water coefficient, scaled by 1000.
    imageVM = 1000 * ((lin_atten_70 - mass_atten_vue) / mass_atten_vue)

    # NOTE(review): the unsigned 16-bit cast is kept from the original code.
    # Values below 0 are not representable in uint16; confirm that this
    # matches the PixelRepresentation of the 100 kVp dataset being reused.
    ordered_dicom.PixelData = imageVM.astype(np.uint16).tobytes()

    # Make sure the destination folder exists before saving.
    out_dir = os.path.dirname(new_fname)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    ordered_dicom.save_as(new_fname)

    if verbose:
        print('saved: {}'.format(new_fname))

### LOAD REG (100 KvP IMAGES) 

In [None]:
cd /Volumes/bhavikssd4/AI_DECT/

In [None]:
# Collect the paths of all 100 kVp DICOMs; the pattern is relative, so the
# search starts from the current working directory.
fnames = glob.glob('**/2.5MM_100_kVp_Like_Stnd/*.dcm', recursive=True)
print("file count: {}".format(len(fnames)))

In [None]:
# Put the 100 kVp paths in descending lexicographic order (in place).
fnames.sort(reverse=True)

### LOAD IODINE IMAGES

In [None]:
# Collect the paths of all iodine DICOMs; the pattern is relative, so the
# search starts from the current working directory.
fnames_iodine = glob.glob('**/2.5MM_Iodine(Water)_Stnd_AR50/*.dcm', recursive=True)
print("file count: {}".format(len(fnames_iodine)))

In [None]:
# Put the iodine paths in descending lexicographic order (in place).
fnames_iodine.sort(reverse=True)

### LOAD VUE SECTION

In [None]:
# Collect the paths of all VUE DICOMs; the pattern is relative, so the
# search starts from the current working directory.
fnames_vue = glob.glob('**/VUE_2.5MM_VUE_Stnd_AR50/*.dcm', recursive=True)
print("file count: {}".format(len(fnames_vue)))

In [None]:
# Put the VUE paths in descending lexicographic order (in place).
fnames_vue.sort(reverse=True)

## Change filepath string of iodine and vue so they match "IM-****" of 100kVp files

In [None]:
# Keep the last 13 characters of every 100 kVp path (presumably the tail of
# the "IM-****" file name, extension included).
list_fname_strings = [kvp_path[-13:] for kvp_path in fnames]

In [None]:
# Give every iodine path the file-name tail (last 13 characters) of the
# 100 kVp path found at the same index.
# The pairing is purely positional, so stop when the two lists differ in
# length: the pairs could then no longer correspond to the same slices, and
# these names are used to rename files on disk.
if len(fnames_iodine) != len(list_fname_strings):
    raise ValueError(
        'iodine / 100kVp file count mismatch: {} vs {}'.format(
            len(fnames_iodine), len(list_fname_strings)))
list_fname_iodine_strings = [
    item[:-13] + tail for item, tail in zip(fnames_iodine, list_fname_strings)
]

In [None]:
# Write the filename change for the iodine series.
# The renames are staged through temporary names: a target name may be the
# current name of another file that has not been renamed yet, and a direct
# os.rename() onto it would replace that file (silently on POSIX).
rename_plan = [
    (src, dst)
    for src, dst in zip(fnames_iodine, list_fname_iodine_strings)
    if src != dst  # already has the wanted name
]
planned_sources = {src for src, _ in rename_plan}
planned_targets = [dst for _, dst in rename_plan]
if len(set(planned_targets)) != len(planned_targets):
    raise ValueError('iodine rename would map several files onto one name')
# A target that exists on disk and is not itself about to be moved away
# would be overwritten.
blocked = [dst for dst in planned_targets
           if dst not in planned_sources and os.path.exists(dst)]
if blocked:
    raise FileExistsError(
        'iodine rename would overwrite existing files, e.g. {}'.format(blocked[:5]))
staged = []
for src, dst in rename_plan:
    tmp = src + '.renaming'
    os.rename(src, tmp)
    staged.append((tmp, dst))
for tmp, dst in staged:
    os.rename(tmp, dst)

In [None]:
# Give every VUE path the file-name tail (last 13 characters) of the
# 100 kVp path found at the same index.
# The pairing is purely positional, so stop when the two lists differ in
# length: the pairs could then no longer correspond to the same slices, and
# these names are used to rename files on disk.
if len(fnames_vue) != len(list_fname_strings):
    raise ValueError(
        'vue / 100kVp file count mismatch: {} vs {}'.format(
            len(fnames_vue), len(list_fname_strings)))
list_fname_vue_strings = [
    item[:-13] + tail for item, tail in zip(fnames_vue, list_fname_strings)
]

In [None]:
# Write the filename change for the VUE series.
# The renames are staged through temporary names: a target name may be the
# current name of another file that has not been renamed yet, and a direct
# os.rename() onto it would replace that file (silently on POSIX).
rename_plan = [
    (src, dst)
    for src, dst in zip(fnames_vue, list_fname_vue_strings)
    if src != dst  # already has the wanted name
]
planned_sources = {src for src, _ in rename_plan}
planned_targets = [dst for _, dst in rename_plan]
if len(set(planned_targets)) != len(planned_targets):
    raise ValueError('vue rename would map several files onto one name')
# A target that exists on disk and is not itself about to be moved away
# would be overwritten.
blocked = [dst for dst in planned_targets
           if dst not in planned_sources and os.path.exists(dst)]
if blocked:
    raise FileExistsError(
        'vue rename would overwrite existing files, e.g. {}'.format(blocked[:5]))
staged = []
for src, dst in rename_plan:
    tmp = src + '.renaming'
    os.rename(src, tmp)
    staged.append((tmp, dst))
for tmp, dst in staged:
    os.rename(tmp, dst)

## Process files

In [None]:
# Run the pre-processing on every 100 kVp file, with a progress bar.
# `tqdm` is imported as a module (`import tqdm`), so the progress-bar class
# has to be reached as tqdm.tqdm; calling the module itself raises TypeError.
for file in tqdm.tqdm(fnames):
    processing(file)