In [None]:
# Prerequisite: download the simulated data (generated by Henry Chan) first:
#     wget -i aicdi_data.txt
# Each downloaded array is 32x32x32. This notebook turns each one into a 64x64x64
# sample by zero-padding the real-space object and adding Poisson noise to the
# resulting diffraction magnitude.

In [1]:
import numpy as np
import matplotlib.pylab as plt
from scipy.ndimage.measurements import center_of_mass as com
from scipy.ndimage.interpolation import shift
from scipy.ndimage.interpolation import rotate as R
from tqdm import tqdm
from joblib import Parallel, delayed
import shutil, tempfile
import os

import multiprocessing as ms
import concurrent.futures

In [2]:
data_folder = './aicdi_data/'  # folder that stores the downloaded data (trailing slash required: paths are built by string concatenation)

# aicdi_data.txt lists one entry per line; only the last '/'-separated
# component of each entry (the file name) is kept.
with open(data_folder + 'aicdi_data.txt', 'r') as f:
    txtfile = f.readlines()

dataname = []
for line in txtfile:
    # strip() removes the newline and stray whitespace; blank lines (e.g. a
    # trailing empty line) would otherwise become '' and fail later in np.load.
    name = line.strip().split('/')[-1]
    if name:
        dataname.append(name)

print(len(dataname))

54028


In [3]:
# Output directory for the generated .npz samples.
# (The original note here just said "theta" -- its meaning is not shown in this notebook.)
save_folder = './CDI_simulation_upsamp_noise/'

# Create the directory if it is missing; nothing happens when it already exists.
os.makedirs(save_folder, exist_ok=True)

In [4]:
M = 64  # edge length (voxels) of the zero-padded cube


def create_obj(filename, rng=None):
    """Build one (real-space object, noisy diffraction) pair from a data file.

    The array in ``data_folder + filename`` is inverse-FFT'd (after
    ``ifftshift``) into a complex real-space object, voxels with magnitude
    below 0.1 are zeroed, and the object is centred in an ``M x M x M`` zero
    cube. The magnitude of the (shifted) FFT of that cube is then sampled
    with Poisson noise.

    Parameters
    ----------
    filename : str
        File name relative to ``data_folder``; read with ``np.load``.
    rng : object with a ``poisson(lam=...)`` method, optional
        Random source for the noise (e.g. ``np.random.default_rng(seed)`` for
        reproducible output). When omitted, a fresh OS-seeded generator is
        created for this call.

    Returns
    -------
    obj_upsamp : ndarray, complex64, shape (M, M, M)
        Zero-padded real-space object.
    diff : ndarray, float32, shape (M, M, M)
        Poisson-sampled diffraction magnitude.

    Raises
    ------
    ValueError
        If the loaded array is not 3-D or exceeds ``M`` along any axis.
    """
    data = np.load(data_folder + filename)
    obj = np.fft.ifftn(np.fft.ifftshift(data))

    # Zero everything outside the object (weak-amplitude voxels).
    obj = np.where(np.abs(obj) < 0.1, 0, obj)

    if obj.ndim != 3 or any(n > M for n in obj.shape):
        raise ValueError(
            'expected a 3-D array no larger than {0} per axis, got shape {1}'
            .format(M, obj.shape))

    # Centre the object in the padded cube. For the usual 32^3 input this is
    # the same [M//2-16 : M//2+16] window as before, without hard-coding 16.
    window = tuple(slice((M - n) // 2, (M - n) // 2 + n) for n in obj.shape)
    obj_upsamp = np.zeros((M, M, M), dtype=np.complex128)
    obj_upsamp[window] = obj

    diff = np.abs(np.fft.fftshift(np.fft.fftn(obj_upsamp)))

    # Poisson (shot) noise: standard deviation is sqrt(expected counts).
    # A per-call generator is used instead of the global np.random state,
    # because worker processes created by fork inherit an identical copy of
    # that global state and would all draw the same random stream.
    if rng is None:
        rng = np.random.default_rng()
    diff = rng.poisson(lam=diff)

    diff = np.float32(diff)
    obj_upsamp = np.complex64(obj_upsamp)

    return obj_upsamp, diff

In [5]:
def data_generator(filename, save_folder):
    """Simulate and save one ``.npz`` sample per input file name.

    For every name, ``create_obj`` is called and its two arrays are written
    with ``np.savez`` to ``save_folder + <name up to the first '.'> + '_0.npz'``.
    The arrays are passed positionally, so the archive holds the diffraction
    array as ``arr_0`` and the real-space object as ``arr_1``.

    Parameters
    ----------
    filename : str or list/tuple of str
        A single file name or a collection of them. An empty collection is a
        no-op (it previously raised IndexError).
    save_folder : str
        Output directory prefix; joined by plain string concatenation, so it
        must end with a path separator.

    Returns
    -------
    None
    """
    if isinstance(filename, (list, tuple)):
        names = list(filename)
    else:
        names = [filename]

    # Show a progress bar only for real batches, as before.
    iterator = tqdm(names) if len(names) > 1 else names

    for filename_each in iterator:
        realspace, farfield = create_obj(filename_each)
        # Same naming rule as the file list written later in the notebook:
        # everything before the first '.'.
        name = str(filename_each).split('.')[0]
        np.savez(save_folder + name + '_0.npz', farfield, realspace)
                

In [6]:
n_cores = 12  # upper bound on the number of worker processes
cores = min(n_cores, ms.cpu_count())
print('Use {} cores'.format(cores))

keep_dataname = dataname
ID_list = np.arange(len(keep_dataname))
result_list = []

# Split the file indices into `cores` contiguous, nearly equal chunks and map
# each chunk back to file names; one chunk is one job for one worker.
index = np.array_split(ID_list, cores)
chunks_idx_pattern = [[keep_dataname[int(c)] for c in kk] for kk in index]

print(len(chunks_idx_pattern[0]))

with concurrent.futures.ProcessPoolExecutor(max_workers=cores) as executor:

    # start the jobs (array_split yields empty chunks when there are fewer
    # files than workers; those are not submitted)
    futures = [
        executor.submit(data_generator, idx_list, save_folder)
        for idx_list in chunks_idx_pattern
        if idx_list
    ]

    Total_iter = len(futures)
    Current_iter = 0

    for future in concurrent.futures.as_completed(futures):
        Current_iter += 1

        try:
            result_list.append(future.result())
        except Exception as exc:
            # Report what went wrong instead of hiding it; KeyboardInterrupt
            # and SystemExit are deliberately not caught.
            print('Error in the parallel calculation: {!r}'.format(exc))

        # display the status of the program (finished jobs, failed or not,
        # so the bar always reaches 100%)
        percent_iter = Current_iter / Total_iter * 100
        str_bar = '>' * (int(np.ceil(
            percent_iter / 2))) + ' ' * (int(
                (100 - percent_iter) // 2))
        print(
            '\r' + str_bar + 'processing: [%3.1f%%] ' %
            (percent_iter))

Use 12 cores
4503


100%|██████████| 4503/4503 [11:18<00:00,  6.63it/s]
 93%|█████████▎| 4173/4502 [11:19<01:04,  5.08it/s]


>>>>>                                             processing: [8.3%] 


100%|██████████| 4502/4502 [11:30<00:00,  6.52it/s]
 96%|█████████▌| 4316/4502 [11:30<00:27,  6.84it/s]


>>>>>>>>>                                         processing: [16.7%] 


100%|██████████| 4502/4502 [11:48<00:00,  6.35it/s]
 97%|█████████▋| 4382/4503 [11:48<00:16,  7.29it/s]


>>>>>>>>>>>>>                                     processing: [25.0%] 


100%|██████████| 4502/4502 [11:58<00:00,  6.27it/s]
 99%|█████████▉| 4476/4503 [11:58<00:03,  7.66it/s]


>>>>>>>>>>>>>>>>>                                 processing: [33.3%] 


100%|██████████| 4503/4503 [12:02<00:00,  6.23it/s]
 99%|█████████▉| 4479/4503 [12:02<00:03,  7.53it/s]


>>>>>>>>>>>>>>>>>>>>>                             processing: [41.7%] 


100%|██████████| 4502/4502 [12:04<00:00,  6.22it/s]
100%|█████████▉| 4492/4503 [12:04<00:01,  6.77it/s]


>>>>>>>>>>>>>>>>>>>>>>>>>                         processing: [50.0%] 


100%|██████████| 4502/4502 [12:04<00:00,  6.21it/s]
100%|█████████▉| 4494/4502 [12:04<00:01,  6.99it/s]


>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>                    processing: [58.3%] 


100%|██████████| 4503/4503 [12:05<00:00,  6.20it/s]
100%|██████████| 4502/4502 [12:05<00:00,  6.20it/s]
100%|█████████▉| 4496/4502 [12:06<00:00,  7.51it/s]


>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>                processing: [66.7%] 

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>            processing: [75.0%] 


100%|██████████| 4502/4502 [12:06<00:00,  6.19it/s]
100%|█████████▉| 4497/4503 [12:06<00:00,  7.13it/s]


>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>        processing: [83.3%] 


100%|█████████▉| 4498/4503 [12:07<00:00,  7.52it/s]
100%|█████████▉| 4499/4503 [12:07<00:00,  7.61it/s]


>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>    processing: [91.7%] 


100%|██████████| 4503/4503 [12:07<00:00,  6.19it/s]



>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>processing: [100.0%] 


In [8]:
dataname_list = save_folder + '3D_upsamp.txt'

# Write the list of generated sample files, one per line.
# Mode "w" (not "a") makes the cell idempotent: re-running it rewrites the
# list instead of appending a duplicate copy of every entry.
with open(dataname_list, 'w') as list_file:
    for entry in dataname:
        # Same naming rule used when the samples are saved:
        # everything before the first '.' plus the '_0.npz' suffix.
        name = str(entry).split('.')[0]
        # '\n' rather than os.linesep: text mode already translates newlines,
        # so os.linesep would produce '\r\r\n' on Windows.
        list_file.write(name + '_0.npz' + '\n')