In [None]:
!pwd

In [None]:
# Load paths for using psana
%env SIT_ROOT=/reg/g/psdm/
%env SIT_DATA=/cds/group/psdm/data/
%env SIT_PSDM_DATA=/cds/data/psdm/

In [1]:
import numpy as np
import os
import h5py
import random
import pickle

In [2]:
import matplotlib              as mpl
import matplotlib.pyplot       as plt
import matplotlib.colors       as mcolors
import matplotlib.patches      as mpatches
import matplotlib.transforms   as mtransforms
import matplotlib.font_manager as font_manager
%matplotlib inline

In [3]:
from deepprojection.plugins import PsanaImg
from deepprojection.utils   import split_dataset, set_seed

In [None]:
exp           = 'amo06516'
run           = '90'
mode          = 'idx'
detector_name = 'Camp.0:pnCCD.0'

psana_img = PsanaImg( exp           = exp,
                      run           = run,
                      mode          = mode,
                      detector_name = detector_name, )

In [None]:
path_h5 = 'skopi/h5s.pnccd.6Q5U/6Q5U.4_hit.h5'
# path_h5 = 'skopi/h5s.pnccd.5VFR/5VFR.4_hit.h5'
# path_h5 = 'skopi/h5s.pnccd.5IJO/5IJO.1_hit.h5'
# path_h5 = 'skopi/h5s.pnccd.3ZIF/3ZIF.2_hit.h5'

In [None]:
fh = h5py.File(path_h5, 'r')

In [None]:
orientation = fh.get('orientations')[0]
orientation

In [None]:
len(fh.get('photons'))

In [None]:
for i in [ random.randrange(0, len(fh.get('photons'))) for _ in range(20) ]:
    data = fh.get('photons')[i]
    img = psana_img.get(0, data)
    plt.figure(figsize = (10, 8))
    data = img
    vmin = data.mean() - 0.4 * data.std()
    vmax = data.mean() + 2.0 * data.std()
    plt.imshow(data, vmin = vmin, vmax = vmax)
    plt.colorbar()

In [None]:
fh.close()

In [None]:
with h5py.File(path_h5, 'r') as fh:
    for i in [ random.randrange(0, len(fh.get('photons'))) for _ in range(20) ]:
        data = fh.get('photons')[i]
        img = psana_img.get(0, data)
        plt.figure(figsize = (10, 8))
        data = img
        vmin = data.mean() - 0.4 * data.std()
        vmax = data.mean() + 2.0 * data.std()
        plt.imshow(data, vmin = vmin, vmax = vmax)
        plt.colorbar()

#### Create fastdata

In [4]:
seed = 0
random.seed(seed)

In [5]:
def create_path_label_list(pdb, drc):
    path_label_list = []
    for i in range(1, 4 + 1):
        fl_h5   = f"{pdb}.{i}_hit.h5"
        path_h5 = os.path.join(drc, fl_h5)
        label   = (pdb, i)
        path_label_list.append((path_h5, label))
    
    return path_label_list

In [6]:
drc = 'skopi/h5s_mini.sq'
create_path_label_list("6Q5U", drc)

[('skopi/h5s_mini.sq/6Q5U.1_hit.h5', ('6Q5U', 1)),
 ('skopi/h5s_mini.sq/6Q5U.2_hit.h5', ('6Q5U', 2)),
 ('skopi/h5s_mini.sq/6Q5U.3_hit.h5', ('6Q5U', 3)),
 ('skopi/h5s_mini.sq/6Q5U.4_hit.h5', ('6Q5U', 4))]

#### Label corrected (This)

In [7]:
path_dat = 'skopi/pdb_num.dat'
data = open(path_dat).readlines()
pdb_size_dict = {}
for d in data:
    pdb, mol_size = d.strip().split()
    pdb_size_dict[pdb] = int(mol_size)
size_list = list(pdb_size_dict.values())
size_nplist = np.array(size_list)
hy, hx = np.histogram(size_nplist, bins = 50)
size_pdb_dict = {}
for enum_i, (size_min, size_max) in enumerate(zip(hx[:-1], hx[1:])):
    size_pdb_dict[enum_i] = [ pdb for pdb, mol_size in pdb_size_dict.items() if size_min < mol_size < size_max ]

In [8]:
size_pdb_dict.keys()    # number of buckets

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])

In [9]:
[ (key, len(val)) for key, val in size_pdb_dict.items()  ]

[(0, 5640),
 (1, 33),
 (2, 15),
 (3, 7),
 (4, 6),
 (5, 2),
 (6, 1),
 (7, 1),
 (8, 3),
 (9, 2),
 (10, 1),
 (11, 0),
 (12, 1),
 (13, 1),
 (14, 5),
 (15, 1),
 (16, 11),
 (17, 8),
 (18, 14),
 (19, 2),
 (20, 2),
 (21, 1),
 (22, 0),
 (23, 2),
 (24, 0),
 (25, 1),
 (26, 0),
 (27, 1),
 (28, 3),
 (29, 0),
 (30, 2),
 (31, 0),
 (32, 3),
 (33, 0),
 (34, 0),
 (35, 0),
 (36, 0),
 (37, 0),
 (38, 0),
 (39, 0),
 (40, 0),
 (41, 1),
 (42, 1),
 (43, 1),
 (44, 1),
 (45, 1),
 (46, 1),
 (47, 0),
 (48, 0),
 (49, 1)]

In [10]:
bucket = 18
pdb_candidate_list = size_pdb_dict[bucket]
num_pdb_for_test = 100
num_pdb_for_test = min(len(pdb_candidate_list), num_pdb_for_test)
pdb_list = random.sample(pdb_candidate_list, k = num_pdb_for_test)

In [11]:
for pdb in pdb_list:
    data_per_pdb_list = []
    path_label_list = create_path_label_list(pdb, drc)
    mask = np.load('skopi/h5s_mini.sq/mask.npy')
    for path_h5, label in path_label_list:
        data_list = []
        
        # Correct label...
        pdb, hit_type = label
        hit_type = min(hit_type, 2)
        label_corrected = (pdb, hit_type) 
        
        with h5py.File(path_h5, 'r') as fh:     
            for idx, img in enumerate(fh.get('photons')):
                img = img * mask[None,]
                # data_list.append((img[0], label, f"{label[0]} {label[1]} {idx}"))
                data_list.append((img[0], label_corrected, f"{label_corrected[0]} {label_corrected[1]}({label[1]}) {idx}"))
        data_per_pdb_list.extend(data_list)
        
    path_pickle = f'fastdata.h5/{pdb}.relabel.pickle'
    with open(path_pickle, 'wb') as handle:
        pickle.dump(data_per_pdb_list, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
pdb_list.index('7EH1')

In [None]:
pdb_list

In [None]:
path_pickle

In [None]:
global_data_list[99]

#### Create fastdata

In [None]:
seed = 0
random.seed(seed)

In [None]:
hit_global_list = []

In [None]:
path_h5 = 'skopi/h5s.pnccd.6Q5U/6Q5U.1_hit.h5'
num_hit = 200
with h5py.File(path_h5, 'r') as fh:
    hit_list = random.sample(range(len(fh.get('photons'))), num_hit)
    hit_global_list.extend([ psana_img.get(0, fh.get('photons')[hit_idx]) for hit_idx in hit_list])

In [None]:
path_h5_list = [ ('skopi/h5s.pnccd.6Q5U/6Q5U.1_hit.h5', 1, 180),
                 ('skopi/h5s.pnccd.6Q5U/6Q5U.2_hit.h5', 2, 60),
                 ('skopi/h5s.pnccd.6Q5U/6Q5U.3_hit.h5', 2, 60),
                 ('skopi/h5s.pnccd.6Q5U/6Q5U.4_hit.h5', 2, 60), ]
# num_hit = 200
hit_global_list = []
for path_h5, label, num_hit in path_h5_list:
    with h5py.File(path_h5, 'r') as fh:
        hit_list = random.sample(range(len(fh.get('photons'))), num_hit)
        hit_global_list.extend([ (psana_img.get(0, fh.get('photons')[hit_idx]), label, (path_h5, str(hit_idx), str(label))) for hit_idx in hit_list])

In [None]:
import pickle
path_pickle = 'fastdata.h5/6Q5U.pnccd.pickle'
with open(path_pickle, 'wb') as handle:
    pickle.dump(hit_global_list, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
hit_global_list[0]

#### Create fastdata

In [None]:
seed = 0
random.seed(seed)

In [None]:
hit_global_list = []

In [None]:
path_h5 = 'skopi/h5s.pnccd.3ZIF/3ZIF.1_hit.h5'
num_hit = 200
with h5py.File(path_h5, 'r') as fh:
    hit_list = random.sample(range(len(fh.get('photons'))), num_hit)
    hit_global_list.extend([ psana_img.get(0, fh.get('photons')[hit_idx]) for hit_idx in hit_list])

In [None]:
path_h5_list = [ ('skopi/h5s.pnccd.3ZIF/3ZIF.1_hit.h5', 1, 180),
                 ('skopi/h5s.pnccd.3ZIF/3ZIF.2_hit.h5', 2, 60),
                 ('skopi/h5s.pnccd.3ZIF/3ZIF.3_hit.h5', 2, 60),
                 ('skopi/h5s.pnccd.3ZIF/3ZIF.4_hit.h5', 2, 60), ]
# num_hit = 200
hit_global_list = []
for path_h5, label, num_hit in path_h5_list:
    with h5py.File(path_h5, 'r') as fh:
        hit_list = random.sample(range(len(fh.get('photons'))), num_hit)
        hit_global_list.extend([ (psana_img.get(0, fh.get('photons')[hit_idx]), label, (path_h5, str(hit_idx), str(label))) for hit_idx in hit_list])

In [None]:
import pickle
path_pickle = 'fastdata.h5/3ZIF.pnccd.pickle'
with open(path_pickle, 'wb') as handle:
    pickle.dump(hit_global_list, handle, protocol=pickle.HIGHEST_PROTOCOL)

#### Create fastdata

In [None]:
seed = 0
random.seed(seed)

In [None]:
hit_global_list = []

In [None]:
path_h5 = 'skopi/h5s.pnccd.5IJO/5IJO.1_hit.h5'
num_hit = 200
with h5py.File(path_h5, 'r') as fh:
    hit_list = random.sample(range(len(fh.get('photons'))), num_hit)
    hit_global_list.extend([ psana_img.get(0, fh.get('photons')[hit_idx]) for hit_idx in hit_list])

In [None]:
path_h5_list = [ ('skopi/h5s.pnccd.5IJO/5IJO.1_hit.h5', 1, 180),
                 ('skopi/h5s.pnccd.5IJO/5IJO.2_hit.h5', 2, 60),
                 ('skopi/h5s.pnccd.5IJO/5IJO.3_hit.h5', 2, 60),
                 ('skopi/h5s.pnccd.5IJO/5IJO.4_hit.h5', 2, 60), ]
# num_hit = 200
hit_global_list = []
for path_h5, label, num_hit in path_h5_list:
    with h5py.File(path_h5, 'r') as fh:
        hit_list = random.sample(range(len(fh.get('photons'))), num_hit)
        hit_global_list.extend([ (psana_img.get(0, fh.get('photons')[hit_idx]), label, (path_h5, str(hit_idx), str(label))) for hit_idx in hit_list])

In [None]:
import pickle
path_pickle = 'fastdata.h5/5IJO.pnccd.pickle'
with open(path_pickle, 'wb') as handle:
    pickle.dump(hit_global_list, handle, protocol=pickle.HIGHEST_PROTOCOL)

#### Create fastdata

In [None]:
seed = 0
random.seed(seed)

In [None]:
hit_global_list = []

In [None]:
path_h5 = 'skopi/h5s.pnccd.5IJO/5IJO.1_hit.h5'
num_hit = 200
with h5py.File(path_h5, 'r') as fh:
    hit_list = random.sample(range(len(fh.get('photons'))), num_hit)
    hit_global_list.extend([ psana_img.get(0, fh.get('photons')[hit_idx]) for hit_idx in hit_list])

In [None]:
path_h5_list = [ ('skopi/h5s.pnccd.5VFR/5VFR.1_hit.h5', 1, 180),
                 ('skopi/h5s.pnccd.5VFR/5VFR.2_hit.h5', 2, 60),
                 ('skopi/h5s.pnccd.5VFR/5VFR.3_hit.h5', 2, 60),
                 ('skopi/h5s.pnccd.5VFR/5VFR.4_hit.h5', 2, 60), ]
# num_hit = 200
hit_global_list = []
for path_h5, label, num_hit in path_h5_list:
    with h5py.File(path_h5, 'r') as fh:
        hit_list = random.sample(range(len(fh.get('photons'))), num_hit)
        hit_global_list.extend([ (psana_img.get(0, fh.get('photons')[hit_idx]), label, (path_h5, str(hit_idx), str(label))) for hit_idx in hit_list])

In [None]:
import pickle
path_pickle = 'fastdata.h5/5VFR.pnccd.pickle'
with open(path_pickle, 'wb') as handle:
    pickle.dump(hit_global_list, handle, protocol=pickle.HIGHEST_PROTOCOL)

#### 

In [None]:
2305 / 4

#### Square detector

In [None]:
import 

In [None]:
path_h5 = 'skopi/h5s_mini.sq/6Q5U.2_hit.h5'

In [None]:
fh = h5py.File(path_h5, 'r')

In [None]:
orientation = fh.get('orientations')[0]
orientation

In [None]:
for i in [ random.randrange(0, len(fh.get('photons'))) for _ in range(20) ]:
    data = fh.get('photons')[i]
    img  = data[0]
    ## img = psana_img.get(0, data)
    plt.figure(figsize = (10, 8))
    data = img
    vmin = data.mean() - 0.4 * data.std()
    vmax = data.mean() + 2.0 * data.std()
    plt.imshow(data, vmin = vmin, vmax = vmax)
    plt.colorbar()

In [None]:
data.shape

In [None]:
172 // 2

In [None]:
data = fh.get('photons')[0][0]
mask = np.ones(data.shape, dtype = np.float32)
size_y, size_x = mask.shape
yc = size_y // 2
xc = size_x // 2

dy = 172
dx = 4
y_min = yc - dy // 2
y_max = yc + dy // 2
x_min = xc - dx // 2
x_max = xc + dx // 2
mask[y_min:y_max, x_min:x_max] = 0

dy = 6
dx = 8
y_min = yc - dy // 2
y_max = yc + dy // 2
x_min = xc - dx // 2
x_max = xc + dx // 2
mask[y_min:y_max, x_min:x_max] = 0

In [None]:
mask.shape

In [None]:
fl_mask = 'mask.npy'
drc_mask  = 'skopi/h5s_mini.sq'
path_mask = os.path.join(drc_mask, fl_mask)
np.save(path_mask, mask)

In [None]:
img = mask
## img = psana_img.get(0, data)
plt.figure(figsize = (10, 8))
data = img
vmin = data.mean() - 0.4 * data.std()
vmax = data.mean() + 1.0 * data.std()
plt.imshow(data, vmin = vmin, vmax = vmax)
plt.colorbar()

In [None]:
data = fh.get('photons')[i]
img  = data[0]

In [None]:
plt.figure(figsize = (10, 8))
# img = img * mask
vmin = 0
vmax = img.mean() + 0.01 * img.std()
plt.imshow(img, vmin = 0, vmax = 4)
plt.colorbar()

In [None]:
plt.figure(figsize = (10, 8))
img = img * mask
vmin = img.mean() - 0.0 * img.std()
vmax = img.mean() + 0.4 * img.std()
plt.imshow(img, vmin = 0, vmax = 4)
plt.colorbar()

In [None]:
from scipy.stats import gamma

In [None]:
shape, scale = 1., 1.
s = np.random.gamma(shape, scale, 1000)
s
plt.hist(s)

In [None]:
57 * 4 / 1000

In [None]:
img.shape, mask.shape

In [None]:
for i in [ random.randrange(0, len(fh.get('photons'))) for _ in range(20) ]:
    data = fh.get('photons')[i]
    img  = data[0]
    ## img = psana_img.get(0, data)
    plt.figure(figsize = (10, 8))
    img = img * mask
    vmin = img.mean() - 0.5 * img.std()
    vmax = img.mean() + 1.0 * img.std()
    plt.imshow(img, vmin = vmin, vmax = vmax)
    plt.colorbar()
    # plt.xlim(172//2 - 40, 127//2 + 40)

In [None]:
img.shape

In [None]:
fh.close()

#### Create fastdata (May)

In [None]:
!pwd

In [None]:
# Load paths for using psana
%env SIT_ROOT=/reg/g/psdm/
%env SIT_DATA=/cds/group/psdm/data/
%env SIT_PSDM_DATA=/cds/data/psdm/

In [None]:
import numpy as np
import os
import h5py
import random

In [None]:
import matplotlib              as mpl
import matplotlib.pyplot       as plt
import matplotlib.colors       as mcolors
import matplotlib.patches      as mpatches
import matplotlib.transforms   as mtransforms
import matplotlib.font_manager as font_manager
%matplotlib inline

In [None]:
from deepprojection.plugins import PsanaImg
from deepprojection.utils   import split_dataset, set_seed

In [None]:
seed = 0
random.seed(seed)

In [None]:
fl_dat = 'skopi/mini.dat'
with open(fl_dat, 'r') as fh:
    entry_list = [ line.strip() for line in fh.readlines() ]

In [None]:
entry_list[0]

In [None]:
train_set, hold_set = split_dataset(entry_list, 0.5)

In [None]:
train_set[-2]

In [None]:
len(train_set), len(hold_set)

In [None]:
3027 * (57 * 4 / 1000)

In [None]:
100 * (57 * 4 / 1000)

In [None]:
train_set[:100]

In [None]:
a = ("6Q5U", "1_hit")
b = ("6Q5U", "2_hit")

In [None]:
a == b

In [None]:
("6Q5U", "1_hit") == ("6Q5U", "1_hit")

In [None]:
pdb_list = train_set[:100]

In [None]:
def create_path_label_list(pdb, drc):
    path_label_list = []
    for i in range(1, 4 + 1):
        fl_h5   = f"{pdb}.{i}_hit.h5"
        path_h5 = os.path.join(drc, fl_h5)
        label   = (pdb, i)
        path_label_list.append((path_h5, label))
    
    return path_label_list

In [None]:
drc = 'skopi/h5s_mini.sq'
create_path_label_list("6Q5U", drc)

In [None]:
fh.get('photons').shape

In [None]:
global_data_list = []
for pdb in pdb_list:
    path_label_list = create_path_label_list(pdb, drc)
    mask = np.load('skopi/h5s_mini.sq/mask.npy')
    for path_h5, label in path_label_list:
        data_list = []
        with h5py.File(path_h5, 'r') as fh:     
            for idx, img in enumerate(fh.get('photons')):
                img = img * mask[None,]
                data_list.append((img[0], label, f"{label[0]} {label[1]} {idx}"))
        global_data_list.extend(data_list)

In [None]:
import pickle
path_pickle = 'fastdata.h5/mini.sq.pickle'
with open(path_pickle, 'wb') as handle:
    pickle.dump(global_data_list, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
np.array(global_data_list).shape

In [None]:
global_data_list[40]

In [None]:
img = global_data_list[40][0][0]
plt.figure(figsize = (10, 8))
vmin = img.mean() - 0.0 * img.std()
vmax = img.mean() + 0.4 * img.std()
plt.imshow(img, vmin = 0, vmax = 4)
plt.colorbar()

In [None]:
import pickle
path_pickle = 'fastdata.h5/mini.sq.pickle'
with open(path_pickle, 'rb') as handle:
    loaded_data_list = pickle.load(handle)

#### Clean data

In [None]:
len(loaded_data_list)

#### Create fastdata (Dec)

In [None]:
!pwd

In [None]:
# Load paths for using psana
%env SIT_ROOT=/reg/g/psdm/
%env SIT_DATA=/cds/group/psdm/data/
%env SIT_PSDM_DATA=/cds/data/psdm/

In [None]:
import numpy as np
import os
import h5py
import random

In [None]:
import matplotlib              as mpl
import matplotlib.pyplot       as plt
import matplotlib.colors       as mcolors
import matplotlib.patches      as mpatches
import matplotlib.transforms   as mtransforms
import matplotlib.font_manager as font_manager
%matplotlib inline

In [None]:
from deepprojection.plugins import PsanaImg
from deepprojection.utils   import split_dataset, set_seed

In [None]:
seed = 0
random.seed(seed)

In [None]:
fl_dat = 'skopi/mini.sq.dat'
with open(fl_dat, 'r') as fh:
    entry_list = [ line.strip() for line in fh.readlines() ]

In [None]:
entry_list[0]

In [None]:
train_set, hold_set = split_dataset(entry_list, 0.5)

In [None]:
train_set[-2]

In [None]:
len(train_set), len(hold_set)

In [None]:
3027 * (57 * 4 / 1000)

In [None]:
100 * (57 * 4 / 1000)

In [None]:
train_set[:100]

In [None]:
a = ("6Q5U", "1_hit")
b = ("6Q5U", "2_hit")

In [None]:
a == b

In [None]:
("6Q5U", "1_hit") == ("6Q5U", "1_hit")

In [None]:
pdb_list = entry_list

In [None]:
def create_path_label_list(pdb, drc):
    path_label_list = []
    for i in range(1, 4 + 1):
        fl_h5   = f"{pdb}.{i}_hit.h5"
        path_h5 = os.path.join(drc, fl_h5)
        label   = (pdb, i)
        path_label_list.append((path_h5, label))
    
    return path_label_list

In [None]:
drc = 'skopi/h5s_mini.sq.train'
create_path_label_list("6Q5U", drc)

In [None]:
fh.get('photons').shape

#### Original label

In [None]:
global_data_list = []
for pdb in pdb_list:
    path_label_list = create_path_label_list(pdb, drc)
    mask = np.load('skopi/h5s_mini.sq/mask.npy')
    for path_h5, label in path_label_list:
        data_list = []
        with h5py.File(path_h5, 'r') as fh:     
            for idx, img in enumerate(fh.get('photons')):
                img = img * mask[None,]
                data_list.append((img[0], label, f"{label[0]} {label[1]} {idx}"))
        global_data_list.extend(data_list)

#### Label corrected

In [None]:
global_data_list = []
for pdb in pdb_list:
    path_label_list = create_path_label_list(pdb, drc)
    mask = np.load('skopi/h5s_mini.sq/mask.npy')
    for path_h5, label in path_label_list:
        data_list = []
        
        # Correct label...
        pdb, hit_type = label
        hit_type = min(hit_type, 2)
        label_corrected = (pdb, hit_type) 
        
        with h5py.File(path_h5, 'r') as fh:     
            for idx, img in enumerate(fh.get('photons')):
                img = img * mask[None,]
                # data_list.append((img[0], label, f"{label[0]} {label[1]} {idx}"))
                data_list.append((img[0], label_corrected, f"{label_corrected[0]} {label_corrected[1]}({label[1]}) {idx}"))
        global_data_list.extend(data_list)

In [None]:
global_data_list[300+1]

In [None]:
global_data_list[200+1]

In [None]:
global_data_list[100+1]

In [None]:
import pickle
path_pickle = 'fastdata.h5/mini.sq.train.relabel.pickle'
with open(path_pickle, 'wb') as handle:
    pickle.dump(global_data_list, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
global_data_list[20]

In [None]:
img = global_data_list[5233][0]
plt.figure(figsize = (10, 8))
vmin = img.mean() - 0.4 * img.std()
vmax = img.mean() + 0.4 * img.std()
plt.imshow(img, vmin = vmin, vmax = vmax)
plt.colorbar()

In [None]:
with open(path_pickle, 'rb') as handle:
    loaded_data_list = pickle.load(handle)

In [None]:
loaded_data_list[99+10]

In [None]:
100 * 100