In [4]:
from orphics import sehgal, maps
import healpy as hp
from pixell import utils, enmap, curvedsky, enplot, wcsutils
import os
import numpy as np

import matplotlib.pyplot as plt
import lmdb
from cosmikyu import datasets, transforms, config, stats

%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
data_dir = config.default_data_dir
sehgal_dir = os.path.join(data_dir, 'sehgal')
def data_path(x):
    return os.path.join(sehgal_dir, x)
SDS_validation = datasets.SehgalDataSet(sehgal_dir, data_type="validation141020", transforms=[], dummy_label=False)
data = np.zeros((5, 128, 128*len(SDS_validation)))
compts = ["kappa", "ksz", "tsz", "ir_pts", "rad_pts"]

for i in range(len(SDS_validation)):
#for i in range(1000):
    sidx = 128*i
    data[...,sidx: sidx+128] = SDS_validation[i]
    
def sehgal_path(x):
    return os.path.join(sehgal_dir, x)

#enplot.pshow(data[:,:128,:128])

In [None]:
zfact = 1
def log_normalize(emap, pos=True, neg=True):
    temp = emap.copy()
    #loc = np.where(emap!=0)
    std = np.std(temp)
    if pos:
        loc = np.where(emap>=0)
        temp[loc] = np.log(temp[loc]/std+1)
    if neg:
        loc = np.where(emap<0)
        temp[loc] = -1*np.log(np.abs(temp[loc]/std)+1)

    return temp


def z_normalize(emap, zfact = zfact):
    mean, std = emap.mean(), emap.std()   
    return (emap-mean)/(std*zfact), [mean, std]

def minmax(emap):
    maxval, minval = emap.max(), emap.min()
    valrange = (maxval-minval)
    midval = (maxval+minval)/2
    return (emap-midval)/valrange*2, [minval, maxval]


freq_idx = 148
'''
ns = {"kappa": z_normalize,
      "ksz": z_normalize,
      "ir_pts": lambda x: minmax(log_normalize(x, neg=True)),
      "rad_pts": lambda x: minmax(log_normalize(x, neg=True)),
      "tsz": lambda x: minmax(log_normalize(x, pos=True)),
     }
'''
ns = {"kappa": z_normalize,
      "ksz": z_normalize,
      "ir_pts": lambda x: z_normalize(log_normalize(x, neg=False)),
      "rad_pts": lambda x: z_normalize(log_normalize(x, neg=False)),
      "tsz": lambda x: z_normalize(log_normalize(x, pos=True)),
     }

ns = {"kappa": lambda x: tanh(z_normalize(x)),
      "ksz": lambda x: tanh(z_normalize(x)),
      "ir_pts": lambda x: tanh(log_normalize(x, neg=True)),
      "rad_pts": lambda x: tanh(log_normalize(x, neg=True)),
      "tsz": lambda x: tanh(log_normalize(x, neg=True)),
     }

ns = {"kappa": lambda x: minmax(x),
      "ksz": lambda x: minmax(x),
      "ir_pts": lambda x: minmax(log_normalize(x, neg=True)),
      "rad_pts": lambda x: minmax(log_normalize(x, neg=True)),
      "tsz": lambda x: minmax(log_normalize(x, neg=True)),
     }

norm_info_validation = {}
hist_org_validation = {}
hist_norm_validation = {}
compts = ["kappa", "ksz", "tsz", "ir_pts", "rad_pts"]


for i, compt_idx in enumerate(compts):
    print(compt_idx)
    #if i < 3: continue

    temp = data[i].copy() 
    norm_info_validation[compt_idx] = []
    norm_info_validation[compt_idx] += [temp.min(), temp.std()]
    #minval, maxval = temp.min(), temp.max()
    
    nbins = 1024
    hist, bins = np.histogram(data[i].copy(), bins=1024)
    fig = plt.figure(figsize=(10, 5))
    hist_org_validation[compt_idx] = (hist, bins)
    bin_center = (bins[:-1]+bins[1:])/2.
    plt.plot(bin_center, hist/np.sum(hist), label=compt_idx)
    plt.legend()
    plt.axvline(x=1, ls="--", color="k")
    plt.axvline(x=-1, ls="--", color="k")
    plt.yscale("log")
    #plt.xlim(-5,5)
    #plt.xscale("symlog")
    plt.show()
    
    ndata, temp = ns[compt_idx](data[i].copy())
    norm_info_validation[compt_idx] += temp
    print(norm_info_validation[compt_idx])
    hist, bins = np.histogram(ndata, bins=1024)
    fig = plt.figure(figsize=(10, 5))
    hist_norm_validation[compt_idx] = (hist, bins)
    bin_center = (bins[:-1]+bins[1:])/2.
    plt.plot(bin_center, hist/np.sum(hist), label=compt_idx)
    plt.axvline(x=1, ls="--", color="k")
    plt.axvline(x=-1, ls="--", color="k")
    plt.axhline(y=1e-5, ls="--", color="k")
    plt.legend()
    #plt.xlim(-12,12)
    plt.yscale("log")
    #plt.xscale("symlog")
    plt.show()
    
    hist, bins = np.histogram(ndata, bins=1024)
    fig = plt.figure(figsize=(10, 5))
    plt.plot(bin_center, hist/np.sum(hist), label=compt_idx)
    plt.axvline(x=1, ls="--", color="k")
    plt.axvline(x=-1, ls="--", color="k")
    plt.axhline(y=1e-5, ls="--", color="k")
    plt.legend()
    #plt.xlim(-12,12)
    plt.yscale("log")
    #plt.xscale("symlog")
    plt.show()


In [9]:
norm_info_validation_out = {} 

for idx in norm_info_validation.keys():
    norm_info_validation_out[idx] = {"min":float(norm_info_validation[idx][0]), "std":float(norm_info_validation[idx][1]),
                          "logmean":float(norm_info_validation[idx][2]), "logstd":float(norm_info_validation[idx][3])}
    
    print(idx, norm_info_validation_out[idx])
hist_org_validation_out = {} 
for idx in hist_org_validation.keys():
    hist_org_validation_out[idx] = {}
    hist_org_validation_out[idx]["hist"] = hist_org_validation[idx][0]
    bin_edges =  hist_org_validation[idx][1]
    hist_org_validation_out[idx]["bin_centers"] = (bin_edges[:-1]+bin_edges[1:])/2.
    hist_org_validation_out[idx]["bin_edges"] = bin_edges
    


np.savez(data_path("141020_normalization_info_validation.npz"), **norm_info_validation_out)


kappa {'min': -0.6806576117720182, 'std': 0.07407179512283714, 'logmean': -2.6458898250364806e-05, 'logstd': 0.07407179512283714}
ksz {'min': -37.412660533440295, 'std': 2.1445520917184044, 'logmean': -0.00406070836384978, 'logstd': 2.1445520917184044}
tsz {'min': -290.8268366090156, 'std': 2.567613946408688, 'logmean': -0.6697793761660281, 'logstd': 0.27506589295912703}
ir_pts {'min': -3.9728085209285857, 'std': 6.573228652867444, 'logmean': 1.2275226201566654, 'logstd': 0.2578566082552369}
rad_pts {'min': -2.6336878229445273, 'std': 3.6703679499683948, 'logmean': 0.03511814342501448, 'logstd': 0.18265126946443766}


In [1]:
norm_info_file = "/home/dwhan89/workspace/cosmikyu/data/sehgal/141020_normalization_info_validation.npz"
SDN = transforms.SehgalDataNormalizerScaledLogZ(norm_info_file)
SDS_validation = datasets.SehgalDataSet(sehgal_dir, data_type="validation141020", transforms=[SDN], dummy_label=False)

nsample = len(SDS_validation)
data = np.zeros((5, 128, 128*nsample))
nbins = 10000

for i in range(nsample):
    if i % 5000 == 0: print(i)
    sidx = 128*i
    data[...,sidx: sidx+128] = SDS_validation[i]
print(data.min(), data.max(), data.mean())
print("start binning")
MB = stats.FastMultBinner((-30,30), nbins, data.shape[0])
MB.bin(data)
    
ret = MB.get_info()
out = {}
for key in range(5):
    print(key)
    out[SDN.channel_idxes[key]] = ret[key].copy()
ret = out
np.savez(sehgal_path("141020_normalized_histogram_validation_{}.npz".format(nbins)), **out)

NameError: name 'transforms' is not defined

In [7]:
norm_info_file = "/home/dwhan89/workspace/cosmikyu/data/sehgal/141020_normalization_info_validation.npz"
SDN = transforms.SehgalDataNormalizerScaledLogZ(norm_info_file)
SDS_train = datasets.SehgalDataSet(sehgal_dir, data_type="train141020", transforms=[SDN], dummy_label=False)

nsample = len(SDS_train)
data = np.zeros((5, 128, 128*nsample))

nbins = 10000
for i in range(nsample):
    if i % 5000 == 0: print(i)
    sidx = 128*i
    data[...,sidx: sidx+128] = SDS_train[i]
print(data.min(), data.max(), data.mean())
print("start binning")
MB = stats.FastMultBinner((-30,30), nbins, data.shape[0])
MB.bin(data)
    
ret = MB.get_info()
out = {}
for key in range(5):
    print(key)
    out[SDN.channel_idxes[key]] = ret[key].copy()
ret = out
np.savez(sehgal_path("141020_normalized_histogram_train_{}.npz".format(nbins)), **out)

0
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
60000
65000
70000
75000
80000
85000
90000
95000
100000
105000
110000
115000
120000
125000
130000
135000
140000
145000
150000
155000
160000
165000
170000
175000
180000
185000
190000
195000
-17.451651082817836 28.086762952427456 0.01014514060865161
start binning
0
1
2
3
4


NameError: name 'sehgal_path' is not defined

In [11]:
norm_info_file = "/home/dwhan89/workspace/cosmikyu/data/sehgal/141020_normalization_info_validation.npz"
SDN = transforms.SehgalDataNormalizerScaledLogZ(norm_info_file)
SDS_test = datasets.SehgalDataSet(sehgal_dir, data_type="test141020", transforms=[SDN], dummy_label=False)

nsample = len(SDS_test)
data = np.zeros((5, 128, 128*nsample))
SDS_test
nbins = 10000
for i in range(nsample):
    if i % 5000 == 0: print(i)
    sidx = 128*i
    data[...,sidx: sidx+128] = SDS_test[i]
print(data.min(), data.max(), data.mean())
print("start binning")
MB = stats.FastMultBinner((-30,30), nbins, data.shape[0])
MB.bin(data)
    
ret = MB.get_info()
out = {}
for key in range(5):
    print(key)
    out[SDN.channel_idxes[key]] = ret[key].copy()
ret = out
np.savez(sehgal_path("141020_normalized_histogram_test_{}.npz".format(nbins)), **out)

0
5000
10000
15000
20000
25000
-17.451651082817836 28.078867012023945 0.009773964663625364
start binning
0
1
2
3
4


In [12]:
norm_info_file = "/home/dwhan89/workspace/cosmikyu/data/sehgal/141020_normalization_info_validation.npz"
SDN = transforms.SehgalDataNormalizerScaledLogZ(norm_info_file)
SDS_test = datasets.SehgalDataSet(sehgal_dir, data_type="test141020", transforms=[], dummy_label=False)

nsample = len(SDS_test)
data = np.zeros((5, 128, 128*nsample))

nbins = 10000
for i in range(nsample):
    if i % 5000 == 0: print(i)
    sidx = 128*i
    data[...,sidx: sidx+128] = SDS_test[i]
print(data.min(), data.max(), data.mean())
print("start binning")
MB = stats.FastMultBinner((-30,30), nbins, data.shape[0])
MB.bin(data)
    
ret = MB.get_info()
out = {}
for key in range(5):
    print(key)
    out[SDN.channel_idxes[key]] = ret[key].copy()
ret = out
np.savez(sehgal_path("141020_raw_histogram_test_{}.npz".format(nbins)), **out)

0
5000
10000
15000
20000
25000
-290.7731033193691 803.6106114930507 2.849090983757404
start binning
0
1
2
3
4
