In [1]:
import numpy as np
import fitsio
import tqdm
import glob
from esutil.pbar import PBar

In [2]:
def bootstrap(x, func, n=500, rng=None):
    """Evaluate a statistic on ``x`` and estimate its bootstrap scatter.

    Parameters
    ----------
    x : array
        Sample to resample along its first axis; must expose ``shape`` and
        accept integer-array indexing.
    func : callable
        Statistic evaluated on ``x`` and on each resampled copy of ``x``.
    n : int, optional
        Number of bootstrap resamples. Default is 500.
    rng : np.random.RandomState, optional
        Generator whose ``choice`` method draws the resampling indices. A
        fresh, unseeded ``np.random.RandomState`` is created when omitted.

    Returns
    -------
    tuple
        ``(func(x), np.std(values))`` where ``values`` holds the statistic of
        each of the ``n`` resamples.
    """
    generator = np.random.RandomState() if rng is None else rng

    def _resampled_stat():
        # Draw as many row indices as there are rows, with replacement.
        nrows = x.shape[0]
        picks = generator.choice(nrows, replace=True, size=nrows)
        return func(x[picks])

    draws = [_resampled_stat() for _ in range(n)]
    return func(x), np.std(draws)

In [3]:
def groupby(a, key):
    """Yield ``(key_value, rows)`` for every distinct value of ``a[key]``.

    Parameters
    ----------
    a : np.ndarray
        Structured array containing the field ``key``. It is not modified:
        grouping is done on a sorted copy.
    key : str
        Name of the field to group by.

    Yields
    ------
    tuple
        ``(value, rows)`` where ``rows`` is the slice of the sorted copy whose
        ``key`` field equals ``value``. Groups come out in sorted key order.
        An empty input yields nothing.
    """
    # np.sort returns a sorted copy, so the caller's array keeps its row order
    # (ndarray.sort would reorder it in place as a hidden side effect).
    ordered = np.sort(a, axis=0, order=key)
    if ordered.shape[0] == 0:
        # np.split would hand back a single empty chunk, and reading its first
        # key would raise IndexError.
        return
    # The first index of each distinct key marks the start of a group; the
    # leading 0 is dropped because np.split expects interior cut points only.
    starts = np.unique(ordered[key], return_index=True)[1]
    for item in np.split(ordered, starts[1:]):
        yield item[key][0], item

In [4]:
# Statistic applied by _process_file to the "de1" and "de2" values of each
# tile/band group.
func = np.median

In [5]:
import joblib

def _process_file(f):
    """Summarize one FITS table per (tilename, band) pair.

    The table is read with ``fitsio.read`` and split by its ``"tilename"``
    field via ``groupby``. For every band present in the file and every tile
    that has rows in that band, the module-level ``func`` is applied to the
    ``"de1"`` and ``"de2"`` values of those rows.

    Parameters
    ----------
    f : str
        Path of the FITS file to read.

    Returns
    -------
    np.ndarray
        Structured array with fields ``de1``, ``de2`` (results of ``func``),
        ``n`` (number of rows that entered the statistic), ``tilename`` and
        ``band``; one record per tile/band pair that has at least one row.
    """
    out_dtype = [
        ("de1", "f8"),
        ("de2", "f8"),
        ("n", "f8"),
        ("tilename", "U12"),
        ("band", "U1"),
    ]

    catalog = fitsio.read(f)
    file_bands = np.unique(catalog["band"])

    records = []
    for tile, tile_rows in groupby(catalog, "tilename"):
        # sanity check that the group really holds a single tile
        assert np.all(tile_rows["tilename"] == tile)
        for band in file_bands:
            in_band = tile_rows["band"] == band
            if np.any(in_band):
                records.append((
                    func(tile_rows["de1"][in_band]),
                    func(tile_rows["de2"][in_band]),
                    np.sum(in_band),
                    tile,
                    band,
                ))

    return np.array(records, dtype=out_dtype)


# glob returns matches in arbitrary order; sorting makes the file order, and
# therefore the row order of `d`, the same on every run.
fnames = sorted(glob.glob("astrom_data_all_basecolor1.40_*.fits"))
if not fnames:
    # Fail early with a clear message; np.concatenate on an empty list would
    # only report "need at least one array to concatenate".
    raise FileNotFoundError(
        "no files match 'astrom_data_all_basecolor1.40_*.fits' "
        "in the current working directory"
    )

# One delayed _process_file call per input file, run on 8 workers.
jobs = [joblib.delayed(_process_file)(f) for f in fnames]

with joblib.Parallel(n_jobs=8, verbose=100) as par:
    outputs = par(jobs)

# Stack the per-file summaries into a single structured array.
d = np.concatenate(outputs)

[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   1 tasks      | elapsed:  1.1min
[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed:  1.1min
[Parallel(n_jobs=8)]: Done   3 tasks      | elapsed:  1.1min
[Parallel(n_jobs=8)]: Done   4 tasks      | elapsed:  1.2min
[Parallel(n_jobs=8)]: Done   5 tasks      | elapsed:  1.2min
[Parallel(n_jobs=8)]: Done   6 tasks      | elapsed:  1.2min
[Parallel(n_jobs=8)]: Done   7 tasks      | elapsed:  1.2min
[Parallel(n_jobs=8)]: Done   8 tasks      | elapsed:  1.2min
[Parallel(n_jobs=8)]: Done   9 tasks      | elapsed:  2.2min
[Parallel(n_jobs=8)]: Done  10 tasks      | elapsed:  2.2min
[Parallel(n_jobs=8)]: Done  11 tasks      | elapsed:  2.3min
[Parallel(n_jobs=8)]: Done  12 tasks      | elapsed:  2.3min
[Parallel(n_jobs=8)]: Done  13 tasks      | elapsed:  2.3min
[Parallel(n_jobs=8)]: Done  14 tasks      | elapsed:  2.3min
[Parallel(n_jobs=8)]: Done  15 tasks      | elapsed:  2.3min
[Parallel(

In [6]:
# Distinct tile names and band labels present in the combined summaries `d`.
tilenames = np.unique(d["tilename"])
bands = np.unique(d["band"])

In [9]:
# For each band combination, average the per-(tile, band) de1/de2 values in
# `d` over the selected bands, giving one value per tile. Results are stored
# as all_res[combination] = {"e1": array, "e2": array}.
all_res = {}

for _bands in ["g", "ri", "gri"]:
    # each character of the combination string is one band label
    bands = list(_bands)
    # The band selection does not depend on the tile, so it is evaluated once
    # per combination; the tile loop then only scans the selected rows.
    selected = d[np.isin(d["band"], bands)]
    e1 = []
    e2 = []
    for tilename in tilenames:
        msk = selected["tilename"] == tilename
        if not np.any(msk):
            # this tile has no rows in any of the requested bands
            continue
        e1.append(np.mean(selected["de1"][msk]))
        e2.append(np.mean(selected["de2"][msk]))

    e1 = np.array(e1)
    e2 = np.array(e2)
    all_res[_bands] = dict(e1=e1, e2=e2)

In [10]:
# Seed the bootstrap so the printed uncertainties are the same on every run;
# without an explicit generator, bootstrap() creates an unseeded one.
rng = np.random.RandomState(9167)

scale = 1e-4  # values are reported in units of 10^-4
nsigma = 3    # uncertainties are quoted at 3 sigma

for b in all_res:
    e1 = all_res[b]["e1"]
    e2 = all_res[b]["e2"]
    # mean over tiles and the bootstrap standard deviation of that mean
    e1mn, e1std = bootstrap(e1, np.mean, rng=rng)
    e2mn, e2std = bootstrap(e2, np.mean, rng=rng)

    print("%s:" % b)
    print("    e1 [10^-4, 3sigma]: %0.3f +/- %0.3f" % (e1mn/scale, nsigma*e1std/scale))
    print("    e2 [10^-4, 3sigma]: %0.3f +/- %0.3f" % (e2mn/scale, nsigma*e2std/scale), flush=True)

g:
    e1 [10^-4, 3sigma]: 0.912 +/- 0.147
    e2 [10^-4, 3sigma]: 0.573 +/- 0.125
ri:
    e1 [10^-4, 3sigma]: 0.032 +/- 0.003
    e2 [10^-4, 3sigma]: 0.017 +/- 0.003
gri:
    e1 [10^-4, 3sigma]: 0.387 +/- 0.058
    e2 [10^-4, 3sigma]: 0.204 +/- 0.047
