In [1]:
import pandas as pd
import numpy as np
import numba as nb
from numba import cuda
from numba.cuda.random import create_xoroshiro128p_states, xoroshiro128p_uniform_float32
import math
from scipy.optimize import minimize, minimize_scalar, differential_evolution
import os
import sys
import json
import datetime
import argparse

In [2]:
# PRIVATE CONSTANT PARAMETERS
NBIN_R2_HET_HIST = 256 # 256
THREADS_PER_BLOCK = 32
COL_DTYPE = dict(SNP='U',
                 N='f4',
                 Z='f4', 
                 INFO='f4',
                 A1='U',
                 A2='U') # only SNP and trait-specific columns, other columns (CHR, BP, MAF) are taken from template

In [11]:
def parse_args(args):
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default='default_config.py', help="Path to configuration file.")
    return parser.parse_args(args)


def load_config(config_fname):
    with open(config_fname) as f:
        config = json.load(f)
    return config

# --- Data loading and processing functions ---

@nb.njit
def get_r2_het(r2, r2_idx, het):
    r2_het = np.empty_like(r2)
    for i in range(len(r2)):
        r2_het[i] = r2[i]*het[r2_idx[i]]
    return r2_het


@nb.njit
def get_ld_n_idx(ld_n):
    # ld_n: ld_n[i] = size of i-th LD block
    # Returns:
    #     ld_n_idx: ld_n_idx[i] = index of the first element of the i-th LD block in corersponding r2 and r2_idx arrays
    ld_n_idx = np.zeros(len(ld_n)+1, dtype='i8') # indices of LD blocks in r2/r2_idx vectors
    for i, n_in_ld in enumerate(ld_n):
        ld_n_idx[i+1] = ld_n_idx[i] + n_in_ld # cumsum
    return ld_n_idx

@nb.njit
def get_r2_het_hist(b2use, r2_het, ld_n):
    n2use = b2use.sum() # number of used SNPs
    r2_het_hist = np.zeros(n2use*NBIN_R2_HET_HIST, dtype='f4')
    # r2_het_hist_edges: max = 0.5, min = 0.5/NBIN_R2_HET_HIST
    r2_het_hist_edges = np.linspace(0.5/NBIN_R2_HET_HIST, 0.5, NBIN_R2_HET_HIST).astype(np.float32)
    #r2_het_hist_edges = np.array([0.5/(NBIN_R2_HET_HIST - i) for i in range(NBIN_R2_HET_HIST)], dtype='f4')
    i_r2orig = 0
    i_template2use = 0
    for i_template, nld in enumerate(ld_n):
        if b2use[i_template]:
            for x_r2_het in r2_het[i_r2orig:i_r2orig+nld]:
                for i, hist_edge in enumerate(r2_het_hist_edges):
                    if x_r2_het <= hist_edge:
                        r2_het_hist[i_template2use*NBIN_R2_HET_HIST + i] += 1
                        break
            i_template2use += 1
        i_r2orig += nld
    return r2_het_hist

@nb.njit
def prune(pval, r2, r2_idx, ld_n, b2use, r2_thresh):
    # b2use: bool vector of SNP indices in template to consider for pruning. All indices not in i2use will not survive.
    # Returns:
    #     is_survived: bool vector, True if SNP survives pruning
    assert len(pval) == len(ld_n)
    assert len(b2use) == len(ld_n)
    isort = np.argsort(pval)
    is_survived = b2use[:]
    ld_n_idx = get_ld_n_idx(ld_n)
    for i in isort:
        if is_survived[i]:
            for j in range(ld_n_idx[i], ld_n_idx[i+1]): # j = index in r2/r2_idx vector
                if r2[j] > r2_thresh:
                    is_survived[r2_idx[j]] = False
            is_survived[i] = True
    return is_survived

@nb.njit
def get_total_het_used_chr(b2use, r2_idx, het, ld_n):
    n_use = np.zeros(len(b2use), dtype='i4') # n_use[i] = number of times i-th SNP on chromosome from template was in LD with any SNP used for fitting
    i_r2orig = 0
    for i_template, nld in enumerate(ld_n):
        if b2use[i_template]:
            n_use[r2_idx[i_r2orig:i_r2orig+nld]] += 1
        i_r2orig += nld
    # (n_use != 0).sum() = number of SNPs from template appeared at least once in LD with any SNP used for fitting
    # (n_use*het).sum() = sum of het of SNPs in all LD of all SNPs used for fiting
    # n_use.sum() == number of SNPs in all LD blocks of all SNPs used for fitting
    total_n_used = (n_use!=0).sum()
    total_het_used = het[n_use!=0].sum()
    return total_het_used, total_n_used


def get_total_het_used(template_dir, snps_df, rand_prune_seed, r2_prune_thresh):
    rng = np.random.default_rng(rand_prune_seed)
    total_het, total_n = 0, 0
    for chrom in snps_df.CHR.unique():
        ld_r2_file = os.path.join(template_dir, f'chr{chrom}.ld_r2')
        ld_idx_file = os.path.join(template_dir, f'chr{chrom}.ld_idx')
        r2 = np.memmap(ld_r2_file, dtype='f4', mode='r')
        r2_idx = np.memmap(ld_idx_file, dtype='i4', mode='r')
        snps_df_chr = snps_df.loc[snps_df.CHR == chrom,:]
        het = 2*snps_df_chr.MAF.values*(1 - snps_df_chr.MAF.values)
        ld_n = snps_df_chr.LD_N.values
            
        b2use = snps_df_chr.IS_VALID.values
        rand_pval = rng.random(snps_df_chr.shape[0])
        bpruned = prune(rand_pval, r2, r2_idx, ld_n, b2use, r2_prune_thresh)
        total_het_chr, total_n_chr = get_total_het_used_chr(bpruned, r2_idx, het, ld_n)
        total_het += total_het_chr
        total_n += total_n_chr
    total_het_used = len(snps_df)*total_het/total_n
    return total_het_used

def swap_z_sign(snps_df, n):
    # Change snps_df inplace. Check allele correspondence between template and sumstats.
    # Set IS_VALID = False for SNPs with allele-mismatch.
    # Swap Z sign for SNPs with swapped alleles.
    # Drop A1_i and A2_i columns (alleles loaded from sumstats).
    
    # Make reverse compliments of reference alleles
    compliments = str.maketrans("ATGC","TACG")
    make_rev_comp = lambda x: x.translate(compliments)[::-1]
    a1_rev_comp = snps_df.A1.apply(make_rev_comp)
    a2_rev_comp = snps_df.A2.apply(make_rev_comp)
    
    for i in range(n):
        z_col, a1_col, a2_col = f"Z_{i}", f"A1_{i}", f"A2_{i}"
        # check if A1 in template is A1 or A2 (both possibly reverse compliment) in sumstats,
        # if nither fits the SNP is invalid.
        i_a1_is_a1 = (snps_df.A1 == snps_df[a1_col]) & (snps_df.A2 == snps_df[a2_col])
        i_a1_is_a1 |= (a1_rev_comp == snps_df[a1_col]) & (a2_rev_comp == snps_df[a2_col])
        i_a1_is_a2 = (snps_df.A1 == snps_df[a2_col]) & (snps_df.A2 == snps_df[a1_col])
        i_a1_is_a2 |= (a1_rev_comp == snps_df[a2_col]) & (a2_rev_comp == snps_df[a1_col])
        # SNP is valid if its alleles match either directly (A1-A1) or swapped (A1-A2), both possibly with reverse compliment
        snps_df.IS_VALID &= i_a1_is_a1 | i_a1_is_a2
        snps_df.loc[i_a1_is_a2, z_col] *= -1 # if A1 in template is A2 in sumstats swap sign of Z
        snps_df.drop(columns=[a1_col, a2_col], inplace=True)
        
        
def load_snps(template_dir, sumstats, *, chromosomes=range(1,23),
              z_thresh=None, info_thresh=None, maf_thresh=None, exclude_regions=[]):
    # Load template SNPs for given chromosomes.
    # Load (multiple) sumstats.
    # Merge each sumstats with template.
    # Allign alleles in sumstats to template and swap effect direction correspondingly.
    # Add IS_VALID column which is True for SNPs for all SNPs passing specified filtering
    if isinstance(sumstats, str):
        sumstats = [sumstats]
        
    # Read template SNPs
    print(f"Reading template SNPs for {len(chromosomes)} chromosomes from {template_dir}")
    snps_df_list = []
    for chrom in chromosomes:
        snp_file = os.path.join(template_dir, f'chr{chrom}.snp.gz')
        df = pd.read_table(snp_file, dtype={"CHR":'i4',"MAF":'f4',"LD_N":'i4'})
        snps_df_list.append(df)
    snps_df = pd.concat(snps_df_list, ignore_index=True)
    print(f"    {snps_df.shape[0]} SNPs")
    snps_df["IS_VALID"] = True
    # Read sumstats
    for i, fname in enumerate(sumstats):
        print(f"Loading sumstats from {fname}")
        cols = pd.read_table(fname, nrows=0).columns
        usecols = [c for c in cols if c in COL_DTYPE]
        df = pd.read_table(fname, usecols=usecols, dtype=COL_DTYPE)
        df.drop_duplicates(subset=["SNP"], keep='first', inplace=True)
        print(f"    {df.shape[0]} SNPs")
        col_rename = {c:f"{c}_{i}" for c in usecols if c!="SNP"}
        df.rename(columns=col_rename, inplace=True)
        snps_df = pd.merge(snps_df, df, on="SNP", how="left")
        snps_df.IS_VALID &= snps_df[f"Z_{i}"].notna() & snps_df[f"N_{i}"].notna()
    print(f"{snps_df.IS_VALID.sum()} common SNPs")
    
    # swap Z_i signs of z-scores and IS_VALID = False when alleles do not correspond to reference
    swap_z_sign(snps_df, len(sumstats))
    print(f"{snps_df.IS_VALID.sum()} SNPs with matched alleles")
    
    # Apply filters
    if z_thresh:
        z_cols = [c for c in snps_df.columns if c.startswith("Z_")]
        snps_df.IS_VALID &= (snps_df[z_cols].abs() < z_thresh).all(axis="columns")
        print(f"{snps_df.IS_VALID.sum()} SNPs with Z < {z_thresh}")
    if info_thresh:
        info_cols = [c for c in snps_df.columns if c.startswith("INFO_")]
        snps_df.IS_VALID &= (snps_df[info_cols] > info_thresh).all(axis="columns")
        print(f"{snps_df.IS_VALID.sum()} SNPs with INFO > {info_thresh}")
    if maf_thresh:
        snps_df.IS_VALID &= snps_df.MAF > maf_thresh
        print(f"{snps_df.IS_VALID.sum()} SNPs with MAF > {maf_thresh}")
    for region in exclude_regions:
        chrom, start_end = region.split(":")
        chrom = int(chrom)
        start, end = map(int, start_end.split("-"))
        i_drop = (snps_df.CHR == chrom) & (snps_df.BP > start) & (snps_df.BP < end)
        snps_df.IS_VALID &= ~i_drop
        print(f"    {i_drop.sum()} SNPs excluded from {region}")
    print(f"{snps_df.IS_VALID.sum()} SNPs after all filters")   
    return snps_df

def load_opt_data(template_dir, snps_df, *, r2_prune_thresh, rand_prune_seed):
    # snps_df is produced by load_snps()
    print("Loading LD data")
    z_cols = [c for c in sorted(snps_df.columns) if c.startswith("Z_")]
    n_cols = [c for c in sorted(snps_df.columns) if c.startswith("N_")]
    assert all(z_col.split('_')[1] == n_col.split('_')[1] for z_col, n_col in zip(z_cols, n_cols))
    z_n_dict = {c:[] for c in z_cols + n_cols}
    r2_het_hist_list = []
    rng = np.random.default_rng(rand_prune_seed)
    for chrom in snps_df.CHR.unique():
        print(f"Processing chr {chrom}")
        # load template
        ld_r2_file = os.path.join(template_dir, f'chr{chrom}.ld_r2')
        ld_idx_file = os.path.join(template_dir, f'chr{chrom}.ld_idx')
        r2 = np.memmap(ld_r2_file, dtype='f4', mode='r')
        r2_idx = np.memmap(ld_idx_file, dtype='i4', mode='r')
        snps_df_chr = snps_df.loc[snps_df.CHR == chrom,:]
        het = 2*snps_df_chr.MAF.values*(1 - snps_df_chr.MAF.values)
        ld_n = snps_df_chr.LD_N.values
        # random prune
        b2use = snps_df_chr.IS_VALID.values
        rand_pval = rng.random(snps_df_chr.shape[0])
        bpruned = prune(rand_pval, r2, r2_idx, ld_n, b2use, r2_prune_thresh)
        print(f"    {bpruned.sum()} SNPs survive pruning")
        print(f"    {ld_n[bpruned].mean():.2f} mean size of LD block of pruned SNPs")
        
        r2_het = get_r2_het(r2, r2_idx, het)
        r2_het_hist = get_r2_het_hist(bpruned, r2_het, ld_n)
        r2_het_hist_list.append(r2_het_hist)
        
        for z_col, n_col in zip(z_cols, n_cols):
            z = snps_df_chr.loc[bpruned,z_col].values
            n = snps_df_chr.loc[bpruned,n_col].values
            z_n_dict[z_col].append(z)
            z_n_dict[n_col].append(n)
    r2_het_hist = np.concatenate(r2_het_hist_list)
    for col, val_list in z_n_dict.items():
        z_n_dict[col] = np.concatenate(val_list)
    print(f"{z_n_dict['Z_0'].size} SNPs loaded")
    print(f"{r2_het_hist.sum()/z_n_dict['Z_0'].size:.2f} mean size of LD block of loaded SNPs")
    return r2_het_hist, z_n_dict


# --- Univariate optimization ---

def cost1x(p, sb2, s02, n_gpu, z_gpu, r2_het_hist_gpu, res_gpu, rng_states, samples_per_thread):
    blockspergrid = z_gpu.size
    cost1x_gpu[blockspergrid, THREADS_PER_BLOCK](p, sb2, s02, n_gpu, z_gpu, r2_het_hist_gpu, res_gpu,
                                                      rng_states, samples_per_thread)
    return res_gpu.copy_to_host().mean()

@cuda.jit
def cost1x_gpu(p, sb2, s02, n, z, r2_het_hist, res, rng_states, samples_per_thread):
    r2_het_hist_shared = cuda.shared.array(NBIN_R2_HET_HIST, dtype='f4')
    res_shared = cuda.shared.array(THREADS_PER_BLOCK, dtype='f8')
    
    pos = cuda.grid(1)
    i_thread_in_block = cuda.threadIdx.x
    i_block = cuda.blockIdx.x # i_block is also index of SNP
    
    L = 1/math.log1p(-p)
    zt, nt = z[i_block], n[i_block]
    
    for i in range(i_thread_in_block, NBIN_R2_HET_HIST, THREADS_PER_BLOCK):
        r2_het_hist_shared[i] = r2_het_hist[i_block*NBIN_R2_HET_HIST + i]
    res_shared[i_thread_in_block] = 0
    
    cuda.syncthreads() # wait for all threads in the block
    
    for i in range(samples_per_thread):
        rand = xoroshiro128p_uniform_float32(rng_states, pos)
        causal_i = math.ceil(L*math.log(1 - rand))
        n_passed = 0
        se2 = 0
        for j,n_in_bin in enumerate(r2_het_hist_shared):
            n_passed += n_in_bin
            while causal_i <= n_passed:
                se2 += (0.5*j + 0.25)/NBIN_R2_HET_HIST # middle of the bin
                rand = xoroshiro128p_uniform_float32(rng_states, pos)
                causal_i += math.ceil(L*math.log(1 - rand)) # math.ceil returns float32 on GPU
        se2 = se2*sb2*nt + s02
        res_shared[i_thread_in_block] += math.exp(-0.5*zt**2/se2) / math.sqrt(2*math.pi*se2)
    
    cuda.syncthreads()
    
    if i_thread_in_block == 0:
        s = 0
        for x in res_shared: s += x
        res[i_block] = -math.log(s/(THREADS_PER_BLOCK*samples_per_thread))
        

def objf1x(par_vec, n_gpu, z_gpu, r2_het_hist_gpu, res_gpu, rng_states, samples_per_thread):
    p, sb2, s02 = par_vec
    p = 10**p
    sb2 = 10**sb2
    
    cost = cost1x(p, sb2, s02, n_gpu, z_gpu, r2_het_hist_gpu, res_gpu, rng_states, samples_per_thread)
    print(f"cost = {cost:.7f}, p = {p:.3e}, sb2 = {sb2:.3e}, s02 = {s02:.4f}", flush=True)
    return cost


def optimize1x(z, n, r2_het_hist, n_samples_grid, n_samples_local, gpu_rng_seed):
    p_lb, p_rb = -5, -2 # on log10 scale
    sb2_lb, sb2_rb = -6, -3 # on log10 scale
    s02_lb, s02_rb = 0.8, 2.5

    r2_het_hist_gpu = cuda.to_device(r2_het_hist)
    n_gpu = cuda.to_device(n)
    z_gpu = cuda.to_device(z)
    res_gpu = cuda.device_array(z.size, dtype='f8')
    rng_states = create_xoroshiro128p_states(z.size*THREADS_PER_BLOCK, seed=gpu_rng_seed)
    bounds = [(p_lb, p_rb), (sb2_lb, sb2_rb), (s02_lb, s02_rb)]
    
    samples_per_thread = int(n_samples_grid/THREADS_PER_BLOCK)
    print(f"Global opt with {samples_per_thread*THREADS_PER_BLOCK} samples per variant.")
    args_opt = (n_gpu, z_gpu, r2_het_hist_gpu, res_gpu, rng_states, samples_per_thread)
    #x0 = [-3, -4.5, 1.0]
    res = differential_evolution(objf1x, bounds, args=args_opt, maxiter=5, popsize=10, polish=False, init='sobol')
    
    
    samples_per_thread = int(n_samples_local/THREADS_PER_BLOCK)
    print(f"Local opt with {samples_per_thread*THREADS_PER_BLOCK} samples per variant.")
    x0 = res.x
    args_opt = (n_gpu, z_gpu, r2_het_hist_gpu, res_gpu, rng_states, samples_per_thread)
    res = minimize(objf1x, x0=x0, args=args_opt, method='Nelder-Mead', bounds=bounds,
            options={'maxiter':50, 'fatol':1E-5, 'xatol':1E-2})
    
    opt_par = [10**res.x[0], 10**res.x[1], res.x[2]]
    opt_res = dict(x=res.x.tolist(), fun=res.fun.tolist())
    for k in ("success", "status", "message", "nfev", "nit"):
        opt_res[k] = res.get(k)
    opt_out = dict(opt_res=opt_res, opt_par=opt_par, grid_cost=None, grid_par=None)
    return opt_out

In [13]:
# --- Bivariate optimization ---

def cost2x(p12, rho, rho0,
           p_1, sb2_1, s02_1, n_gpu_1, z_gpu_1,
           p_2, sb2_2, s02_2, n_gpu_2, z_gpu_2,
           r2_het_hist_gpu, res_gpu, rng_states, samples_per_thread):
    blockspergrid = z_gpu_1.size
    cost2x_gpu[blockspergrid, THREADS_PER_BLOCK](p12, rho, rho0, p_1, sb2_1, s02_1, n_gpu_1, z_gpu_1,
                                                p_2, sb2_2, s02_2, n_gpu_2, z_gpu_2,
                                                r2_het_hist_gpu, res_gpu, rng_states, samples_per_thread)
    return res_gpu.copy_to_host().mean()

@cuda.jit
def cost2x_gpu(p12, rho, rho0, p_1, sb2_1, s02_1, n_1, z_1,p_2, sb2_2, s02_2, n_2, z_2,
              r2_het_hist, res, rng_states, samples_per_thread):
    r2_het_hist_shared = cuda.shared.array(NBIN_R2_HET_HIST, dtype='f4')
    res_shared = cuda.shared.array(THREADS_PER_BLOCK, dtype='f8')
    
    pos = cuda.grid(1)
    i_thread_in_block = cuda.threadIdx.x
    i_block = cuda.blockIdx.x # i_block is also index of SNP
    
    p_causal = p_1 + p_2 - p12 # causal either in 1 or in 2
    p_causal_12 = p12/p_causal # causal in both 1 and 2
    p_causal_1 = p_1/p_causal # causal in 1
    L = 1/math.log1p(-p_causal)

    zt_1, nt_1, zt_2, nt_2 = z_1[i_block], n_1[i_block], z_2[i_block], n_2[i_block]
    
    for i in range(i_thread_in_block, NBIN_R2_HET_HIST, THREADS_PER_BLOCK):
        r2_het_hist_shared[i] = r2_het_hist[i_block*NBIN_R2_HET_HIST + i]
    res_shared[i_thread_in_block] = 0
    
    cuda.syncthreads() # wait for all threads in the block
    
    for i in range(samples_per_thread):
        rand = xoroshiro128p_uniform_float32(rng_states, pos)
        causal_i = math.ceil(L*math.log(1 - rand))
        n_passed = 0
        se2_1, se2_2 = 0, 0
        for j,n_in_bin in enumerate(r2_het_hist_shared):
            n_passed += n_in_bin
            while causal_i <= n_passed:
                r2_het = (0.5*j + 0.25)/NBIN_R2_HET_HIST # middle of the bin
                rand = xoroshiro128p_uniform_float32(rng_states, pos)
                if rand < p_causal_12:
                    se2_1 += r2_het
                    se2_2 += r2_het
                elif rand < p_causal_1:
                    se2_1 += r2_het
                else:
                    se2_2 += r2_het
                rand = xoroshiro128p_uniform_float32(rng_states, pos)
                causal_i += math.ceil(L*math.log(1 - rand)) # math.ceil returns float32 on GPU
        se2_1 = se2_1*sb2_1*nt_1
        se2_2 = se2_2*sb2_2*nt_2
        # covar matrix
        m11 = se2_1 + s02_1
        m22 = se2_2 + s02_2
        m12 = rho*math.sqrt(se2_1*se2_2) + rho0*math.sqrt(s02_1*s02_2)
        det = (m11*m22 - m12**2)
        res_shared[i_thread_in_block] += math.exp(-0.5*(m22*zt_1**2 + m11*zt_2**2 - 2*m12*zt_1*zt_2)/det) / (2*math.pi*math.sqrt(det))
    
    cuda.syncthreads()
    
    if i_thread_in_block == 0:
        s = 0
        for x in res_shared: s += x
        res[i_block] = -math.log(s/(THREADS_PER_BLOCK*samples_per_thread))


def objf2x(par_vec, p_1, sb2_1, s02_1, n_gpu_1, z_gpu_1,
           p_2, sb2_2, s02_2, n_gpu_2, z_gpu_2,
           r2_het_hist_gpu, res_gpu, rng_states, samples_per_thread,
           p12_lb, p12_rb, rho_lb, rho_rb, rho0_lb, rho0_rb):
    p12, rho, rho0 = par_vec
    p12 = 10**p12
    
    cost = cost2x(p12, rho, rho0,
           p_1, sb2_1, s02_1, n_gpu_1, z_gpu_1,
           p_2, sb2_2, s02_2, n_gpu_2, z_gpu_2,
           r2_het_hist_gpu, res_gpu, rng_states, samples_per_thread)
    print(f"cost = {cost:.7f}, p12 = {p12:.3e}, rho = {rho:.4f}, rho0 = {rho0:.4f}", flush=True)
    return cost


def optimize2x(p_1, sb2_1, s02_1, n_1, z_1,
               p_2, sb2_2, s02_2, n_2, z_2,
               r2_het_hist, n_samples_grid, n_samples_local, gpu_rng_seed):
    p12_lb, p12_rb = -5.5, math.log10(min(p_1,p_2)) # on log10 scale
    assert p12_lb < p12_rb
    rho_lb, rho_rb = -1, 1
    rho0_lb, rho0_rb = -1, 1
     
    r2_het_hist_gpu = cuda.to_device(r2_het_hist)
    n_gpu_1, n_gpu_2 = cuda.to_device(n_1), cuda.to_device(n_2)
    z_gpu_1, z_gpu_2 = cuda.to_device(z_1), cuda.to_device(z_2)
    res_gpu = cuda.device_array(z_1.size, dtype='f8')
    rng_states = create_xoroshiro128p_states(z_1.size*THREADS_PER_BLOCK, seed=gpu_rng_seed)
    bounds = [(p12_lb, p12_rb), (rho_lb, rho_rb), (rho0_lb, rho0_rb)]
    
    # rough burn-in opt
    samples_per_thread = int(n_samples_grid/THREADS_PER_BLOCK)
    print(f"Starting burn-in opt with {samples_per_thread*THREADS_PER_BLOCK} samples per variant/thread")
    args_opt = (p_1, sb2_1, s02_1, n_gpu_1, z_gpu_1, p_2, sb2_2, s02_2, n_gpu_2, z_gpu_2, r2_het_hist_gpu,
                res_gpu, rng_states, samples_per_thread, p12_lb, p12_rb, rho_lb, rho_rb, rho0_lb, rho0_rb)
    #x0 = [0.5*(p12_lb + p12_rb), -0.15, 0.0]
    res = differential_evolution(objf2x, bounds, args=args_opt, maxiter=5, popsize=10, polish=False, init='sobol')
    #res = minimize(objf2x, x0=x0, args=args_opt, method='Nelder-Mead', bounds=bounds,
    #        options={'maxiter':50, 'fatol':1E-5, 'xatol':1E-2})
    
    # refined opt
    samples_per_thread = int(n_samples_local/THREADS_PER_BLOCK)
    print(f"Starting refined opt with {samples_per_thread*THREADS_PER_BLOCK} samples per variant/thread")
    args_opt = (p_1, sb2_1, s02_1, n_gpu_1, z_gpu_1, p_2, sb2_2, s02_2, n_gpu_2, z_gpu_2, r2_het_hist_gpu,
                res_gpu, rng_states, samples_per_thread, p12_lb, p12_rb, rho_lb, rho_rb, rho0_lb, rho0_rb)
    x0 = res.x
    res = minimize(objf2x, x0=x0, args=args_opt, method='Nelder-Mead', bounds=bounds,
            options={'maxiter':100, 'fatol':1E-6, 'xatol':1E-3})
    
    opt_par = [10**res.x[0], res.x[1], res.x[2]]
    opt_res = dict(x=res.x.tolist(), fun=res.fun.tolist())
    for k in ("success", "status", "message", "nfev", "nit"):
        opt_res[k] = res.get(k)
    opt_out = dict(opt_res=opt_res, opt_par=opt_par, grid_cost=None, grid_par=None)
    return opt_out

In [6]:
config_fname = "/cluster/p/p33/cluster/users/alexeas/x3mix/src/config/scz_lonely_bmi_config_3.json"
config = load_config(config_fname)

snps_df = load_snps(config["template_dir"], config["sumstats"],
        chromosomes=config["snp_filters"]["chromosomes"],
        z_thresh=config["snp_filters"]["z_thresh"],
        info_thresh=config["snp_filters"]["info_thresh"],
        maf_thresh=config["snp_filters"]["maf_thresh"],
        exclude_regions=config["snp_filters"]["exclude_regions"])

r2_het_hist, z_n_dict = load_opt_data(config["template_dir"], snps_df,
        r2_prune_thresh=config["pruning"]["r2_prune_thresh"],
        rand_prune_seed=config["pruning"]["rand_prune_seed"])

Reading template SNPs for 22 chromosomes from /cluster/projects/p33/users/alexeas/x3mix/data/template/ukb
    12926669 SNPs
Loading sumstats from /cluster/projects/p33/users/alexeas/x3mix/data/sumstats/PGC_SCZ_2014.sumstats.gz
    9394032 SNPs
Loading sumstats from /cluster/projects/p33/users/alexeas/x3mix/data/sumstats/UKB_LONELY_2018_MTAG.sumstats.gz
    7680423 SNPs
Loading sumstats from /cluster/projects/p33/users/alexeas/x3mix/data/sumstats/GIANT_BMI_2018_UKB_v2.sumstats.gz
    2332775 SNPs
2324869 common SNPs
2324869 SNPs with matched alleles
2324823 SNPs with Z < 32
2220017 SNPs with INFO > 0.9
2000780 SNPs with MAF > 0.05
    72192 SNPs excluded from 6:25000000-33000000
    26035 SNPs excluded from 8:7200000-12500000
    26508 SNPs excluded from 17:40000000-47000000
    22362 SNPs excluded from 19:42000000-47000000
1990395 SNPs after all filters
Loading LD data
Processing chr 1
    13781 SNPs survive pruning
    944.67 mean size of LD block of pruned SNPs
Processing chr 2
    1

In [12]:
opt_out_1x_list = []
for i, sumstats in enumerate(config["sumstats"]):
    print(f"Running univariate optimization for {sumstats}")
    z, n = z_n_dict[f"Z_{i}"], z_n_dict[f"N_{i}"]
    opt_out = optimize1x(z, n, r2_het_hist,
            config["optimization"]["n_samples_grid_1x"],
            config["optimization"]["n_samples_local_1x"],
            config["optimization"]["gpu_rng_seed"])
    opt_out_1x_list.append(opt_out)
    print(opt_out["opt_res"])

Running univariate optimization for /cluster/projects/p33/users/alexeas/x3mix/data/sumstats/PGC_SCZ_2014.sumstats.gz
Global opt with 16352 samples per variant.
cost = 1.6808694, p = 2.282e-03, sb2 = 1.855e-04, s02 = 1.8557
cost = 1.6298823, p = 4.020e-05, sb2 = 2.645e-06, s02 = 1.5690
cost = 1.6560129, p = 1.170e-04, sb2 = 4.025e-05, s02 = 2.1435
cost = 1.6553167, p = 7.824e-04, sb2 = 1.915e-05, s02 = 1.0621
cost = 1.6236453, p = 5.090e-04, sb2 = 1.044e-04, s02 = 1.4284
cost = 1.6461988, p = 1.798e-04, sb2 = 7.335e-06, s02 = 1.9926
cost = 1.7090080, p = 1.106e-05, sb2 = 4.844e-04, s02 = 0.8670
cost = 1.6702430, p = 8.292e-03, sb2 = 1.020e-06, s02 = 2.3348
cost = 1.6232991, p = 4.460e-03, sb2 = 5.630e-05, s02 = 0.9687
cost = 1.6760047, p = 1.944e-05, sb2 = 2.109e-05, s02 = 2.4228
cost = 1.6236442, p = 2.057e-04, sb2 = 3.786e-04, s02 = 1.3171
cost = 1.6394263, p = 4.226e-04, sb2 = 4.733e-06, s02 = 1.8685
cost = 1.7772984, p = 1.535e-03, sb2 = 8.212e-04, s02 = 2.2423
cost = 1.6521102, p =

Local opt with 65536 samples per variant.
cost = 1.6219803, p = 1.669e-03, sb2 = 6.926e-05, s02 = 1.3469
cost = 1.6221099, p = 1.212e-03, sb2 = 6.926e-05, s02 = 1.3469
cost = 1.6227176, p = 1.669e-03, sb2 = 4.290e-05, s02 = 1.3469
cost = 1.6235462, p = 1.669e-03, sb2 = 6.926e-05, s02 = 1.4143
cost = 1.6232874, p = 1.349e-03, sb2 = 5.033e-05, s02 = 1.2796
cost = 1.6223392, p = 1.422e-03, sb2 = 5.451e-05, s02 = 1.3133
cost = 1.6216134, p = 1.212e-03, sb2 = 9.530e-05, s02 = 1.3245
cost = 1.6220479, p = 1.033e-03, sb2 = 1.420e-04, s02 = 1.3133
cost = 1.6229051, p = 1.279e-03, sb2 = 1.089e-04, s02 = 1.3657
cost = 1.6218690, p = 1.385e-03, sb2 = 6.480e-05, s02 = 1.3264
cost = 1.6217537, p = 1.640e-03, sb2 = 8.197e-05, s02 = 1.3183
cost = 1.6213976, p = 1.177e-03, sb2 = 9.171e-05, s02 = 1.2991
cost = 1.6214556, p = 9.882e-04, sb2 = 1.055e-04, s02 = 1.2752
cost = 1.6220708, p = 1.272e-03, sb2 = 1.236e-04, s02 = 1.3016
cost = 1.6215843, p = 1.356e-03, sb2 = 7.615e-05, s02 = 1.3202
cost = 1.6218

cost = 1.5321814, p = 8.272e-03, sb2 = 1.823e-06, s02 = 0.8124
cost = 1.6148253, p = 1.387e-05, sb2 = 4.282e-06, s02 = 2.4146
cost = 1.5226843, p = 1.189e-03, sb2 = 1.749e-05, s02 = 0.8984
cost = 1.9182871, p = 2.568e-03, sb2 = 2.767e-04, s02 = 2.2966
cost = 1.8491461, p = 4.421e-03, sb2 = 1.294e-04, s02 = 1.4072
cost = 1.5420067, p = 2.458e-03, sb2 = 1.008e-06, s02 = 0.9029
cost = 1.5283010, p = 1.779e-04, sb2 = 1.624e-06, s02 = 1.0631
cost = 1.5208407, p = 1.434e-04, sb2 = 6.480e-05, s02 = 1.1128
cost = 1.5342967, p = 1.045e-03, sb2 = 3.223e-05, s02 = 1.2562
cost = 1.5249444, p = 5.547e-04, sb2 = 6.650e-06, s02 = 1.0311
cost = 1.5236514, p = 6.506e-04, sb2 = 9.868e-06, s02 = 1.3063
cost = 1.5435807, p = 1.072e-04, sb2 = 2.704e-04, s02 = 1.5241
cost = 1.5426565, p = 2.821e-03, sb2 = 1.157e-06, s02 = 1.6256
cost = 1.5232680, p = 9.530e-04, sb2 = 3.218e-05, s02 = 0.9158
cost = 1.5513999, p = 1.815e-05, sb2 = 1.462e-06, s02 = 0.8954
cost = 2.7934155, p = 9.360e-03, sb2 = 8.663e-04, s02 =

Global opt with 16352 samples per variant.
cost = 1.9482837, p = 1.911e-05, sb2 = 4.845e-05, s02 = 2.3452
cost = 1.9627627, p = 8.871e-04, sb2 = 2.261e-05, s02 = 1.3130
cost = 2.2814118, p = 4.067e-03, sb2 = 3.426e-04, s02 = 1.7922
cost = 2.2446737, p = 1.356e-04, sb2 = 2.646e-06, s02 = 1.1227
cost = 1.9304187, p = 1.331e-04, sb2 = 5.999e-04, s02 = 1.6396
cost = 1.9529241, p = 4.430e-03, sb2 = 1.596e-06, s02 = 2.2584
cost = 1.9682818, p = 6.256e-04, sb2 = 1.535e-04, s02 = 0.8246
cost = 1.9998175, p = 2.899e-05, sb2 = 6.757e-06, s02 = 1.9041
cost = 2.1296816, p = 3.845e-05, sb2 = 2.380e-04, s02 = 0.9602
cost = 1.9731933, p = 4.847e-04, sb2 = 4.356e-06, s02 = 2.0613
cost = 2.1078685, p = 7.909e-03, sb2 = 6.980e-05, s02 = 1.4759
cost = 1.9682736, p = 7.258e-05, sb2 = 1.373e-05, s02 = 2.0764
cost = 1.9278650, p = 2.416e-04, sb2 = 9.317e-05, s02 = 1.7480
cost = 2.0019979, p = 2.221e-03, sb2 = 9.721e-06, s02 = 1.0868
cost = 2.1183534, p = 1.174e-03, sb2 = 9.890e-04, s02 = 2.4144
cost = 2.142

cost = 1.9034394, p = 2.756e-03, sb2 = 3.040e-05, s02 = 1.4121
cost = 1.9034575, p = 2.695e-03, sb2 = 3.056e-05, s02 = 1.4116
cost = 1.9034419, p = 2.747e-03, sb2 = 3.025e-05, s02 = 1.4235
cost = 1.9034341, p = 2.828e-03, sb2 = 2.964e-05, s02 = 1.4176
cost = 1.9034624, p = 2.905e-03, sb2 = 2.910e-05, s02 = 1.4153
cost = 1.9034367, p = 2.812e-03, sb2 = 2.972e-05, s02 = 1.4125
cost = 1.9034378, p = 2.840e-03, sb2 = 2.912e-05, s02 = 1.4240
cost = 1.9034475, p = 2.819e-03, sb2 = 2.944e-05, s02 = 1.4211
cost = 1.9034363, p = 2.820e-03, sb2 = 2.968e-05, s02 = 1.4150
cost = 1.9034414, p = 2.791e-03, sb2 = 2.978e-05, s02 = 1.4209
cost = 1.9034379, p = 2.792e-03, sb2 = 3.002e-05, s02 = 1.4149
{'x': [-2.5485589730062452, -4.528131928581044, 1.4176041676174473], 'fun': 1.9034341202756164, 'success': True, 'status': 0, 'message': 'Optimization terminated successfully.', 'nfev': 44, 'nit': 21}


In [135]:
opt_out_2x_list = []
for i, j in ((0,1), (0,2), (1,2)):
    z_1, n_1 = z_n_dict[f"Z_{i}"], z_n_dict[f"N_{i}"]
    z_2, n_2 = z_n_dict[f"Z_{j}"], z_n_dict[f"N_{j}"]
    p_1, sb2_1, s02_1 = opt_out_1x_list[i]["opt_par"]
    p_2, sb2_2, s02_2 = opt_out_1x_list[j]["opt_par"]
    print(f"Running bivariate optimization for {config['sumstats'][i]} and {config['sumstats'][j]}")
    opt_out = optimize2x(p_1, sb2_1, s02_1, n_1, z_1, p_2, sb2_2, s02_2, n_2, z_2, r2_het_hist,
            config["optimization"]["n_samples_grid_2x"],
            config["optimization"]["n_samples_local_2x"],
            config["optimization"]["gpu_rng_seed"])
    opt_out_2x_list.append(opt_out)
    print(opt_out["opt_res"])
    print(f"opt_par = {opt_out['opt_par']}")

Running bivariate optimization for /cluster/projects/p33/users/alexeas/x3mix/data/sumstats/PGC_SCZ_2014.sumstats.gz and /cluster/projects/p33/users/alexeas/x3mix/data/sumstats/UKB_LONELY_2018_MTAG.sumstats.gz
Starting burn-in opt with 65504 samples per variant/thread
cost = 3.1397804, p12 = 8.731e-05, rho = 0.0000, rho0 = 0.0000
cost = 3.1397945, p12 = 5.472e-05, rho = 0.0000, rho0 = 0.0000
cost = 3.1397770, p12 = 8.731e-05, rho = 0.0003, rho0 = 0.0000
cost = 3.1397723, p12 = 8.731e-05, rho = 0.0000, rho0 = 0.0003
cost = 3.1397477, p12 = 1.393e-04, rho = 0.0002, rho0 = 0.0002
cost = 3.1397146, p12 = 2.223e-04, rho = 0.0003, rho0 = 0.0003
cost = 3.1397320, p12 = 1.628e-04, rho = 0.0003, rho0 = 0.0003
cost = 3.1396938, p12 = 2.466e-04, rho = 0.0001, rho0 = 0.0006
cost = 3.1396229, p12 = 4.145e-04, rho = 0.0001, rho0 = 0.0008
cost = 3.1395419, p12 = 6.967e-04, rho = 0.0004, rho0 = 0.0007
cost = 3.1393137, p12 = 1.968e-03, rho = 0.0007, rho0 = 0.0009
cost = 3.1393146, p12 = 1.968e-03, rho 

cost = 3.1383261, p12 = 2.411e-03, rho = 0.1867, rho0 = 0.0151
cost = 3.1383259, p12 = 2.411e-03, rho = 0.1863, rho0 = 0.0153
cost = 3.1383256, p12 = 2.411e-03, rho = 0.1867, rho0 = 0.0151
cost = 3.1383265, p12 = 2.411e-03, rho = 0.1862, rho0 = 0.0153
cost = 3.1383250, p12 = 2.411e-03, rho = 0.1862, rho0 = 0.0153
cost = 3.1383255, p12 = 2.411e-03, rho = 0.1862, rho0 = 0.0153
{'x': [-2.617863503543754, 0.1857724485212168, 0.015396388072196277], 'fun': 3.1383248969594604, 'success': True, 'status': 0, 'message': 'Optimization terminated successfully.', 'nfev': 94, 'nit': 49}
opt_par = [0.0024106629682424416, 0.1857724485212168, 0.015396388072196277]
Running bivariate optimization for /cluster/projects/p33/users/alexeas/x3mix/data/sumstats/PGC_SCZ_2014.sumstats.gz and /cluster/projects/p33/users/alexeas/x3mix/data/sumstats/GIANT_BMI_2018_UKB_v2.sumstats.gz
Starting burn-in opt with 65504 samples per variant/thread
cost = 3.5241603, p12 = 8.826e-05, rho = 0.0000, rho0 = 0.0000
cost = 3.524

cost = 3.5232384, p12 = 1.762e-03, rho = 0.0005, rho0 = 0.0005
cost = 3.5232452, p12 = 1.761e-03, rho = 0.0005, rho0 = 0.0005
cost = 3.5232465, p12 = 1.762e-03, rho = 0.0005, rho0 = 0.0005
cost = 3.5232435, p12 = 1.761e-03, rho = 0.0005, rho0 = 0.0005
cost = 3.5232464, p12 = 1.762e-03, rho = 0.0005, rho0 = 0.0005


KeyboardInterrupt: 

In [137]:
opt_out_2x_list = []
for i, j in ((0,2), (1,2)):
    z_1, n_1 = z_n_dict[f"Z_{i}"], z_n_dict[f"N_{i}"]
    z_2, n_2 = z_n_dict[f"Z_{j}"], z_n_dict[f"N_{j}"]
    p_1, sb2_1, s02_1 = opt_out_1x_list[i]["opt_par"]
    p_2, sb2_2, s02_2 = opt_out_1x_list[j]["opt_par"]
    print(f"Running bivariate optimization for {config['sumstats'][i]} and {config['sumstats'][j]}")
    opt_out = optimize2x(p_1, sb2_1, s02_1, n_1, z_1, p_2, sb2_2, s02_2, n_2, z_2, r2_het_hist,
            config["optimization"]["n_samples_grid_2x"],
            config["optimization"]["n_samples_local_2x"],
            config["optimization"]["gpu_rng_seed"])
    opt_out_2x_list.append(opt_out)
    print(opt_out["opt_res"])
    print(f"opt_par = {opt_out['opt_par']}")

Running bivariate optimization for /cluster/projects/p33/users/alexeas/x3mix/data/sumstats/PGC_SCZ_2014.sumstats.gz and /cluster/projects/p33/users/alexeas/x3mix/data/sumstats/GIANT_BMI_2018_UKB_v2.sumstats.gz
Starting burn-in opt with 65504 samples per variant/thread
cost = 3.5230318, p12 = 8.826e-05, rho = -0.1500, rho0 = 0.0000
cost = 3.5230616, p12 = 5.534e-05, rho = -0.1500, rho0 = 0.0000
cost = 3.5230082, p12 = 8.826e-05, rho = -0.1575, rho0 = 0.0000
cost = 3.5230241, p12 = 8.826e-05, rho = -0.1500, rho0 = 0.0003
cost = 3.5229374, p12 = 1.408e-04, rho = -0.1550, rho0 = 0.0002
cost = 3.5228466, p12 = 2.245e-04, rho = -0.1575, rho0 = 0.0003
cost = 3.5229020, p12 = 1.645e-04, rho = -0.1600, rho0 = 0.0003
cost = 3.5228089, p12 = 2.490e-04, rho = -0.1667, rho0 = 0.0001
cost = 3.5226374, p12 = 4.183e-04, rho = -0.1750, rho0 = 0.0001
cost = 3.5224427, p12 = 7.026e-04, rho = -0.1708, rho0 = 0.0004
cost = 3.5221699, p12 = 1.982e-03, rho = -0.1775, rho0 = 0.0007
cost = 3.5221795, p12 = 1.9

KeyboardInterrupt: 

In [14]:
opt_out_2x_list = []
for i, j in ((0,2), (1,2), (0,1)):
    z_1, n_1 = z_n_dict[f"Z_{i}"], z_n_dict[f"N_{i}"]
    z_2, n_2 = z_n_dict[f"Z_{j}"], z_n_dict[f"N_{j}"]
    p_1, sb2_1, s02_1 = opt_out_1x_list[i]["opt_par"]
    p_2, sb2_2, s02_2 = opt_out_1x_list[j]["opt_par"]
    print(f"Running bivariate optimization for {config['sumstats'][i]} and {config['sumstats'][j]}")
    opt_out = optimize2x(p_1, sb2_1, s02_1, n_1, z_1, p_2, sb2_2, s02_2, n_2, z_2, r2_het_hist,
            config["optimization"]["n_samples_grid_2x"],
            config["optimization"]["n_samples_local_2x"],
            config["optimization"]["gpu_rng_seed"])
    opt_out_2x_list.append(opt_out)
    print(opt_out["opt_res"])
    print(f"opt_par = {opt_out['opt_par']}")

Running bivariate optimization for /cluster/projects/p33/users/alexeas/x3mix/data/sumstats/PGC_SCZ_2014.sumstats.gz and /cluster/projects/p33/users/alexeas/x3mix/data/sumstats/GIANT_BMI_2018_UKB_v2.sumstats.gz
Starting burn-in opt with 32768 samples per variant/thread
cost = 3.5305913, p12 = 7.886e-05, rho = -0.4039, rho0 = 0.1919
cost = 3.5865277, p12 = 2.410e-04, rho = 0.8337, rho0 = -0.6166
cost = 3.6383590, p12 = 1.508e-03, rho = -0.6515, rho0 = 0.6876
cost = 3.5240012, p12 = 1.473e-05, rho = 0.0812, rho0 = -0.1123
cost = 4.2145564, p12 = 4.082e-06, rho = -0.7953, rho0 = -0.9059
cost = 3.5908083, p12 = 9.536e-04, rho = 0.4749, rho0 = 0.2937
cost = 3.5569442, p12 = 1.690e-04, rho = -0.0400, rho0 = -0.4021
cost = 4.0267572, p12 = 2.422e-05, rho = 0.7196, rho0 = 0.7899
cost = 3.5521218, p12 = 2.509e-05, rho = -0.5316, rho0 = -0.2565
cost = 4.0608531, p12 = 9.767e-05, rho = 0.2269, rho0 = 0.9275
cost = 3.7615639, p12 = 4.950e-04, rho = -0.2880, rho0 = -0.7527
cost = 3.7264996, p12 = 5.

cost = 3.5220489, p12 = 1.447e-03, rho = -0.1376, rho0 = -0.0302
cost = 3.5220917, p12 = 1.841e-03, rho = -0.1406, rho0 = -0.0365
cost = 3.5220366, p12 = 1.554e-03, rho = -0.1254, rho0 = -0.0229
cost = 3.5220818, p12 = 1.961e-03, rho = -0.1151, rho0 = -0.0200
cost = 3.5220282, p12 = 1.561e-03, rho = -0.1320, rho0 = -0.0276
cost = 3.5220811, p12 = 1.429e-03, rho = -0.1086, rho0 = -0.0224
cost = 3.5220262, p12 = 1.701e-03, rho = -0.1316, rho0 = -0.0264
cost = 3.5220387, p12 = 1.509e-03, rho = -0.1448, rho0 = -0.0267
cost = 3.5220312, p12 = 1.654e-03, rho = -0.1221, rho0 = -0.0251
cost = 3.5220387, p12 = 1.726e-03, rho = -0.1317, rho0 = -0.0298
cost = 3.5220254, p12 = 1.596e-03, rho = -0.1270, rho0 = -0.0246
cost = 3.5220331, p12 = 1.583e-03, rho = -0.1383, rho0 = -0.0273
cost = 3.5220318, p12 = 1.636e-03, rho = -0.1261, rho0 = -0.0256
cost = 3.5220288, p12 = 1.648e-03, rho = -0.1293, rho0 = -0.0255
cost = 3.5220362, p12 = 1.578e-03, rho = -0.1295, rho0 = -0.0261
cost = 3.5220336, p12 = 1

cost = 3.5220276, p12 = 1.595e-03, rho = -0.1277, rho0 = -0.0248
cost = 3.5220376, p12 = 1.595e-03, rho = -0.1277, rho0 = -0.0248
cost = 3.5220336, p12 = 1.595e-03, rho = -0.1277, rho0 = -0.0248
cost = 3.5220391, p12 = 1.595e-03, rho = -0.1277, rho0 = -0.0248
cost = 3.5220328, p12 = 1.595e-03, rho = -0.1277, rho0 = -0.0248
cost = 3.5220323, p12 = 1.595e-03, rho = -0.1277, rho0 = -0.0248
cost = 3.5220368, p12 = 1.595e-03, rho = -0.1277, rho0 = -0.0248
cost = 3.5220343, p12 = 1.595e-03, rho = -0.1277, rho0 = -0.0248
cost = 3.5220303, p12 = 1.595e-03, rho = -0.1277, rho0 = -0.0248
cost = 3.5220260, p12 = 1.595e-03, rho = -0.1277, rho0 = -0.0248
cost = 3.5220374, p12 = 1.595e-03, rho = -0.1277, rho0 = -0.0248
cost = 3.5220263, p12 = 1.595e-03, rho = -0.1277, rho0 = -0.0248
cost = 3.5220306, p12 = 1.595e-03, rho = -0.1277, rho0 = -0.0248
cost = 3.5220355, p12 = 1.595e-03, rho = -0.1277, rho0 = -0.0248
cost = 3.5220304, p12 = 1.595e-03, rho = -0.1277, rho0 = -0.0248
cost = 3.5220327, p12 = 1

cost = 3.5455979, p12 = 3.050e-04, rho = -0.3139, rho0 = 0.7032
cost = 3.6617420, p12 = 8.499e-04, rho = 0.6290, rho0 = -0.8125
cost = 3.4453727, p12 = 3.264e-05, rho = -0.5400, rho0 = 0.4394
cost = 3.4244714, p12 = 2.005e-05, rho = 0.5300, rho0 = 0.1160
cost = 3.5948960, p12 = 5.504e-06, rho = 0.5976, rho0 = 0.6294
cost = 3.4915948, p12 = 2.159e-04, rho = 0.0509, rho0 = 0.5423
cost = 3.4223802, p12 = 1.699e-05, rho = 0.2211, rho0 = -0.0491
cost = 3.4458386, p12 = 5.661e-05, rho = 0.7169, rho0 = -0.3280
cost = 3.4212948, p12 = 1.063e-04, rho = 0.2368, rho0 = -0.0317
cost = 3.5686134, p12 = 8.977e-05, rho = 0.7848, rho0 = -0.6953
cost = 3.4232568, p12 = 4.953e-05, rho = -0.1322, rho0 = 0.0238
cost = 3.4236980, p12 = 2.359e-04, rho = 0.3157, rho0 = -0.0902
cost = 3.4404435, p12 = 7.786e-05, rho = 0.6839, rho0 = 0.2154
cost = 3.4338503, p12 = 4.688e-05, rho = 0.6180, rho0 = 0.1883
cost = 3.4216563, p12 = 5.411e-04, rho = 0.5138, rho0 = 0.0624
cost = 3.4416809, p12 = 2.139e-04, rho = 0.930

cost = 3.4189089, p12 = 2.290e-03, rho = 0.1296, rho0 = 0.0630
cost = 3.4189092, p12 = 2.306e-03, rho = 0.1303, rho0 = 0.0622
cost = 3.4189114, p12 = 2.281e-03, rho = 0.1304, rho0 = 0.0622
cost = 3.4189125, p12 = 2.292e-03, rho = 0.1302, rho0 = 0.0624
cost = 3.4189133, p12 = 2.302e-03, rho = 0.1299, rho0 = 0.0628
cost = 3.4189099, p12 = 2.310e-03, rho = 0.1303, rho0 = 0.0624
cost = 3.4189108, p12 = 2.320e-03, rho = 0.1300, rho0 = 0.0627
cost = 3.4189149, p12 = 2.327e-03, rho = 0.1304, rho0 = 0.0623
cost = 3.4189129, p12 = 2.308e-03, rho = 0.1300, rho0 = 0.0627
cost = 3.4189133, p12 = 2.320e-03, rho = 0.1303, rho0 = 0.0624
cost = 3.4189138, p12 = 2.311e-03, rho = 0.1301, rho0 = 0.0626
cost = 3.4189165, p12 = 2.312e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189170, p12 = 2.316e-03, rho = 0.1301, rho0 = 0.0626
cost = 3.4189131, p12 = 2.311e-03, rho = 0.1302, rho0 = 0.0626
cost = 3.4189145, p12 = 2.307e-03, rho = 0.1303, rho0 = 0.0624
cost = 3.4189087, p12 = 2.309e-03, rho = 0.1302, rho0 =

cost = 3.4189129, p12 = 2.314e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189000, p12 = 2.314e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189129, p12 = 2.314e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189125, p12 = 2.314e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189128, p12 = 2.314e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189109, p12 = 2.314e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189054, p12 = 2.314e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189120, p12 = 2.314e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189110, p12 = 2.314e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189111, p12 = 2.314e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189102, p12 = 2.314e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189108, p12 = 2.314e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189091, p12 = 2.314e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189052, p12 = 2.314e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189083, p12 = 2.314e-03, rho = 0.1303, rho0 = 0.0625
cost = 3.4189079, p12 = 2.314e-03, rho = 0.1303, rho0 =

cost = 3.3819061, p12 = 6.573e-05, rho = 0.8003, rho0 = 0.5797
cost = 3.9815082, p12 = 1.327e-05, rho = -0.9792, rho0 = -0.7836
cost = 3.2433373, p12 = 3.664e-06, rho = 0.4465, rho0 = 0.4616
cost = 3.1470811, p12 = 1.175e-04, rho = -0.8423, rho0 = 0.0778
cost = 3.1679085, p12 = 4.457e-06, rho = -0.1556, rho0 = 0.3285
cost = 3.2214766, p12 = 1.083e-03, rho = -0.0342, rho0 = -0.3999
cost = 4.5043700, p12 = 1.342e-05, rho = -0.5757, rho0 = -0.9392
cost = 3.1431232, p12 = 2.285e-05, rho = -0.3689, rho0 = 0.1662
cost = 3.1488344, p12 = 6.767e-05, rho = -0.1466, rho0 = -0.0983
cost = 3.1620457, p12 = 8.197e-04, rho = 0.9847, rho0 = 0.1348
cost = 3.2005048, p12 = 1.161e-05, rho = -0.8825, rho0 = 0.4895
cost = 3.1388529, p12 = 8.677e-06, rho = 0.1006, rho0 = 0.0316
cost = 3.3451510, p12 = 1.376e-05, rho = -0.2623, rho0 = -0.5603
cost = 3.1406822, p12 = 2.587e-05, rho = 0.1292, rho0 = 0.1038
cost = 3.2523430, p12 = 1.111e-05, rho = 0.0557, rho0 = -0.4694
cost = 3.1578173, p12 = 7.644e-05, rho =

cost = 3.1383598, p12 = 2.112e-03, rho = 0.2499, rho0 = 0.0115
cost = 3.1383971, p12 = 2.332e-03, rho = 0.2824, rho0 = -0.0000
cost = 3.1384253, p12 = 1.159e-03, rho = 0.2300, rho0 = 0.0106
cost = 3.1383807, p12 = 1.958e-03, rho = 0.2246, rho0 = 0.0221
cost = 3.1383933, p12 = 2.332e-03, rho = 0.2414, rho0 = 0.0188
cost = 3.1383795, p12 = 2.290e-03, rho = 0.2353, rho0 = 0.0190
cost = 3.1383823, p12 = 2.332e-03, rho = 0.2605, rho0 = 0.0113
cost = 3.1383754, p12 = 2.332e-03, rho = 0.2486, rho0 = 0.0144
cost = 3.1383752, p12 = 2.332e-03, rho = 0.2645, rho0 = 0.0079
cost = 3.1383805, p12 = 2.223e-03, rho = 0.2734, rho0 = 0.0036
cost = 3.1383711, p12 = 2.273e-03, rho = 0.2448, rho0 = 0.0151
cost = 3.1383648, p12 = 2.146e-03, rho = 0.2576, rho0 = 0.0086
cost = 3.1383627, p12 = 2.030e-03, rho = 0.2370, rho0 = 0.0157
cost = 3.1383590, p12 = 1.932e-03, rho = 0.2515, rho0 = 0.0087
cost = 3.1383688, p12 = 1.781e-03, rho = 0.2549, rho0 = 0.0055
cost = 3.1383548, p12 = 1.908e-03, rho = 0.2347, rho0 

cost = 3.1383594, p12 = 1.908e-03, rho = 0.2347, rho0 = 0.0154
cost = 3.1383549, p12 = 1.908e-03, rho = 0.2347, rho0 = 0.0154
cost = 3.1383576, p12 = 1.908e-03, rho = 0.2347, rho0 = 0.0154
cost = 3.1383574, p12 = 1.908e-03, rho = 0.2347, rho0 = 0.0154
cost = 3.1383597, p12 = 1.908e-03, rho = 0.2347, rho0 = 0.0154
cost = 3.1383565, p12 = 1.908e-03, rho = 0.2347, rho0 = 0.0154
cost = 3.1383576, p12 = 1.908e-03, rho = 0.2347, rho0 = 0.0154
cost = 3.1383570, p12 = 1.908e-03, rho = 0.2347, rho0 = 0.0154
cost = 3.1383572, p12 = 1.908e-03, rho = 0.2347, rho0 = 0.0154
cost = 3.1383581, p12 = 1.908e-03, rho = 0.2347, rho0 = 0.0154
cost = 3.1383588, p12 = 1.908e-03, rho = 0.2347, rho0 = 0.0154
cost = 3.1383588, p12 = 1.908e-03, rho = 0.2347, rho0 = 0.0154
cost = 3.1383610, p12 = 1.908e-03, rho = 0.2347, rho0 = 0.0154
cost = 3.1383584, p12 = 1.908e-03, rho = 0.2347, rho0 = 0.0154
cost = 3.1383576, p12 = 1.908e-03, rho = 0.2347, rho0 = 0.0154
cost = 3.1383573, p12 = 1.908e-03, rho = 0.2347, rho0 =