## Format mlink input

In [None]:
# Write LINKAGE-format input files (.PRE pedigree file and .LOC locus file),
# one pair per locus (marker) and per family.

def _load_family_allele_freqs(freq_path):
    """Read the per-family, per-marker allele-frequency table at ``freq_path``.

    Every non-blank line is whitespace separated: the first two fields form
    the lookup key and all remaining fields are allele frequencies.  Each
    frequency is floored at 1e-3 and the row is then rescaled so that it sums
    to one.

    Returns:
        dict mapping ``(field0, field1)`` to a list of frequency strings.

    Raises:
        IOError: if the file cannot be opened or read.
        ValueError: if a frequency field is not a valid float.
    """
    table = {}
    with open(freq_path) as freq_fh:
        for row in freq_fh:
            fields = row.split()
            if not fields:
                # Tolerate blank lines instead of failing on fields[0].
                continue
            floored = [max(1e-3, float(x)) for x in fields[2:]]
            relative = np.array(floored) / sum(floored)
            table[(fields[0], fields[1])] = list(map(str, relative))
    return table


def _write_loc_file(loc_path, prev, wild_pen, heter_pen, muta_pen,
                    n_alleles, allele_freqs, theta_inc, theta_max):
    """Write the locus (.LOC) description consumed by the linkage run.

    NOTE(review): the fixed header lines look like the classic LINKAGE
    datafile layout (two loci: a two-allele disease locus followed by a
    numbered-allele marker locus) -- confirm against the MLINK documentation
    before changing any literal below.
    """
    lines = [
        "2 0 0 5\n",
        "0 0.0 0.0 0\n",
        "1 2\n",
        "1 2\n",
        # Frequencies of the two trait alleles, derived from ``prev``.
        " {} {}\n".format(1 - prev, prev),
        " 1\n",
        # Penetrances for the three trait genotypes.
        " {} {} {}\n".format(wild_pen, heter_pen, muta_pen),
        # Marker locus: number of observed alleles and their frequencies.
        "3 {}\n".format(n_alleles),
        ' ' + ' '.join(allele_freqs) + "\n",
        "0 0\n",
        "0.0\n",
        "1 {} {}\n".format(theta_inc, theta_max),
    ]
    with open(loc_path, 'w') as loc:
        loc.writelines(lines)


def format_linkage(tped, tfam, prev, wild_pen, muta_pen, inherit_mode, theta_max, theta_inc):
    """Write per-marker, per-family ``.PRE``/``.LOC`` files from a TPED/TFAM pair.

    Output goes to ``<env.outdir>/LINKAGE/<tped stem>/<gene>/<family>/<gno>.PRE``
    and ``.LOC``.  Allele frequencies are read from
    ``<env.tmp_cache>/<tped stem>.freq``; a family with no frequency entry for
    a marker, or with ten or more distinct non-missing alleles, is skipped and
    counted in ``env.skipped_counter``.

    Args:
        tped: path of the transposed PED file.  Column 2 holds the marker
            name, optionally suffixed with ``[n]`` (a sub-unit number); the
            two alleles of the member with index ``i`` are read from columns
            ``2*i + 4`` and ``2*i + 5`` (0-based).
        tfam: path of the family file, parsed with ``parse_tfam``.
        prev: value written as the second trait-allele frequency
            (``1 - prev`` is written as the first).
        wild_pen: first penetrance value written to the LOC file.
        muta_pen: third penetrance value written to the LOC file.
        inherit_mode: ``'AD'`` makes the middle (heterozygote) penetrance
            equal to ``muta_pen``; any other value uses ``wild_pen``.
        theta_max: last field of the final LOC line.
        theta_inc: second field of the final LOC line.

    Side effects:
        Creates/removes directories, writes files, bumps
        ``env.format_counter`` / ``env.skipped_counter`` and emits progress
        messages through ``env.log``.
    """
    out_base = '{}/LINKAGE/{}'.format(env.outdir, splitext(basename(tped))[0])
    with open(tped) as tped_fh, open(tfam) as tfam_fh:
        fams = parse_tfam(tfam_fh)
        # Per-family, per-marker allele frequencies.  A plain dict is used so
        # that looking up a missing key does not insert an empty entry.
        try:
            af = _load_family_allele_freqs(
                os.path.join(env.tmp_cache, basename(out_base) + '.freq'))
        except IOError:
            af = {}
            env.error('freq info not properly read for [{}]'.format(basename(out_base)))
        heter_pen = muta_pen if inherit_mode == 'AD' else wild_pen
        # Compiled once: splits "GENE[3]" into ("GENE", "3"), "GENE" into ("GENE", None).
        marker_re = re.compile(r'^(\S+?)(?:\[(\d+)\])?$')
        for line in tped_fh:
            s = line.split()
            if not s:
                # Skip blank lines rather than failing on s[1].
                continue
            gene, gno = marker_re.search(s[1]).groups()
            # A unit is counted once: on its only line (no suffix) or on its
            # first sub-unit ("[1]").  Progress is reported only right after
            # an increment, so later sub-unit lines cannot repeat the message.
            if not gno or gno == '1':
                with env.format_counter.get_lock():
                    env.format_counter.value += 1
                    processed = env.format_counter.value
                if processed % (env.batch * env.jobs) == 0:
                    env.log('{:,d} units processed {{{:.2%}}} ...'.format(processed, float(processed)/env.success_counter.value), flush=True)
            if not gno:
                gno = '0'
            for fid in fams:
                family = fams[fid]
                workdir = '{}/{}/{}'.format(out_base, gene, fid)
                with env.lock:
                    mkpath(workdir)
                fam_af = af.get((fid, s[1]))
                if not fam_af:
                    # No frequency information for this family at this marker.
                    with env.skipped_counter.get_lock():
                        env.skipped_counter.value += 1
                    removeEmptyDir(workdir)
                    continue
                ids = family.get_sorted_ids()
                idxes = [family.get_member_idx(pid) for pid in ids]
                # Distinct non-missing ('0' = missing) alleles seen in this family.
                gs_num = len({allele
                              for idx in idxes
                              for allele in s[2 * idx + 4 : 2 * idx + 6]
                              if allele != '0'})
                if gs_num >= 10:
                    # NOTE(review): presumably a limit of the downstream
                    # linkage program on the number of marker alleles -- confirm.
                    with env.skipped_counter.get_lock():
                        env.skipped_counter.value += 1
                    removeEmptyDir(workdir)
                    continue
                # workdir already exists here (created above and only removed
                # on the skip paths), so it is not re-created under the lock.
                with open('{}/{}.PRE'.format(workdir, gno), 'w') as pre:
                    pre.writelines(
                        "{} {} {} {}\n".format(fid, family.print_member(pid), s[2 * idx + 4], s[2 * idx + 5])
                        for pid, idx in zip(ids, idxes))
                _write_loc_file('{}/{}.LOC'.format(workdir, gno), prev,
                                wild_pen, heter_pen, muta_pen,
                                gs_num, fam_af, theta_inc, theta_max)

            removeEmptyDir('{}/{}'.format(out_base, gene))
    # Both input files are closed by the ``with`` block above.
    removeEmptyDir(out_base)



## Run mlink