In [1]:
import subprocess
from collections import defaultdict
import os
import os.path as osp
import re
import pickle

import numpy as np

In [2]:
RE_FNAME = re.compile(r'(BBA\d{4})_alpha_(\d+).aln')
RE_OUTPUT_CS = re.compile(r'CS score= (\d\.\d+)')
RE_OUTPUT_SP = re.compile(r'SP score= (\d\.\d+)')
RE_OUTPUT_CS_JAVA = re.compile(r'TC (\d\.\d+)')
RE_OUTPUT_SP_JAVA = re.compile(r'SP-Score (\d\.\d+)')
ALIGNMENTS_PRALINE_DIR = './alignments/praline/'
ALIGNMENTS_REFERENCE_DIR = './RV100/'
SCORE_BINARY = './bin/bali_score'
SCORE_JAR = './FastSP/FastSP.jar'

In [3]:
cs_score_per_group_alpha = defaultdict(dict)
sp_score_per_group_alpha = defaultdict(dict)

for fname in os.listdir(ALIGNMENTS_PRALINE_DIR):
    m = RE_FNAME.match(fname)
    if not m:
        continue
    
    balibase_group = m.group(1)
    alpha = int(m.group(2))
    
    ref_path = osp.join(ALIGNMENTS_REFERENCE_DIR, "{}.xml".format(balibase_group))
    aln_path = osp.join(ALIGNMENTS_PRALINE_DIR, fname)
    
    output = subprocess.check_output([SCORE_BINARY, ref_path, aln_path])
    
    cs_score = float(RE_OUTPUT_CS.search(output).group(1))
    sp_score = float(RE_OUTPUT_SP.search(output).group(1))
    
    cs_score_per_group_alpha[balibase_group][alpha] = cs_score
    sp_score_per_group_alpha[balibase_group][alpha] = sp_score

In [7]:
cs_score_per_group_alpha = defaultdict(dict)
sp_score_per_group_alpha = defaultdict(dict)

for fname in os.listdir(ALIGNMENTS_PRALINE_DIR):
    m = RE_FNAME.match(fname)
    if not m:
        continue
    
    balibase_group = m.group(1)
    alpha = int(m.group(2))
    
    ref_path = osp.join(ALIGNMENTS_REFERENCE_DIR, "{}.aln".format(balibase_group))
    aln_path = osp.join(ALIGNMENTS_PRALINE_DIR, fname)
    
    output = subprocess.check_output(['java', '-jar', SCORE_JAR, '-r', ref_path, '-e', aln_path])
    
    cs_score = float(RE_OUTPUT_CS_JAVA.search(output).group(1))
    sp_score = float(RE_OUTPUT_SP_JAVA.search(output).group(1))
    
    cs_score_per_group_alpha[balibase_group][alpha] = cs_score
    sp_score_per_group_alpha[balibase_group][alpha] = sp_score

In [8]:
ALPHAS = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 100]

cs_scores_per_alpha = defaultdict(list)
sp_scores_per_alpha = defaultdict(list)

for alpha in ALPHAS:
    for score_dict in cs_score_per_group_alpha.itervalues():
        cs_scores_per_alpha[alpha].append(score_dict[alpha])
    for score_dict in sp_score_per_group_alpha.itervalues():
        sp_scores_per_alpha[alpha].append(score_dict[alpha])

In [9]:
for alpha in ALPHAS:
    scores = cs_scores_per_alpha[alpha]
    
    a = np.array(scores)
    print "{:>3}  {:.3f}  {:.3f}".format(alpha, a.mean(), a.std())

  0  0.334  1.018
  5  0.224  0.518
 10  0.213  0.461
 15  0.213  0.648
 20  0.224  0.685
 25  0.246  0.886
 30  0.214  0.876
 35  0.155  0.460
 40  0.187  0.915
 45  0.200  0.952
 50  0.159  0.837
100  0.217  1.160


In [10]:
for alpha in ALPHAS:
    scores = sp_scores_per_alpha[alpha]
    
    a = np.array(scores)
    print "{:>3}  {:.3f}  {:.3f}".format(alpha, a.mean(), a.std())

  0  0.664  0.188
  5  0.668  0.186
 10  0.661  0.188
 15  0.646  0.189
 20  0.630  0.191
 25  0.603  0.197
 30  0.576  0.202
 35  0.554  0.201
 40  0.528  0.201
 45  0.502  0.199
 50  0.480  0.202
100  0.341  0.190


In [11]:
with open('./sp_scores.pickle', 'wb') as fo:
    pickle.dump((sp_scores_per_alpha, sp_score_per_group_alpha), fo)