In [1]:
# First Import the libraries needed;

# basic libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_style("whitegrid")
# `display` and `HTML` are part of the public IPython.display API; importing
# `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML

# proalign libraries
import string_db
import interface
import organism
import align
import constants as cs


In [2]:
# basic percent generator if is number
def as_percent(v, precision='0.2'):
    """Convert a number to a percentage string; pass non-numbers through.

    Args:
        v: Value to format. Anything ``float()`` accepts (numbers, numeric
            strings) is converted.
        precision: Text placed between ``:`` and ``%`` in the format spec;
            the default ``'0.2'`` gives two decimal places.

    Returns:
        ``float(v)`` rendered with the ``%`` presentation type, e.g.
        ``'50.00%'`` for ``0.5``. If conversion or formatting raises
        ``TypeError``, ``ValueError`` or ``OverflowError``, ``v`` is returned
        unchanged.
    """
    try:
        return "{{:{}%}}".format(precision).format(float(v))
    # Only conversion/formatting failures mean "not a number". A bare
    # ``except:`` would also swallow KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, OverflowError):
        return v


In [3]:
%%capture
# Initialization: choose the organisms to compare and load their PPI networks.

# Organism IDs and display names, index-aligned: organism_ids[i] is labelled
# organism_names[i] when network pairs are named.
# The three-organism configuration used to be assigned here and then
# immediately overwritten by the two-organism one below; it is kept as a
# comment because cells further down look up 'yeast-human' / 'fly-human'.
# organism_ids = ['4932', '7227', '9606']
# organism_names = ['yeast', 'fly', 'human']
organism_ids = ['4932', '7227']
organism_names = ['yeast', 'fly']

# Read one PPI network per organism, in the same order as organism_ids
# (alignment_result finds a network by its ID's position in that list).
organism_objs = [string_db.parse_organism(org_id) for org_id in organism_ids]


In [4]:
# a function to run an alignment and return the results (make sure to update constants if needed)
def alignment_result(orgs, align_method, similarity_mode='raw_blast'):
    """Align the networks of two organisms and return the aligner's scores.

    Cells set tuning values on the ``cs`` constants module before calling
    this function; presumably the aligner reads them -- confirm in ``align``.

    Args:
        orgs: Organism IDs, each present in ``organism_ids``. They are
            sorted and the first two are aligned. The caller's list is left
            untouched.
        align_method: Algorithm name handed to ``align.Aligner``.
        similarity_mode: Similarity mode handed to ``organism.BioNet``.

    Returns:
        dict with the keys 'CE', 'EC', 'LCCS', 'ICS', 'S^3' and
        'Frobenius Norm', read from the aligner after ``align`` has run.

    Raises:
        ValueError: if an ID is missing from ``organism_ids``.
        IndexError: if fewer than two IDs are given.
    """
    # Sort a copy: the previous in-place ``orgs.sort()`` silently reordered
    # the list owned by the caller.
    sorted_ids = sorted(orgs)

    # get Organism network objects, matched to their IDs by position
    orgobjs = [organism_objs[organism_ids.index(org_id)] for org_id in sorted_ids]

    # create the combined biological alignment network
    bio_net = organism.BioNet(orgobjs[0], orgobjs[1], similarity_mode)

    # create the aligner and align the networks
    aligner = align.Aligner(align_method)
    aligner.align(bio_net)

    # Collection of GOC / PWS1 / PWS2 / NBS is disabled. A DataFrame built
    # from these results with such column names gets NaN in those columns.
    # results['GOC'] = aligner.GOC
    # results['PWS1'] = aligner.PWS1
    # results['PWS2'] = aligner.PWS2
    # results['NBS'] = aligner.nbs
    results = {
        'CE': aligner.ce,
        'EC': aligner.ec,
        'LCCS': aligner.lccs,
        'ICS': aligner.ics,
        'S^3': aligner.s3,
        'Frobenius Norm': aligner.frobenius,
    }

    # return all results
    return results



---

## Algorithms Comparison:

Let's see how different algorithms score on various datasets:

In [5]:
%%capture
# Benchmark every selected algorithm on every pair of loaded organisms.

# our main algorithms constants (cs.SEED_PR_ALPHA is not changed here):
cs.SEED_KEEP_RATIO = 0.3
cs.MAX_SEED_SIZE = 1400

# algorithm selection
algs = ['isoN', 'NETAL', 'pinalog', 'CGRAAL', 'GRAAL', 'MIGRAAL', 'HubAlign', 'MAGNA', 'PROPER',
        'SPINAL-I', 'SPINAL-II']

# Score columns of each table: exactly the keys alignment_result returns.
# (A previous assignment that also listed GOC / PWS1 / NBS was overwritten
# before use and has been removed.)
columns = ['CE', 'EC', 'LCCS', 'ICS', 'S^3', 'Frobenius Norm']

# One table per organism pair, keyed '<name1>-<name2>'.
dfs = {}

for i1 in range(len(organism_ids)):
    for i2 in range(i1 + 1, len(organism_ids)):
        name = '{}-{}'.format(organism_names[i1], organism_names[i2])

        # organism selection:
        orgs = [organism_ids[i1], organism_ids[i2]]

        # one result dict (= one table row) per algorithm
        data = [alignment_result(orgs, alg) for alg in algs]

        dfs[name] = pd.DataFrame(data=data, index=algs, columns=columns)


In [6]:
# Render every comparison table under a heading naming its organism pair.
for pair_name, table in dfs.items():
    display(HTML('<h1>{}</h1>'.format(pair_name)))
    display(table)

# Print the current values of the main constants.
summary = 'Main parameteres: SEED_KEEP_RATIO: {}, MAX_SEED_SIZE: {}, PAGERANK_ALPHA: {}'
print(summary.format(cs.SEED_KEEP_RATIO, cs.MAX_SEED_SIZE, cs.SEED_PR_ALPHA))

Unnamed: 0,CE,EC,LCCS,ICS,S^3,Frobenius Norm
isoN,6990,0.509512,544,0.774429,0.443669,172.232401
NETAL,212,0.015453,35,0.010716,0.006368,253.373242
pinalog,9400,0.685181,731,0.561898,0.446598,165.402539
CGRAAL,5688,0.414607,661,0.273857,0.197493,216.041663
GRAAL,5944,0.433268,210,0.342357,0.236464,213.658606
MIGRAAL,5925,0.431883,549,0.30587,0.218112,213.836386
HubAlign,8605,0.627232,224,0.414319,0.332458,187.098904
MAGNA,95,0.006925,17,0.005149,0.002962,262.766056
PROPER,10039,0.731759,771,0.525437,0.440578,161.027948
SPINAL-I,9690,0.70632,737,0.489147,0.406477,167.056877


Main parameteres: SEED_KEEP_RATIO: 0.3, MAX_SEED_SIZE: 1400, PAGERANK_ALPHA: 0.85


In [7]:
# Or view all scores combined:
# (the format string has two placeholders, so only two values are passed)
print('Main parameteres: SEED_KEEP_RATIO: {}, MAX_SEED_SIZE: {}'.format(
      cs.SEED_KEEP_RATIO, cs.MAX_SEED_SIZE))

# Stack the tables of every pair that was actually computed. With all three
# organisms loaded this is ['yeast-fly', 'yeast-human', 'fly-human'], the
# order in which the pairs were inserted into `dfs`.
net_names = list(dfs)
pd.concat([dfs[x] for x in net_names], axis=0, keys=net_names)


Main parameteres: SEED_KEEP_RATIO: 0.3, MAX_SEED_SIZE: 1400


Unnamed: 0,Unnamed: 1,CE,EC,LCCS,ICS,S^3,Frobenius Norm
yeast-fly,isoN,6990,0.509512,544,0.774429,0.443669,172.232401
yeast-fly,NETAL,212,0.015453,35,0.010716,0.006368,253.373242
yeast-fly,pinalog,9400,0.685181,731,0.561898,0.446598,165.402539
yeast-fly,CGRAAL,5688,0.414607,661,0.273857,0.197493,216.041663
yeast-fly,GRAAL,5944,0.433268,210,0.342357,0.236464,213.658606
yeast-fly,MIGRAAL,5925,0.431883,549,0.30587,0.218112,213.836386
yeast-fly,HubAlign,8605,0.627232,224,0.414319,0.332458,187.098904
yeast-fly,MAGNA,95,0.006925,17,0.005149,0.002962,262.766056
yeast-fly,PROPER,10039,0.731759,771,0.525437,0.440578,161.027948
yeast-fly,SPINAL-I,9690,0.70632,737,0.489147,0.406477,167.056877


---

## Parameter Comparison

Now let's compare different parameter settings of our own algorithm:

---

### bucket size & keep ratio


In [13]:
%%capture

# Sweep bucket size x keep ratio for each of our algorithm variants.

# list of our algorithms
our_algs = ['seedexsingle-prd', 'seedexsingle-prd-jac', 'seedexsingle-prd-ada']

# list of constants to compare:
bucket_size = [800, 1000, 1200, 1400]
keep_ratio = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
pagerank_alpha = [0.85]

# Score columns: exactly the keys alignment_result returns. (A previous
# assignment listing GOC / PWS1 / NBS was overwritten before use; removed.)
columns = ['CE', 'EC', 'LCCS', 'ICS', 'S^3', 'Frobenius Norm']

# organism selection:
orgs = ['4932', '7227']

alldfs1 = []

# NOTE(review): frames are appended per (alpha, algorithm) but the final
# concat is keyed by algorithm only, so the keys line up only while
# pagerank_alpha holds a single value.
for alpha in pagerank_alpha:
    cs.SEED_PR_ALPHA = alpha

    for alg in our_algs:
        alg_dfs = []

        for bs in bucket_size:
            cs.MAX_SEED_SIZE = bs
            data = []

            for kr in keep_ratio:
                cs.SEED_KEEP_RATIO = kr
                data.append(alignment_result(orgs, alg))

            # one row per keep ratio for this bucket size
            alg_dfs.append(pd.DataFrame(data=data,
                                        index=['keep ratio: {}'.format(x) for x in keep_ratio],
                                        columns=columns))

        # stack the bucket sizes of this algorithm under a 'bucket size' level
        alldfs1.append(pd.concat(alg_dfs, axis=0,
                                 keys=['bucket size: {}'.format(x) for x in bucket_size]))

# outermost index level: algorithm name
calldfs1 = pd.concat(alldfs1, axis=0, keys=our_algs)



In [14]:
# Show the whole table: pandas' row/column display limits are lifted for this
# one call and restored when the block exits.
unlimited = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*unlimited):
    display(calldfs1)



Unnamed: 0,Unnamed: 1,Unnamed: 2,CE,EC,LCCS,ICS,S^3,Frobenius Norm
seedexsingle-prd,bucket size: 800,keep ratio: 0.1,9214,0.671623,605,0.447825,0.367384,180.471604
seedexsingle-prd,bucket size: 800,keep ratio: 0.2,9242,0.673664,668,0.451093,0.370198,180.161039
seedexsingle-prd,bucket size: 800,keep ratio: 0.3,8953,0.652599,334,0.436625,0.35428,183.341212
seedexsingle-prd,bucket size: 800,keep ratio: 0.4,9424,0.686931,625,0.459371,0.379847,178.129167
seedexsingle-prd,bucket size: 800,keep ratio: 0.5,9668,0.704716,666,0.470119,0.392753,175.368184
seedexsingle-prd,bucket size: 800,keep ratio: 0.6,9476,0.690721,554,0.460604,0.38185,177.544361
seedexsingle-prd,bucket size: 800,keep ratio: 0.7,9307,0.678402,567,0.454532,0.373955,179.438012
seedexsingle-prd,bucket size: 1000,keep ratio: 0.1,8805,0.641811,380,0.429407,0.346394,184.948642
seedexsingle-prd,bucket size: 1000,keep ratio: 0.2,9792,0.713755,633,0.475686,0.399478,173.948268
seedexsingle-prd,bucket size: 1000,keep ratio: 0.3,9794,0.7139,597,0.47841,0.401443,173.925271



---

### bucket size & seed size


In [None]:
%%capture

# Sweep bucket size x seed size; the seed size is passed on as the keep
# ratio seed_size / bucket_size.

# list of our algorithms
our_algs = ['seedexsingle-prd']

# list of constants to compare:
bucket_size = [800, 1000, 1200, 1400]
seed_size = [100, 200, 300, 400, 500, 600, 700, 800]
pagerank_alpha = [0.85]

# Score columns: exactly the keys alignment_result returns. (A previous
# assignment listing GOC / PWS1 / NBS was overwritten before use; removed.)
columns = ['CE', 'EC', 'LCCS', 'ICS', 'S^3', 'Frobenius Norm']

# organism selection:
orgs = ['4932', '7227']

alldfs2 = []

# NOTE(review): frames are appended per (alpha, algorithm) but the final
# concat is keyed by algorithm only, so the keys line up only while
# pagerank_alpha holds a single value.
for alpha in pagerank_alpha:
    cs.SEED_PR_ALPHA = alpha

    for alg in our_algs:
        dfs2 = []
        for bs in bucket_size:
            cs.MAX_SEED_SIZE = bs
            data = []
            for ss in seed_size:
                cs.SEED_KEEP_RATIO = ss / bs
                data.append(alignment_result(orgs, alg))

            # one row per seed size for this bucket size
            dfs2.append(pd.DataFrame(data=data,
                                     index=['seed size: {}'.format(x) for x in seed_size],
                                     columns=columns))

        # stack the bucket sizes of this algorithm under a 'bucket size' level
        alldfs2.append(pd.concat(dfs2, axis=0,
                                 keys=['bucket size: {}'.format(x) for x in bucket_size]))

# outermost index level: algorithm name
calldfs2 = pd.concat(alldfs2, axis=0, keys=our_algs)



In [None]:
# Show the whole table: pandas' row/column display limits are lifted for this
# one call and restored when the block exits.
unlimited = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*unlimited):
    display(calldfs2)



---

### Normalized blast input


In [None]:
%%capture

# Bucket size x seed size sweep using the 'rel_blast' similarity mode.

# algorithm variants under test
our_algs = ['seedexsingle', 'seedexsingle-jac', 'seedexsingle-ada']

# constants to compare
bucket_size = [1000, 1200, 1400]
seed_size = [300, 400, 500]

# NOTE(review): alignment_result in this notebook returns no 'GOC', 'PWS1'
# or 'NBS' keys, so those columns come out as NaN unless their collection
# is re-enabled there.
columns = ['CE', 'EC', 'LCCS', 'ICS', 'S^3', 'GOC', 'PWS1', 'NBS']

# organism selection
orgs = ['4932', '7227']

seed_labels = ['seed size: {}'.format(size) for size in seed_size]
bucket_labels = ['bucket size: {}'.format(size) for size in bucket_size]

alldfs3 = []
for alg in our_algs:
    per_bucket = []
    for bs in bucket_size:
        cs.MAX_SEED_SIZE = bs
        rows = []
        for ss in seed_size:
            # the seed size is expressed as a keep ratio of the bucket
            cs.SEED_KEEP_RATIO = ss / bs
            rows.append(alignment_result(orgs, alg, similarity_mode='rel_blast'))
        per_bucket.append(pd.DataFrame(data=rows, index=seed_labels, columns=columns))
    alldfs3.append(pd.concat(per_bucket, axis=0, keys=bucket_labels))

# outermost index level: algorithm name
calldfs3 = pd.concat(alldfs3, axis=0, keys=our_algs)



In [None]:
# Show the whole table: pandas' row/column display limits are lifted for this
# one call and restored when the block exits.
unlimited = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*unlimited):
    display(calldfs3)



### seed bucket

In [None]:
%%capture

# Seed size x bucket size sweep, grouped by seed size first.

# algorithm variants under test
our_algs = ['seedexsingle', 'seedexsingle-jac', 'seedexsingle-ada']

# constants to compare
bucket_size = [800, 1000, 1200, 1400]
seed_size = [100, 200, 300, 400, 500, 600, 700, 800]

# NOTE(review): alignment_result in this notebook returns no 'GOC', 'PWS1'
# or 'NBS' keys, so those columns come out as NaN unless their collection
# is re-enabled there.
columns = ['CE', 'EC', 'LCCS', 'ICS', 'S^3', 'GOC', 'PWS1', 'NBS']

# organism selection
orgs = ['4932', '7227']

bucket_labels = ['bucket size: {}'.format(size) for size in bucket_size]
seed_labels = ['seed size: {}'.format(size) for size in seed_size]

alldfs4 = []
for alg in our_algs:
    per_seed = []
    for ss in seed_size:
        rows = []
        for bs in bucket_size:
            # both constants are set before every single run
            cs.SEED_KEEP_RATIO = ss / bs
            cs.MAX_SEED_SIZE = bs
            rows.append(alignment_result(orgs, alg))
        per_seed.append(pd.DataFrame(data=rows, index=bucket_labels, columns=columns))
    alldfs4.append(pd.concat(per_seed, axis=0, keys=seed_labels))

# outermost index level: algorithm name
calldfs4 = pd.concat(alldfs4, axis=0, keys=our_algs)



In [None]:
# Show the whole table: pandas' row/column display limits are lifted for this
# one call and restored when the block exits.
unlimited = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*unlimited):
    display(calldfs4)



### network information

info, yeast, fly, human
nodes,
edges,
giant component,
average degree,
max degree,
components,


to tex

In [None]:
# Row order of the network-information table; one entry per key that
# collect_information fills in.
infocul = ['nodes', 'edges', 'giant component', 'average degree', 'max degree', 'components', 'clustering coefficient']


def collect_information(orgobj):
    """Summarise basic statistics of one organism network.

    Args:
        orgobj: Network object exposing ``node_count``, ``degree``,
            ``components()`` and ``adjacency``. The result of
            ``components()`` is indexed as ``(count, labels)`` and
            ``adjacency`` must be a dense square matrix accepted by
            ``np.linalg.matrix_power``.

    Returns:
        dict with one entry per name in ``infocul``.
    """
    info = {}
    info['nodes'] = orgobj.node_count
    info['edges'] = int(sum(orgobj.degree)/2)

    # Query the components once and reuse both parts of the answer.
    component_count, component_labels = orgobj.components()[:2]
    info['components'] = component_count
    # size of the largest component = highest label frequency
    _, counts = np.unique(component_labels, return_counts=True)
    info['giant component'] = counts.max()

    # NOTE(review): 'edges' halves the degree sum, yet the average and the
    # maximum degree are halved as well. Only one of these can be right,
    # depending on whether `degree` counts each incident edge once or twice
    # -- confirm against the organism module. Values are left as they were.
    info['average degree'] = int(np.average(orgobj.degree)/2)
    info['max degree'] = int(max(orgobj.degree)/2)

    # trace(A^3) / (sum(A^2) - trace(A^2)); A^2 is computed once and reused
    # for A^3, since each product is costly on a large network.
    adj = orgobj.adjacency
    adj_sq = np.linalg.matrix_power(adj, 2)
    adj_cube = adj_sq @ adj
    info['clustering coefficient'] = (adj_cube.trace() /
                                      (adj_sq.sum() - adj_sq.trace()))

    return info
    
    


In [None]:
# One statistics dict per loaded organism, in organism_objs order.
infos = [collect_information(network) for network in organism_objs]

# Rows are organisms, columns follow infocul; shown transposed so that each
# organism becomes a column.
df = pd.DataFrame(data=infos, index=organism_names, columns=infocul)
df.T


In [None]:
# Print the transposed `df` as LaTeX source. `df` is expected to be the
# network-information table from the previous cell; other cells reuse the
# name `df`, so run this right after it.
print(df.T.to_latex())


### algorithm GO/CE all networks

In [None]:
%%capture

# EC and S^3 (as percentage strings) over bucket size x seed size, with the
# algorithm variants placed side by side as column groups.

# algorithm variants under test
our_algs = ['seedexsingle', 'seedexsingle-jac', 'seedexsingle-ada']

# constants to compare
bucket_size = [600, 1000, 1400]
seed_size = [100, 300, 600]

# only these two scores are kept
columns = ['EC', 'S^3']

# organism selection
orgs = ['4932', '7227']

# columns rendered through as_percent
perc_culs = ['EC', 'S^3']

seed_labels = ['seed size: {}'.format(size) for size in seed_size]
bucket_labels = ['bucket size: {}'.format(size) for size in bucket_size]

alldfs5 = []
for alg in our_algs:
    per_bucket = []
    for bs in bucket_size:
        rows = []
        for ss in seed_size:
            # both constants are set before every single run
            cs.SEED_KEEP_RATIO = ss / bs
            cs.MAX_SEED_SIZE = bs
            rows.append(alignment_result(orgs, alg))

        table = pd.DataFrame(data=rows, index=seed_labels, columns=columns)
        for col in perc_culs:
            table[col] = table[col].apply(as_percent)
        per_bucket.append(table)

    alldfs5.append(pd.concat(per_bucket, axis=0, keys=bucket_labels))

# axis=1: one column group per algorithm
calldfs5 = pd.concat(alldfs5, axis=1, keys=our_algs)


In [None]:
# Show the whole table: pandas' row/column display limits are lifted for this
# one call and restored when the block exits.
unlimited = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*unlimited):
    display(calldfs5)



In [None]:
# Print the bucket size x seed size percentage table as LaTeX source.
print(calldfs5.to_latex())

          cns,    jsc,   ada
          ce,ec,s3
bucket            keep_ratio    seed size

600,1000,1400     0.1,0.3,0.5   100, 300, 600


In [None]:
%%capture

# EC and S^3 (as percentage strings) over bucket size x keep ratio, with the
# algorithm variants placed side by side as column groups.

# algorithm variants under test
our_algs = ['seedexsingle', 'seedexsingle-jac', 'seedexsingle-ada']

# constants to compare
bucket_size = [600, 1000, 1400]
keep_ratio = [0.1, 0.3, 0.5]

# only these two scores are kept
columns = ['EC', 'S^3']

# organism selection
orgs = ['4932', '7227']

# columns rendered through as_percent
perc_culs = ['EC', 'S^3']

ratio_labels = ['keep ratio: {}'.format(ratio) for ratio in keep_ratio]
bucket_labels = ['bucket size: {}'.format(size) for size in bucket_size]

alldfs6 = []
for alg in our_algs:
    per_bucket = []
    for bs in bucket_size:
        cs.MAX_SEED_SIZE = bs
        rows = []
        for kr in keep_ratio:
            cs.SEED_KEEP_RATIO = kr
            rows.append(alignment_result(orgs, alg))

        table = pd.DataFrame(data=rows, index=ratio_labels, columns=columns)
        for col in perc_culs:
            table[col] = table[col].apply(as_percent)
        per_bucket.append(table)

    alldfs6.append(pd.concat(per_bucket, axis=0, keys=bucket_labels))

# axis=1: one column group per algorithm
calldfs6 = pd.concat(alldfs6, axis=1, keys=our_algs)



In [None]:
# Show the whole table: pandas' row/column display limits are lifted for this
# one call and restored when the block exits.
unlimited = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*unlimited):
    display(calldfs6)



In [None]:
# Print the bucket size x keep ratio percentage table as LaTeX source.
print(calldfs6.to_latex())

In [None]:
%%capture
# GOC of every algorithm on every pair of loaded organisms.

# main algorithm constants
cs.SEED_KEEP_RATIO = 0.3
cs.MAX_SEED_SIZE = 1400

# algorithm selection
algs = ['greedy', 'NETAL', 'pinalog', 'CGRAAL', 'GRAAL', 'MIGRAAL', 'HubAlign', 'MAGNA', 'PROPER',
        'SPINAL-I', 'SPINAL-II', 'seedexsingle', 'seedexsingle-jac', 'seedexsingle-ada']

# NOTE(review): alignment_result in this notebook returns no 'GOC' key (its
# collection is commented out), so this column comes out as NaN unless that
# is re-enabled there.
columns = ['GOC']

# one table per organism pair, keyed '<name1>-<name2>'
dfs = {}
organism_count = len(organism_ids)
for i1 in range(organism_count):
    for i2 in range(i1 + 1, organism_count):
        orgs = [organism_ids[i1], organism_ids[i2]]
        rows = [alignment_result(orgs, alg) for alg in algs]
        pair_name = '{}-{}'.format(organism_names[i1], organism_names[i2])
        dfs[pair_name] = pd.DataFrame(data=rows, index=algs, columns=columns)



In [None]:
# Stack the tables of every organism pair that was actually computed. The
# names come from `dfs` itself: a hard-coded three-pair list raises KeyError
# when fewer organisms are loaded. With all three organisms loaded this is
# still ['yeast-fly', 'yeast-human', 'fly-human'] (insertion order).
net_names = list(dfs)
df = pd.concat([dfs[x] for x in net_names], axis=0, keys=net_names)


In [None]:
%%capture
# All scores of every algorithm on every pair of loaded organisms.

# main algorithm constants
cs.SEED_KEEP_RATIO = 0.3
cs.MAX_SEED_SIZE = 1400

# algorithm selection
algs = ['greedy', 'NETAL', 'pinalog', 'CGRAAL', 'GRAAL', 'MIGRAAL', 'HubAlign', 'MAGNA', 'PROPER',
        'SPINAL-I', 'SPINAL-II', 'seedexsingle', 'seedexsingle-jac', 'seedexsingle-ada']

# NOTE(review): alignment_result in this notebook returns no 'GOC', 'PWS1'
# or 'NBS' keys, so those columns come out as NaN unless their collection
# is re-enabled there.
columns = ['CE', 'EC', 'LCCS', 'ICS', 'S^3', 'GOC', 'PWS1', 'NBS']

# one table per organism pair, keyed '<name1>-<name2>'
dfs = {}
organism_count = len(organism_ids)
for i1 in range(organism_count):
    for i2 in range(i1 + 1, organism_count):
        orgs = [organism_ids[i1], organism_ids[i2]]
        rows = [alignment_result(orgs, alg) for alg in algs]
        pair_name = '{}-{}'.format(organism_names[i1], organism_names[i2])
        dfs[pair_name] = pd.DataFrame(data=rows, index=algs, columns=columns)


In [None]:
# Place the tables of every computed organism pair side by side and print
# them as LaTeX. The names come from `dfs` itself: a hard-coded three-pair
# list raises KeyError when fewer organisms are loaded. With all three
# organisms loaded this is still ['yeast-fly', 'yeast-human', 'fly-human'].
net_names = list(dfs)
df = pd.concat([dfs[x] for x in net_names], axis=1, keys=net_names)
print(df.to_latex())


In [None]:
%%capture
# All scores of every algorithm on every pair of loaded organisms.
# NOTE(review): this cell repeats the earlier all-metrics benchmark cell
# verbatim; re-running it recomputes the same `dfs`.

# main algorithm constants
cs.SEED_KEEP_RATIO = 0.3
cs.MAX_SEED_SIZE = 1400

# algorithm selection
algs = ['greedy', 'NETAL', 'pinalog', 'CGRAAL', 'GRAAL', 'MIGRAAL', 'HubAlign', 'MAGNA', 'PROPER',
        'SPINAL-I', 'SPINAL-II', 'seedexsingle', 'seedexsingle-jac', 'seedexsingle-ada']

# NOTE(review): alignment_result in this notebook returns no 'GOC', 'PWS1'
# or 'NBS' keys, so those columns come out as NaN unless their collection
# is re-enabled there.
columns = ['CE', 'EC', 'LCCS', 'ICS', 'S^3', 'GOC', 'PWS1', 'NBS']

# one table per organism pair, keyed '<name1>-<name2>'
dfs = {}
organism_count = len(organism_ids)
for i1 in range(organism_count):
    for i2 in range(i1 + 1, organism_count):
        orgs = [organism_ids[i1], organism_ids[i2]]
        rows = [alignment_result(orgs, alg) for alg in algs]
        pair_name = '{}-{}'.format(organism_names[i1], organism_names[i2])
        dfs[pair_name] = pd.DataFrame(data=rows, index=algs, columns=columns)


In [None]:
# Scatter GOC against EC (left column) and against S^3 (right column): one
# row of panels per organism pair, one point per algorithm.
# NOTE(review): alignment_result in this notebook returns no 'GOC' key;
# confirm the tables in `dfs` carry GOC values before trusting this figure.
net_names = ['yeast-fly', 'yeast-human', 'fly-human']

# Legend labels, matched by position to the rows of each table.
alg_names = ['greedy', 'NETAL', 'pinalog', 'CGRAAL', 'GRAAL', 'MIGRAAL', 'HubAlign', 'MAGNA', 'PROPER',
        'SPINAL-I', 'SPINAL-II', 'TopoAlign(CNS)', 'TopoAlign(JS)', 'TopoAlign(AAS)']

# The last three entries (the TopoAlign variants) are drawn as stars.
markers = ['.']*11 + ['*']*3

colors = sns.hls_palette(14, l=.4, s=.8)

fig = plt.figure(figsize=(10,12))
plt.subplots_adjust(wspace=0.3, hspace=0.4)

# (score column, y-axis label) for the left and the right panel of a row
panels = [('EC', 'EC'), ('S^3', '$S^3$')]

axes = []
for row, net_name in enumerate(net_names):
    df = dfs[net_name]
    goc = df['GOC']

    for col, (metric, ylabel) in enumerate(panels):
        ax = fig.add_subplot(len(net_names), 2, 2*row + col + 1)
        scores = df[metric]

        # The tables are indexed by algorithm name, so rows are taken by
        # position with .iloc (plain [j] on a label index is deprecated).
        # `color=` is used because `c=` reads an RGB tuple ambiguously.
        for j in range(len(df)):
            ax.scatter(goc.iloc[j], scores.iloc[j], label=alg_names[j],
                       marker=markers[j], color=colors[j])

        ax.set_xlabel('GOC')
        ax.set_ylabel(ylabel)
        ax.set_title(net_name)
        axes.append(ax)

# Attach the shared legend to the right-hand panel of the second row and
# place it outside the axes.
lgd = axes[3].legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

plt.savefig("test.pdf", bbox_extra_artists=(lgd,), bbox_inches='tight')
# %config InlineBackend.figure_format = 'svg'


In [None]:
%%capture

# Bucket size x keep ratio sweep feeding the EC-vs-keep-ratio figure.

# algorithm variants under test
our_algs = ['seedexsingle', 'seedexsingle-jac', 'seedexsingle-ada']

# constants to compare
bucket_size = [600, 800, 1000, 1200, 1400]
keep_ratio = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]

# NOTE(review): alignment_result in this notebook returns no 'GOC', 'PWS1'
# or 'NBS' keys, so those columns come out as NaN unless their collection
# is re-enabled there.
columns = ['CE', 'EC', 'LCCS', 'ICS', 'S^3', 'GOC', 'PWS1', 'NBS']

# organism selection
orgs = ['4932', '7227']

ratio_labels = ['keep ratio: {}'.format(ratio) for ratio in keep_ratio]
bucket_labels = ['bucket size: {}'.format(size) for size in bucket_size]

alldfs1 = []
for alg in our_algs:
    per_bucket = []
    for bs in bucket_size:
        cs.MAX_SEED_SIZE = bs
        rows = []
        for kr in keep_ratio:
            cs.SEED_KEEP_RATIO = kr
            rows.append(alignment_result(orgs, alg))
        per_bucket.append(pd.DataFrame(data=rows, index=ratio_labels, columns=columns))
    alldfs1.append(pd.concat(per_bucket, axis=0, keys=bucket_labels))

# index levels: algorithm, bucket size, keep ratio
calldfs1 = pd.concat(alldfs1, axis=0, keys=our_algs)



In [None]:
# Peek at the EC scores of 'seedexsingle' at bucket size 600: one value per
# keep ratio (the innermost index level of calldfs1).
calldfs1['EC']['seedexsingle']['bucket size: 600']

In [None]:
# EC against keep ratio: one panel per algorithm variant, one line per
# bucket size.
net_name = 'yeast-fly'

# panel titles, matched by position to our_algs
alg_names = ['TopoAlign(CNS)', 'TopoAlign(JS)', 'TopoAlign(AAS)']

fig = plt.figure(figsize=(6,12))
plt.subplots_adjust(wspace=0.3, hspace=0.4)

for i in range(3):
    ax = fig.add_subplot(3, 1, i+1)
    alg, alg_name = our_algs[i], alg_names[i]
    x = keep_ratio

    for bs in bucket_size:
        col_name = 'bucket size: {}'.format(bs)
        # EC values of this algorithm at this bucket size, one per keep ratio
        ax.plot(x, calldfs1['EC'][alg][col_name], 'o-', label=col_name)

    ax.set(xlabel='keep ratio', ylabel='EC', title=alg_name)

# The legend belongs to the middle panel and is placed outside its axes.
lgd = plt.subplot(3, 1, 2).legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

plt.savefig("test1.pdf", bbox_extra_artists=(lgd,), bbox_inches='tight')
# %config InlineBackend.figure_format = 'svg'


In [None]:
%%capture

# Bucket size x seed size sweep feeding the EC-vs-seed-size figure.

# algorithm variants under test
our_algs = ['seedexsingle', 'seedexsingle-jac', 'seedexsingle-ada']

# constants to compare
bucket_size = [800, 1000, 1200, 1400]
seed_size = [100, 200, 300, 400, 500, 600, 700, 800]

# NOTE(review): alignment_result in this notebook returns no 'GOC', 'PWS1'
# or 'NBS' keys, so those columns come out as NaN unless their collection
# is re-enabled there.
columns = ['CE', 'EC', 'LCCS', 'ICS', 'S^3', 'GOC', 'PWS1', 'NBS']

# organism selection
orgs = ['4932', '7227']

seed_labels = ['seed size: {}'.format(size) for size in seed_size]
bucket_labels = ['bucket size: {}'.format(size) for size in bucket_size]

alldfs2 = []
for alg in our_algs:
    per_bucket = []
    for bs in bucket_size:
        cs.MAX_SEED_SIZE = bs
        rows = []
        for ss in seed_size:
            # the seed size is expressed as a keep ratio of the bucket
            cs.SEED_KEEP_RATIO = ss / bs
            rows.append(alignment_result(orgs, alg))
        per_bucket.append(pd.DataFrame(data=rows, index=seed_labels, columns=columns))
    alldfs2.append(pd.concat(per_bucket, axis=0, keys=bucket_labels))

# index levels: algorithm, bucket size, seed size
calldfs2 = pd.concat(alldfs2, axis=0, keys=our_algs)



In [None]:
# EC against seed size: one panel per algorithm variant, one line per
# bucket size.
net_name = 'yeast-fly'

# panel titles, matched by position to our_algs
alg_names = ['TopoAlign(CNS)', 'TopoAlign(JS)', 'TopoAlign(AAS)']

fig = plt.figure(figsize=(6,12))
plt.subplots_adjust(wspace=0.3, hspace=0.4)

for i in range(3):
    ax = fig.add_subplot(3, 1, i+1)
    alg, alg_name = our_algs[i], alg_names[i]
    x = seed_size

    for bs in bucket_size:
        col_name = 'bucket size: {}'.format(bs)
        # EC values of this algorithm at this bucket size, one per seed size
        ax.plot(x, calldfs2['EC'][alg][col_name], 'o-', label=col_name)

    ax.set(xlabel='seed size', ylabel='EC', title=alg_name)

# The legend belongs to the middle panel and is placed outside its axes.
lgd = plt.subplot(3, 1, 2).legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

plt.savefig("test2.pdf", bbox_extra_artists=(lgd,), bbox_inches='tight')
# %config InlineBackend.figure_format = 'svg'


In [None]:
# Single run of the 'seedex-greedy' method on organisms 4932 and 7227.
alg = 'seedex-greedy'
orgs = ['4932', '7227']

# seed and extend constants used for this run
cs.MAX_SEED_SIZE, cs.SEED_KEEP_RATIO = 1400, 0.5
cs.MAX_EXTEND_SIZE, cs.EXTEND_KEEP_RATIO = 4000, 0.0005

# the score dict is the cell's displayed value
aln = alignment_result(orgs, alg)
aln


In [None]:
%%capture
# Topological scores of every algorithm on every pair of loaded organisms,
# with the ratio-valued columns rendered as percentage strings.

# main algorithm constants
cs.SEED_KEEP_RATIO = 0.3
cs.MAX_SEED_SIZE = 1400

# algorithm selection
algs = ['greedy', 'NETAL', 'pinalog', 'CGRAAL', 'GRAAL', 'MIGRAAL', 'HubAlign', 'MAGNA', 'PROPER',
        'SPINAL-I', 'SPINAL-II', 'seedexsingle', 'seedexsingle-jac', 'seedexsingle-ada']

# scores kept in each table
columns = ['CE', 'EC', 'LCCS', 'ICS', 'S^3']

# one table per organism pair, keyed '<name1>-<name2>'
dfs = {}
organism_count = len(organism_ids)
for i1 in range(organism_count):
    for i2 in range(i1 + 1, organism_count):
        orgs = [organism_ids[i1], organism_ids[i2]]
        rows = [alignment_result(orgs, alg) for alg in algs]
        table = pd.DataFrame(data=rows, index=algs, columns=columns)

        # columns rendered through as_percent
        perc_culs = ['EC', 'ICS', 'S^3']
        for col in perc_culs:
            table[col] = table[col].apply(as_percent)

        pair_name = '{}-{}'.format(organism_names[i1], organism_names[i2])
        dfs[pair_name] = table


In [None]:
# Or view all scores combined:
net_names = ['yeast-fly', 'yeast-human', 'fly-human']
dfpr = pd.concat([dfs[x] for x in net_names], axis=0, keys=net_names)
print(dfpr.to_latex())
# pd.concat([dfs[x] for x in net_names], axis=0, keys=net_names)
dfpr

In [None]:
%%capture
# GOC of every algorithm on every pair of loaded organisms, rendered as
# percentage strings.

# main algorithm constants
cs.SEED_KEEP_RATIO = 0.3
cs.MAX_SEED_SIZE = 1400

# algorithm selection
algs = ['greedy', 'NETAL', 'pinalog', 'CGRAAL', 'GRAAL', 'MIGRAAL', 'HubAlign', 'MAGNA', 'PROPER',
        'SPINAL-I', 'SPINAL-II', 'seedexsingle', 'seedexsingle-jac', 'seedexsingle-ada']

# NOTE(review): alignment_result in this notebook returns no 'GOC' key (its
# collection is commented out), so this column comes out as NaN unless that
# is re-enabled there.
columns = ['GOC']

# one table per organism pair, keyed '<name1>-<name2>'
dfs = {}
organism_count = len(organism_ids)
for i1 in range(organism_count):
    for i2 in range(i1 + 1, organism_count):
        orgs = [organism_ids[i1], organism_ids[i2]]
        rows = [alignment_result(orgs, alg) for alg in algs]
        table = pd.DataFrame(data=rows, index=algs, columns=columns)

        # columns rendered through as_percent
        perc_culs = ['GOC']
        for col in perc_culs:
            table[col] = table[col].apply(as_percent)

        pair_name = '{}-{}'.format(organism_names[i1], organism_names[i2])
        dfs[pair_name] = table


In [None]:
# Or view all scores combined:
net_names = ['yeast-fly', 'yeast-human', 'fly-human']
dfpr = pd.concat([dfs[x] for x in net_names], axis=1, keys=net_names)
print(dfpr.to_latex())
# pd.concat([dfs[x] for x in net_names], axis=0, keys=net_names)
dfpr

In [None]:
# Display the current value of the ALPHA_BIAS constant.
cs.ALPHA_BIAS