# Epsilon

In [1]:
import simuOpt
simuOpt.setOptions(alleleType='short', optimized=True, numThreads=4, quiet=True)
import simuPOP as sim
import pandas as pd
from saegus import breed, operators, simulate, analyze, parse, parameters
import shelve
import numpy as np
import random
import h5py
import collections as col
np.set_printoptions(suppress=True, precision=3)

In [2]:
mg = analyze.MultiGeneration('epsilon')

In [3]:
run_id = 'epsilon'
generations_of_random_mating = 10
number_of_qtl = 10
number_of_replicates = 6
founders = [[2, 26], [3, 25], [4, 24], [5, 23]]
os_per_pair = 500
recombination_rates = [0.01]*1478

In [4]:
prefounders = sim.loadPopulation('bia_prefounders.pop')

In [5]:
sim.tagID(prefounders, reset=True)

In [6]:
alleles = np.array(pd.read_hdf('parameters/alleles_at_1478_loci.hdf'))

### Allele Effects & Frequencies Table

In [9]:
alpha, beta = alleles[:, 0], alleles[:, 1]

In [12]:
sim.stat(prefounders, alleleFreq=sim.ALL_AVAIL)

In [38]:
prefounder_allele_frequencies = prefounders.dvars().alleleFreq

In [39]:
prefounder_allele_frequencies

{0: defdict({1: 0.9807692307692307, 2: 0.019230769230769232}),
 1: defdict({1: 0.8461538461538461, 3: 0.15384615384615385}),
 2: defdict({1: 0.07692307692307693, 3: 0.9230769230769231}),
 3: defdict({0: 0.9230769230769231, 2: 0.07692307692307693}),
 4: defdict({0: 0.019230769230769232, 2: 0.9807692307692307}),
 5: defdict({0: 0.9230769230769231, 2: 0.07692307692307693}),
 6: defdict({0: 0.75, 2: 0.25}),
 7: defdict({1: 0.038461538461538464, 3: 0.9615384615384616}),
 8: defdict({0: 0.8461538461538461, 2: 0.15384615384615385}),
 9: defdict({1: 0.9615384615384616, 3: 0.038461538461538464}),
 10: defdict({1: 0.7307692307692307, 3: 0.2692307692307692}),
 11: defdict({1: 0.9230769230769231, 3: 0.07692307692307693}),
 12: defdict({1: 0.21153846153846154, 3: 0.7884615384615384}),
 13: defdict({0: 0.038461538461538464, 2: 0.9615384615384616}),
 14: defdict({0: 0.038461538461538464, 3: 0.9615384615384616}),
 15: defdict({1: 0.46153846153846156, 3: 0.5384615384615384}),
 16: defdict({2: 0.9230769

In [14]:
alpha_frq = np.asarray([prefounders.dvars().alleleFreq[locus][allele] for locus, allele in enumerate(alpha)])

In [43]:
len(prefounder_allele_frequencies)

1478

In [15]:
beta_frq = np.asarray([prefounders.dvars().alleleFreq[locus][allele] for locus, allele in enumerate(beta)])

In [33]:
# Effect of the alpha and beta allele at every locus: the row index selects the
# locus and the column index selects the allele code at that locus.
# The alpha lookup was missing its row index (a SyntaxError); it now mirrors the
# beta lookup.
# NOTE: ae_array is created in a cell further down this notebook, so that cell
# must have been executed before this one.
alpha_effects, beta_effects = ae_array[range(1478), alpha], ae_array[range(1478), beta]

In [66]:
print(alleles)

[[1 2]
 [1 3]
 [3 1]
 ..., 
 [1 0]
 [3 0]
 [3 1]]


In [70]:
print(ae_array[qtl])

[[ 1.892  0.179  0.     0.     0.     0.   ]
 [ 0.92   1.     0.     0.     0.     0.   ]
 [ 0.079  0.     0.     1.653  0.     0.   ]
 [ 0.118  1.263  0.     0.     0.     0.   ]
 [ 3.731  0.     2.626  0.     0.     0.   ]
 [ 0.     0.673  0.     0.417  0.     0.   ]
 [ 0.418  0.     0.     1.94   0.     0.   ]
 [ 0.     0.6    0.     0.175  0.     0.   ]
 [ 0.     1.156  0.     0.257  0.     0.   ]
 [ 0.     0.91   0.     0.297  0.     0.   ]
 [ 0.     0.     0.087  0.246  0.     0.   ]
 [ 0.136  0.     0.     2.275  0.     0.   ]
 [ 0.     0.     0.     0.     1.024  0.842]
 [ 0.     1.421  0.182  0.     0.     0.   ]
 [ 0.238  0.426  0.     0.     0.     0.   ]
 [ 0.     0.015  0.     0.164  0.     0.   ]
 [ 0.     0.     0.     0.     2.413  0.69 ]
 [ 0.454  0.     0.     2.111  0.     0.   ]
 [ 0.     0.     0.177  0.958  0.     0.   ]
 [ 0.     0.75   0.     0.224  0.     0.   ]
 [ 0.502  0.     0.078  0.     0.     0.   ]
 [ 0.     0.168  0.     0.63   0.     0.   ]
 [ 0.     

In [72]:
aetable

Unnamed: 0,alpha,alpha_effect,alpha_frequency,beta,beta_effect,beta_frequency
0,1,0.000000,0.980769,2,0.000000,0.019231
1,1,0.000000,0.846154,3,0.000000,0.153846
2,3,0.000000,0.923077,1,0.000000,0.076923
3,0,0.000000,0.923077,2,0.000000,0.076923
4,2,0.000000,0.980769,0,0.000000,0.019231
5,0,0.000000,0.923077,2,0.000000,0.076923
6,0,0.000000,0.750000,2,0.000000,0.250000
7,3,0.000000,0.961538,1,0.000000,0.038462
8,0,0.000000,0.846154,2,0.000000,0.153846
9,1,0.000000,0.961538,3,0.000000,0.038462


In [64]:
prefounder_allele_frequencies

{0: defdict({1: 0.9807692307692307, 2: 0.019230769230769232}),
 1: defdict({1: 0.8461538461538461, 3: 0.15384615384615385}),
 2: defdict({1: 0.07692307692307693, 3: 0.9230769230769231}),
 3: defdict({0: 0.9230769230769231, 2: 0.07692307692307693}),
 4: defdict({0: 0.019230769230769232, 2: 0.9807692307692307}),
 5: defdict({0: 0.9230769230769231, 2: 0.07692307692307693}),
 6: defdict({0: 0.75, 2: 0.25}),
 7: defdict({1: 0.038461538461538464, 3: 0.9615384615384616}),
 8: defdict({0: 0.8461538461538461, 2: 0.15384615384615385}),
 9: defdict({1: 0.9615384615384616, 3: 0.038461538461538464}),
 10: defdict({1: 0.7307692307692307, 3: 0.2692307692307692}),
 11: defdict({1: 0.9230769230769231, 3: 0.07692307692307693}),
 12: defdict({1: 0.21153846153846154, 3: 0.7884615384615384}),
 13: defdict({0: 0.038461538461538464, 2: 0.9615384615384616}),
 14: defdict({0: 0.038461538461538464, 3: 0.9615384615384616}),
 15: defdict({1: 0.46153846153846156, 3: 0.5384615384615384}),
 16: defdict({2: 0.9230769

In [63]:
type(prefounder_allele_frequencies)

dict

In [36]:
aef_table = np.asarray([alpha, alpha_effects, alpha_frq, beta, beta_effects, beta_frq])

In [60]:
def generate_allele_effects_table(population_allele_frequencies, allele_array, allele_effect_array):
    """
    Summarise both alleles of every locus in a single pandas DataFrame.

    One row is produced per locus, with the columns (in this order):
    + alpha              -- allele taken from column 0 of ``allele_array``
    + alpha_effect       -- ``allele_effect_array[locus, alpha]``
    + alpha_frequency    -- ``population_allele_frequencies[locus][alpha]``
    + beta               -- allele taken from column 1 of ``allele_array``
    + beta_effect        -- ``allele_effect_array[locus, beta]``
    + beta_frequency     -- ``population_allele_frequencies[locus][beta]``

    The number of loci is taken from ``len(population_allele_frequencies)``.
    All six columns pass through one numpy array before reaching pandas, so
    they share a single dtype.

    :warning:`Assumes di-allelic case`
    """
    locus_index = range(len(population_allele_frequencies))

    def frequencies_of(alleles_per_locus):
        # Frequency of the given allele at each locus, in locus order.
        return np.asarray([population_allele_frequencies[position][variant]
                           for position, variant in enumerate(alleles_per_locus)])

    first_alleles = allele_array[:, 0]
    second_alleles = allele_array[:, 1]
    first_effects = allele_effect_array[locus_index, first_alleles]
    second_effects = allele_effect_array[locus_index, second_alleles]

    # Rows of this array are the future columns; transpose to get one row per locus.
    stacked_columns = np.asarray([
        first_alleles, first_effects, frequencies_of(first_alleles),
        second_alleles, second_effects, frequencies_of(second_alleles),
    ])
    return pd.DataFrame(
        stacked_columns.T,
        columns=['alpha', 'alpha_effect', 'alpha_frequency',
                 'beta', 'beta_effect', 'beta_frequency'],
    )

In [61]:
aetable = generate_allele_effects_table(prefounder_allele_frequencies, alleles, ae_array)

In [74]:
# Show only the QTL rows. `.ix` is deprecated and was removed in pandas 1.0;
# aetable has the default integer index, so label-based `.loc` selects the same rows.
aetable.loc[qtl, :]

Unnamed: 0,alpha,alpha_effect,alpha_frequency,beta,beta_effect,beta_frequency
27,0,1.891549,0.980769,1,0.17944,0.019231
31,0,0.920381,0.769231,1,1.000491,0.230769
64,0,0.079128,0.961538,3,1.65252,0.038462
92,0,0.11835,0.980769,1,1.263197,0.019231
121,2,2.626293,0.903846,0,3.730527,0.096154
134,3,0.416856,0.711538,1,0.673493,0.288462
150,0,0.418157,0.942308,3,1.940251,0.057692
153,1,0.599692,0.730769,3,0.175227,0.269231
162,1,1.156038,0.923077,3,0.257436,0.076923
292,1,0.909707,0.730769,3,0.296548,0.269231


In [99]:
def minor_allele_frequencies_table(population_allele_frequencies, minor_alleles):
    """
    Tabulate the minor allele of every locus next to its frequency.

    :param population_allele_frequencies: Indexable by locus, then by allele
        (``population_allele_frequencies[locus][allele]``).
    :param minor_alleles: Sequence whose i-th entry is the minor allele at locus i.
    :return: DataFrame with the columns ``minor_allele`` and ``minor_frequency``,
        one row per entry of ``minor_alleles``.

    Both columns pass through a single numpy array, so the allele codes are
    stored with the same dtype as the frequencies.
    """
    column_labels = ['minor_allele', 'minor_frequency']
    minor_allele_frequencies = np.array([population_allele_frequencies[locus][allele]
                                         for locus, allele in enumerate(minor_alleles)])
    return pd.DataFrame(np.array([minor_alleles, minor_allele_frequencies]).T, columns=column_labels)

In [101]:
mafrqs = minor_allele_frequencies_table(prefounder_allele_frequencies, minor_alleles)

In [102]:
print(mafrqs)

      minor_allele  minor_frequency
0                2         0.019231
1                3         0.153846
2                1         0.076923
3                2         0.076923
4                0         0.019231
5                2         0.076923
6                2         0.250000
7                1         0.038462
8                2         0.153846
9                3         0.038462
10               3         0.269231
11               3         0.076923
12               1         0.211538
13               0         0.038462
14               0         0.038462
15               1         0.461538
16               3         0.076923
17               3         0.038462
18               0         0.038462
19               4         0.038462
20               0         0.461538
21               1         0.230769
22               3         0.019231
23               5         0.519231
24               3         0.038462
25               0         0.307692
26               0         0

In [84]:
aetable.join(mafrqs)

Unnamed: 0,alpha,alpha_effect,alpha_frequency,beta,beta_effect,beta_frequency,minor_allele,mu_frequency
0,1,0.000000,0.980769,2,0.000000,0.019231,2,0.019231
1,1,0.000000,0.846154,3,0.000000,0.153846,3,0.153846
2,3,0.000000,0.923077,1,0.000000,0.076923,1,0.076923
3,0,0.000000,0.923077,2,0.000000,0.076923,2,0.076923
4,2,0.000000,0.980769,0,0.000000,0.019231,0,0.019231
5,0,0.000000,0.923077,2,0.000000,0.076923,2,0.076923
6,0,0.000000,0.750000,2,0.000000,0.250000,2,0.250000
7,3,0.000000,0.961538,1,0.000000,0.038462,1,0.038462
8,0,0.000000,0.846154,2,0.000000,0.153846,2,0.153846
9,1,0.000000,0.961538,3,0.000000,0.038462,3,0.038462


In [53]:
aetable

In [19]:
rdm_populations = sim.Simulator(prefounders, 2, stealPops=False)
sel_populations = sim.Simulator(prefounders, 2, stealPops=False)
dri_populations = sim.Simulator(prefounders, 2, stealPops=False)

rdm_magic = breed.MAGIC(rdm_populations, founders, recombination_rates)
sel_magic = breed.MAGIC(sel_populations, founders, recombination_rates)
dri_magic = breed.MAGIC(dri_populations, founders, recombination_rates)

sim.tagID(prefounders, reset=27)

rdm_magic.generate_f_one(founders, os_per_pair)
sel_magic.generate_f_one(founders, os_per_pair)
dri_magic.generate_f_one(founders, os_per_pair)

sim.stat(rdm_populations.population(0), alleleFreq=sim.ALL_AVAIL)
af = analyze.allele_data(rdm_populations.population(0), alleles, list(range(1478)))

In [20]:
minor_alleles = np.asarray(af.minor_allele, dtype=np.int8)

In [21]:
rdm_magic.recombinatorial_convergence(rdm_populations, 4, 500)
sel_magic.recombinatorial_convergence(sel_populations, 4, 500)
dri_magic.recombinatorial_convergence(dri_populations, 4, 500)

Start of recombinatorial convergence.
Prior to convergence: 4
Prior to convergence: 2
Start of recombinatorial convergence.
Prior to convergence: 4
Prior to convergence: 2
Start of recombinatorial convergence.
Prior to convergence: 4
Prior to convergence: 2


In [22]:
study = analyze.Study(run_id)

In [23]:
# Draw 50 distinct locus indices out of 0..1477 and keep them in ascending order.
# NOTE(review): number_of_qtl is set to 10 in the parameter cell and is what gets
# passed to calculate_power_fpr later, yet 50 loci are sampled here -- confirm
# which count is intended.
# NOTE(review): `random` is never seeded in this notebook, so the chosen QTL
# differ on every run.
qtl = sorted(random.sample(tuple(range(1478)), 50))

In [24]:
additive_trait = parameters.Trait()
allele_effects = additive_trait.assign_allele_effects(alleles, qtl, random.expovariate, 1,
                                                     multiplicity=1)

In [25]:
ae_array = additive_trait.convert_allele_effects_into_array(prefounders.totNumLoci(), 6, allele_effects)

In [None]:
ae_array[qtl, :]

In [None]:
sampling_generations = [i for i in range(2, 10, 2)]
sample_sizes = {i: 100 for i in range(11)}

In [None]:
rdm_meta_populations = {rep: [] for rep in range(2)}
sel_meta_populations = {rep: [] for rep in range(2)}
dri_meta_populations = {rep: [] for rep in range(2)}

In [None]:
import importlib as imp
imp.reload(simulate)

In [None]:
rdm_mating = simulate.RandomMating(10, 2000, 0.05, 0.5, 5, 0.7, sample_sizes)

In [None]:
drift = simulate.Drift(10, 2000, 0.05, 0.5, 5, 0.7, sample_sizes)

In [None]:
selection = simulate.Truncation(10, 2000, 0.05, 0.5, 5, 0.7, sample_sizes)

In [None]:
rdm_mating.replicate_random_mating(rdm_populations, rdm_meta_populations, qtl, ae_array, recombination_rates)

del rdm_populations

In [None]:
drift.replicate_recurrent_drift(dri_populations, dri_meta_populations, qtl, ae_array, recombination_rates)

In [None]:
selection.replicate_selection(sel_populations, sel_meta_populations, qtl, ae_array, recombination_rates)

del sel_populations

In [None]:
wombo_combo = [rdm_meta_populations, dri_meta_populations, sel_meta_populations]

In [None]:
def combine_samples(sample_library):
    """
    Merge the samples of every replicate into one meta-population.

    The original cell contained only the ``def`` line, which is a SyntaxError.
    NOTE(review): this body is inferred from the manual merge performed a few
    cells below (``repz[0].addIndFrom(sample)``) -- confirm it matches the
    intended behaviour.

    :param sample_library: Mapping of replicate -> list of sampled populations.
        Each population must provide ``clone()`` and ``addIndFrom(other)``.
    :return: dict of replicate -> merged population. Replicates with an empty
        sample list are left out.

    The first sample is cloned before merging, so the populations stored in
    ``sample_library`` are not modified.
    """
    combined = {}
    for rep, samples in sample_library.items():
        if not samples:
            # Nothing to merge for this replicate.
            continue
        meta_population = samples[0].clone()
        for sample in samples[1:]:
            meta_population.addIndFrom(sample)
        combined[rep] = meta_population
    return combined

In [None]:
for rep, sample_list in rdm_meta_populations.items():
    for sample in sample_list:
        sim.stat(sample, alleleFreq=sim.ALL_AVAIL)

In [None]:
sets_of_segregating_loci = study.seg_loci_among_samples(rdm_meta_populations)

In [None]:
len(sets_of_segregating_loci.keys())

In [None]:
concordant_segregating_loci = list(sets_of_segregating_loci.keys())[0]

In [None]:
concordant_segregating_loci

In [None]:
repz = rdm_meta_populations[0]

In [None]:
# Fold every later sample of replicate 0 into the first sample.
# NOTE: addIndFrom is called on repz[0] itself, i.e. on the object stored in
# rdm_meta_populations[0][0], so that population is modified in place.
# NOTE(review): presumably re-running this cell appends the same individuals a
# second time -- restart from the simulation cells before re-executing.
for sample in repz[1:]:
    repz[0].addIndFrom(sample)

In [None]:
meta_pop = repz[0]

In [None]:
minor_alleles_at_loci = np.asarray(af.minor_allele)[list(concordant_segregating_loci)]

In [None]:
concordant_segregating_loci

In [None]:
add_trait = parameters.Trait()

In [None]:
len(concordant_segregating_loci)

In [None]:
# Persist the segregating loci of this run under its run_id.
# The `with` block closes the shelf even if the write raises.
with shelve.open('segregating_loci_storage') as seg_loc_storage:
    seg_loc_storage[run_id] = concordant_segregating_loci

In [None]:
# Loci (out of 0..1477) that are not in the concordant segregating set.
# Building a set once makes each membership test a hash lookup; testing against
# a tuple or list would rescan it for every one of the 1478 candidates.
segregating_lookup = set(concordant_segregating_loci)
droppable_loci = [droppable for droppable in range(1478) if droppable not in segregating_lookup]

In [None]:
rdm_meta_populations

In [None]:
repone = rdm_meta_populations[1]

In [None]:
repone[0].dvars().epsilon

In [None]:
repz = rdm_meta_populations[0]

In [None]:
repz_meta_pop = repz[0].clone()

In [None]:
repz[1].dvars().epsilon

In [None]:
id(repz_meta_pop)

In [None]:
id(repz[0])

In [None]:
sim.stat(meta_pop, alleleFreq=sim.ALL_AVAIL)

In [None]:
minor_alleles

In [None]:
meta_pop.dvars().alleleFreq

In [None]:
meta_pop.indInfo('p')

In [None]:
indir = "/home/vakanas/tassel-5-standalone/input/"
outdir = "/home/vakanas/tassel-5-standalone/output/"
rep_id_name = "0"

In [None]:
minor_alleles

In [None]:
segminor_alleles = minor_alleles[list(concordant_segregating_loci)]

In [None]:
minor_allele_frequencies = np.asarray(list(meta_pop.dvars().alleleFreq[locus][allele]
     for locus, allele in enumerate(minor_alleles)))

In [None]:
run_id

In [None]:
gwas = analyze.GWAS(meta_pop, list(range(meta_pop.totNumLoci())), run_id)

In [None]:
ccm = gwas.calculate_count_matrix(minor_alleles, list(range(meta_pop.totNumLoci())))

In [None]:
ps_svd = gwas.pop_struct_svd(ccm)

In [None]:
name = run_id+'_'+rep_id_name

In [None]:
gwas.population_structure_formatter(ps_svd, indir+name+'_structure_matrix.txt')

In [None]:
int_to_snp_map = {0:'A', 1:'C', 2:'G', 3:'T', 4:'-', 5:'+'}

In [None]:
len(concordant_segregating_loci)

In [None]:
locus_names = list(concordant_segregating_loci)

In [None]:
alleles_column = ['NA']*len(concordant_segregating_loci)

In [None]:
chromosomes = [meta_pop.chromLocusPair(locus)[0]+1 for locus in concordant_segregating_loci]

In [None]:
gwas.hapmap_formatter(concordant_segregating_loci, alleles_column, 
                      locus_names, chromosomes, 
                      locus_names, 
                      indir+name+'_simulated_hapmap.txt')

In [None]:
minor_allele_frequencies.shape

In [None]:
gwas.calc_kinship_matrix(ccm, minor_allele_frequencies, indir+name+'_kinship_matrix.txt')

In [None]:
gwas.trait_formatter(indir+name+'_trait_vector.txt')

In [None]:
import xml.etree.ElementTree as ET
import lxml.etree as etree

In [None]:
config_file_template = '/home/vakanas/BISB/rjwlab-scripts/saegus_project/devel/magic/1478/gwas_pipeline.xml'

In [None]:
tree = ET.parse(config_file_template)
root = tree.getroot()
lxml_tree = etree.fromstring(ET.tostring(root))
lxml_root = lxml_tree.getroottree()

In [None]:
lxml_root.find('fork1/h').text = indir+name+'_simulated_hapmap.txt'
lxml_root.find('fork2/t').text = indir+name+'_trait_vector.txt'
lxml_root.find('fork3/q').text = indir+name+'_structure_matrix.txt'
lxml_root.find('fork4/k').text = indir+name+'_kinship_matrix.txt'

lxml_root.find('combine6/export').text = outdir+name+'_out_'

In [None]:
rep_id_name

In [None]:
run_id

In [None]:
lxml_root.write("/home/vakanas/tassel-5-standalone/"+"R"+rep_id_name+'_'+
                run_id+'_'+"_sim_gwas_pipeline.xml",
                encoding="UTF-8",
                method="xml", 
                xml_declaration=True, 
                standalone='',
                pretty_print=True)

In [None]:
mg.multiple_sample_analyzer(meta_populations, qtl, allele_effects, 
                            minor_alleles, concordant_segregating_loci)

In [None]:
analyze.store_allele_effect_frequency_tables(meta_population, alleles, 
                                             qtl,
                                             exponential_allele_effects,
                                            run_id, 'exponential')

In [None]:
# Two-way mapping between a saegus locus and its 0-based position within
# concordant_segregating_loci (stored under the 'tassel' keys).
saegus_to_tassel_loci = {locus: idx for idx, locus in enumerate(concordant_segregating_loci)}
tassel_to_saegus_loci = dict(enumerate(concordant_segregating_loci))

# The `with` block closes the shelf even if one of the writes raises.
with shelve.open(run_id+'_loci_conversions') as loci_conversions:
    loci_conversions['saegus_to_tassel'] = saegus_to_tassel_loci
    loci_conversions['tassel_to_saegus'] = tassel_to_saegus_loci

In [None]:
# Store the segregating loci in the shared shelf; the `with` block closes the
# shelf even if the write raises.
# NOTE(review): the key is hard-coded to 'bacchus' although run_id is 'epsilon'
# in this notebook and an earlier cell writes the same value under run_id --
# looks like a leftover from another run; confirm before relying on it.
with shelve.open('segregating_loci_storage') as seg_loc_storage:
    seg_loc_storage['bacchus'] = concordant_segregating_loci

In [None]:
# Integer allele code -> character written for it.
int_to_snp = {0: 'A', 1: 'C', 2: 'G', 3: 'T', 4: '-', 5: '+'}
# Derive the inverse instead of typing it by hand so the two tables cannot drift apart.
snp_to_int = {snp: code for code, snp in int_to_snp.items()}

# The `with` block closes the shelf even if one of the writes raises.
with shelve.open('synthesis_parameters') as conv:
    conv['integer_to_snp'] = int_to_snp
    conv['snp_to_integer'] = snp_to_int

In [None]:
exponential_allele_effects_table = analyze.generate_allele_effects_table(qtl, alleles, 
                                                exponential_allele_effects, saegus_to_tassel_loci)

In [None]:
analyze.remap_allele_frequency_table_loci(analyze.reload_allele_frequencies_table(run_id, 0, 250, 
                                                                                  'exponential'), 
                                          concordant_segregating_loci)

In [None]:
analyze.write_multiple_sample_analyzer(sample_library, sample_sizes, qtl, alleles, 
                                       exponential_allele_effects, 0.7,  concordant_segregating_loci, 
                                       run_id=run_id, sub_run_id='_exponential', 
                                       allele_frequency_hdf=run_id+'_allele_frequency_storage.h5')

In [None]:
multiple_sample_analyzer(meta_populations, qtl, allele_effects, minor_alleles, concordant_segregating_loci)

In [None]:
import h5py

In [None]:
# Open read-only: the file is only read here, and the default mode of
# h5py.File differs between h5py versions, so state it explicitly.
# NOTE(review): `afname` is not defined anywhere in this notebook -- it must be
# set to the dataset name before this cell can run.
with h5py.File('bia_allele_frequencies.hdf5', 'r') as biaf:
    reloaded_af = np.array(biaf[afname])

In [None]:
fisegloc = list(concordant_segregating_loci)

In [None]:
minor_allele_frequencies = reloaded_af[fisegloc]

In [None]:
def write_super_tables(power_and_fpr_raw_data, sample_sizes, number_of_replicates, run_id, sub_run_id=''):
    """
    Write one tab-separated file per (sample size, replicate) pair.

    :param power_and_fpr_raw_data: Indexed as ``[size][rep]``; every entry must
        provide ``to_csv(path, sep=...)``.
    :param sample_sizes: Iterable of sample sizes (outer loop).
    :param number_of_replicates: Replicates ``0 .. number_of_replicates - 1``
        are written for every size (inner loop).
    :param run_id: First component of each file name.
    :param sub_run_id: Second component of each file name; it is always
        followed by an underscore, even when left empty.

    File names have the form ``<run_id>_<sub_run_id>_<rep>_<size>_super_table.txt``.
    """
    for sample_size in sample_sizes:
        for replicate in range(number_of_replicates):
            name_parts = [run_id, sub_run_id, str(replicate), str(sample_size)]
            file_name = '_'.join(name_parts) + '_super_table.txt'
            table = power_and_fpr_raw_data[sample_size][replicate]
            table.to_csv(file_name, sep='\t')

In [None]:
expo_power_fpr_raw_data = analyze.collect_power_analysis_data(run_id, sample_sizes, number_of_replicates, concordant_segregating_loci, 'exponential')

In [None]:
expo_power_fpr_raw_data[250]

In [None]:
write_super_tables(expo_power_fpr_raw_data,
                  sample_sizes,
                  number_of_replicates,
                  'bacchus',
                  sub_run_id='exponential')

In [None]:
expo_results, expo_true_positives, expo_false_positives = study.calculate_power_fpr(expo_power_fpr_raw_data, sample_sizes, 
                                                                             number_of_replicates, number_of_qtl)

In [None]:
expo_results

In [None]:
mean_and_stdev = pd.DataFrame([expo_results.mean(), expo_results.std()], index=['mean', 'stdev']).T
mean_and_stdev.to_csv('bacchus_exponential_mean_and_stdev_power_fpr.csv', sep='\t')

In [None]:
geo_results

In [None]:
geometric_allele_effects_table

In [None]:
exponential_allele_effects_table

In [None]:
expo_results.to_csv("bacchus_exponential_power_fpr_results.txt", sep='\t')

In [None]:
mean_and_stdev = pd.DataFrame([geo_results.mean(), geo_results.std()], index=['mean', 'stdev']).T
mean_and_stdev.to_csv('full_icecrown_geometric_mean_and_stdev_power_fpr.txt', sep='\t')

In [None]:
expo_results, expo_true_positives, expo_false_positives = full_icecrown.calculate_power_fpr(expo_power_fpr_raw_data,
                                                                                      sample_sizes,
                                                                                      number_of_replicates,
                                                                                      number_of_qtl)

In [None]:
expo_results

In [None]:
expo_results.to_csv('full_icecrown_exponential_power_fpr_results.txt', sep='\t')

In [None]:
mean_and_stdev = pd.DataFrame([expo_results.mean(), expo_results.std()], index=['mean', 'stdev']).T
mean_and_stdev.to_csv('full_icecrown_exponential_mean_and_stdev_power_fpr.txt', sep='\t')

In [None]:
write_super_tables(expo_power_fpr_raw_data, sample_sizes, number_of_replicates, run_id, 'exponential')

In [None]:
geo_aggregate_estimated_actual = pd.DataFrame([np.array(geo_agg_estimated), np.array(geo_agg_actual)], index=['estimated', 'actual']).T

In [None]:
geo_aggregate_estimated_actual['estimated'] = geo_aggregate_estimated_actual['estimated'].apply(np.fabs)

In [None]:
geo_aggregate_estimated_actual

In [None]:
geo_corr = geo_aggregate_estimated_actual['estimated'].corr(geo_aggregate_estimated_actual['actual'])

In [None]:
geo_agg_estimated

In [None]:
aggregate_estimated_actual

In [None]:
geo_corr

In [None]:
pwd

In [None]:
geo_aggregate_estimated_actual.to_csv('full_icecrown_geometric_estimated_vs_actual_allele_effects.txt', sep='\t')

In [None]:
agg_estimated = []
agg_actual = []

In [None]:
# Collect estimated vs. actual effects over every replicate / sample size,
# keeping only the rows whose 'difference' is non-zero.
for rep in reps:
    for size in sample_sizes:
        sutable = sutable_collection[rep][size]
        # `.ix` is deprecated and was removed in pandas 1.0; a boolean mask and
        # a column label are both handled by `.loc`.
        droppable = list(sutable.loc[sutable.loc[:, 'difference'] == 0.0].index)
        qtloci = sutable.drop(droppable, axis=0)
        agg_estimated.extend(list(qtloci['add_effect']))
        agg_actual.extend(list(qtloci['difference']))

In [None]:
aggregate_estimated_actual = pd.DataFrame([np.array(agg_estimated), np.array(agg_actual)], index=['estimated', 'actual']).T

In [None]:
aggregate_estimated_actual['estimated'] = np.fabs(aggregate_estimated_actual['estimated'])

In [None]:
aggregate_estimated_actual

In [None]:
correlation_actual_vs_effects = aggregate_estimated_actual['estimated'].corr(aggregate_estimated_actual['actual'])

In [None]:
aggregate_estimated_actual.to_csv('full_icecrown_exponential_estimated_vs_actual_allele_effects.txt', sep='\t')

In [None]:
aggregate_estimated_actual['estimated'] = np.fabs(aggregate_estimated_actual['estimated'])

In [None]:
cd C:\tassel\output\full_icecrown\exponential

In [None]:
expo_estimated_actual = pd.read_csv('full_icecrown_exponential_estimated_vs_actual_allele_effects.txt', sep='\t', index_col=0)

In [None]:
expo_estimated_actual

In [None]:
aggregate_estimated_actual

In [None]:
# A stray leading comma made this cell a SyntaxError.
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook

In [None]:
output_notebook()

In [None]:
aggregate_estimated_actual

In [None]:
geo_x = aggregate_estimated_actual['estimated']
geo_y = aggregate_estimated_actual['actual']

In [None]:
p = figure(title="Estimated vs Actual Allele Effects - Geometric Series", 
           title_text_font_size="16",
          x_range=(-0.2, 4))

In [None]:
# Plot geo_x (estimated) against geo_y (actual). The original passed `y`, which
# is not defined at this point and is later bound to the exponential data.
# The x=/y= string keywords were dropped: they conflict with the positional
# data, and the axis labels are set explicitly below.
p.scatter(geo_x, geo_y)

p.xaxis.axis_label = "Estimated"
p.yaxis.axis_label = "Actual"

In [None]:
show(p)

In [None]:
expo

In [None]:
p = figure(title="Estimated vs Actual Allele Effects - Geometric Series", title_text_font_size="16")

In [None]:
expo_plot = figure(title="Estimated vs Actual Effects - Exponential(lambda=1)", 
                   title_text_font_size="16", 
                  x_range=(0, 4))

x = np.array(expo_estimated_actual['estimated'])
y = np.array(expo_estimated_actual['actual'])

expo_plot.xaxis.axis_label = "Estimated"
expo_plot.yaxis.axis_label = "Actual"

In [None]:
expo_plot.scatter(x, y)

In [None]:
show(expo_plot)

In [None]:
from bokeh.io import hplot

In [None]:
geo_plot = figure(title="Estimated vs Actual Allele Effects - Geometric Series", 
           title_text_font_size="16",
          x_range=(0, 4), y_range=(0, 4))

In [None]:
geo_x = aggregate_estimated_actual['actual']
geo_y = aggregate_estimated_actual['estimated']

In [None]:
geo_plot.xaxis.axis_label = "Actual"
geo_plot.yaxis.axis_label = "Estimated"
# The data is passed positionally; the x=/y= string keywords were dropped
# because they conflict with the positional arguments and the axis labels are
# already set above.
geo_plot.scatter(geo_x, geo_y)

In [None]:
expo_plot = figure(title="Estimated vs Actual Effects - Exponential(lambda=1)", 
                   title_text_font_size="16", 
                  x_range=(0, 4), y_range=(0, 4))

expo_x = np.array(expo_estimated_actual['actual'])
expo_y = np.array(expo_estimated_actual['estimated'])

expo_plot.xaxis.axis_label = "Actual"
expo_plot.yaxis.axis_label = "Estimated"
expo_plot.scatter(expo_x, expo_y)

In [None]:
multi_plot = hplot(geo_plot, expo_plot)
show(multi_plot)

In [None]:
output_file("multi_plot.png")

In [None]:
ls