In [2]:
import simuOpt
simuOpt.setOptions(alleleType='short', optimized=True, numThreads=4, quiet=True)
import simuPOP as sim
import pandas as pd
from saegus import breed, operators, simulate, analyze, parse, parameters
import shelve
import numpy as np
import random
import collections as col
np.set_printoptions(suppress=True, precision=3)

In [3]:
run_id='icqg'
sample_sizes=[250, 500, 750, 1000]
number_of_qtl = 10
number_of_replicates = 50
founders = [[2, 26], [3, 25], [4, 24], [5, 23]]
os_per_pair = 500
recombination_rates = [0.01]*1478

In [4]:
founder_pair_count = len(founders)

In [5]:
if founder_pair_count == 1:
    pass
elif founder_pair_count % 2 == 0:
    founder_pair_count /= 2
elif founder_pair_count % 2 > 0:
    founder_pair_count + 1
    founders.append(founders[0])
else:
    pass

In [6]:
len(founders) / 2

2.0

In [7]:
founders

[[2, 26], [3, 25], [4, 24], [5, 23]]

In [8]:
founder_generator = (founder_pair for founder_pair in founders)

In [9]:
while founder_generator:
    print(next(founder_generator))

[2, 26]
[3, 25]
[4, 24]
[5, 23]


StopIteration: 

In [10]:
for pair in founder_generator:
    print(pair)

In [11]:
prefounders = sim.loadPopulation('prefounders1478.pop')
multi_prefounders = sim.Simulator(prefounders, number_of_replicates, stealPops=False)
magic = breed.MAGIC(multi_prefounders, founders, recombination_rates)
sim.tagID(prefounders, reset=27)
magic.generate_f_one(founders, os_per_pair)
mrc = breed.MultiRandomCross(multi_prefounders, 4, os_per_pair)
mother_choices, father_choices = mrc.determine_random_cross()
multi_snd_ord_chooser = breed.MultiSecondOrderPairIDChooser(mother_choices, father_choices)
multi_prefounders.evolve(
    matingScheme=sim.HomoMating(
        sim.PyParentsChooser(multi_snd_ord_chooser.snd_ord_id_pairs),
        sim.OffspringGenerator(ops=[
            sim.IdTagger(),
            sim.PedigreeTagger(),
            sim.Recombinator(rates=0.01)
        ],
            numOffspring=1),
        subPopSize=[2000],
    ),
    gen=1,
)
final_mrc = breed.MultiRandomCross(multi_prefounders, 2, 1000)
final_mothers, final_fathers = final_mrc.determine_random_cross()
final_multi_snd_ord_chooser = breed.MultiSecondOrderPairIDChooser(final_mothers, final_fathers)
multi_prefounders.evolve(
    matingScheme=sim.HomoMating(
        sim.PyParentsChooser(final_multi_snd_ord_chooser.snd_ord_id_pairs),
        sim.OffspringGenerator(ops=[
            sim.IdTagger(),
            sim.PedigreeTagger(),
            sim.Recombinator(rates=0.01)
        ],
            numOffspring=1),
        subPopSize=[2000],
    ),
    gen=1,
)
mater = breed.MAGIC(multi_prefounders, founders, [0.01]*1478)
mater.random_mating(3, 2000)

Initiating random mating for 3 generations.


In [12]:
icqg = analyze.Study('icqg')

In [13]:
sample_library = icqg.collect_samples(multi_prefounders, sample_sizes)

In [14]:
sample_library

{0: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 1: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 2: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 3: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 4: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 5: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 6: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 7: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 8: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 9: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],


In [16]:
alleles = np.array(pd.read_hdf('parameters/alleles_at_1478_loci.hdf'))

In [38]:
sample_library[0][0]

<simuPOP.Population>

In [40]:
sample_library[0]

[<simuPOP.Population>,
 <simuPOP.Population>,
 <simuPOP.Population>,
 <simuPOP.Population>]

In [41]:
for i in range(50):
    for j in range(4):
        sim.stat(sample_library[i][j], alleleFreq=sim.ALL_AVAIL)

In [18]:
analyze.multi_sample_allele_frq_storage(sample_library, alleles, run_id)

AttributeError: module 'saegus.analyze' has no attribute 'multi_sample_allele_frq_storage'

In [19]:
sets_of_segregating_loci = icqg.seg_loci_among_samples(sample_library)

In [20]:
sets_of_segregating_loci.values()

dict_values([200])

In [21]:
concordant_segregating_loci = list(sets_of_segregating_loci.keys())[0]

In [22]:
concordant_segregating_loci

(1,
 2,
 3,
 4,
 5,
 6,
 8,
 10,
 12,
 13,
 14,
 15,
 16,
 20,
 21,
 23,
 24,
 25,
 26,
 27,
 29,
 30,
 31,
 32,
 34,
 36,
 37,
 38,
 39,
 40,
 42,
 45,
 46,
 50,
 52,
 53,
 54,
 56,
 59,
 61,
 62,
 63,
 66,
 67,
 68,
 70,
 71,
 74,
 76,
 77,
 79,
 80,
 82,
 86,
 88,
 90,
 91,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 103,
 104,
 106,
 107,
 108,
 109,
 114,
 115,
 116,
 119,
 120,
 121,
 122,
 123,
 124,
 126,
 127,
 128,
 129,
 130,
 132,
 134,
 136,
 138,
 139,
 140,
 141,
 144,
 145,
 146,
 147,
 149,
 152,
 153,
 155,
 158,
 159,
 163,
 165,
 168,
 170,
 171,
 172,
 173,
 174,
 175,
 177,
 181,
 182,
 185,
 186,
 190,
 191,
 192,
 193,
 196,
 198,
 202,
 208,
 209,
 211,
 212,
 214,
 215,
 218,
 219,
 220,
 221,
 223,
 224,
 225,
 227,
 229,
 232,
 235,
 240,
 241,
 246,
 247,
 249,
 250,
 252,
 253,
 254,
 255,
 256,
 257,
 258,
 259,
 261,
 262,
 263,
 264,
 265,
 266,
 268,
 270,
 272,
 274,
 275,
 277,
 278,
 279,
 280,
 282,
 283,
 284,
 285,
 288,
 291,
 292,
 293,
 294,
 297,
 

In [23]:
qtl = tuple(sorted(random.sample(concordant_segregating_loci, number_of_qtl)))

In [24]:
qtl

(122, 264, 404, 432, 443, 884, 1084, 1182, 1221, 1250)

In [25]:
add_trait = parameters.Trait()

In [27]:
exponential_allele_effects = add_trait.assign_allele_effects(alleles, qtl, np.random.exponential, 1)

In [None]:
import importlib as imp
imp.reload(parameters)

In [31]:
geometric_allele_effects = add_trait.assign_geometric_series(exponential_allele_effects, 0.90, 10)

In [32]:
geometric_allele_effects

{122: {0: 0.9, 3: 0},
 264: {1: 0.81, 3: 0},
 404: {1: 0.7290000000000001, 2: 0},
 432: {0: 0.6561, 3: 0},
 443: {1: 0.5904900000000001, 2: 0},
 884: {1: 0.531441, 3: 0},
 1084: {0: 0.4782969000000001, 2: 0},
 1182: {4: 0.4304672100000001, 5: 0},
 1221: {1: 0.3874204890000001, 3: 0},
 1250: {0: 0.3486784401000001, 2: 0}}

In [28]:
exponential_allele_effects

{122: {0: 0.7321702409349414, 3: 1.2672678180272479},
 264: {1: 1.839474883008665, 3: 0.7931681853928985},
 404: {1: 1.0085487338516155, 2: 0.20254463375604548},
 432: {0: 0.046255253082226014, 3: 0.17958972954578897},
 443: {1: 0.3330197539035922, 2: 0.7784679181708912},
 884: {1: 1.8940515793860433, 3: 0.5077624217838272},
 1084: {0: 0.5147906970311666, 2: 1.263350717270373},
 1182: {4: 0.5554858829822452, 5: 0.17776171231609753},
 1221: {1: 0.8156399170033978, 3: 3.2704472775431372},
 1250: {0: 1.8358463512919043, 2: 2.8213428941346486}}

In [None]:
concordant_segregating_loci

In [None]:
seg_loc_storage = shelve.open('segregating_loci_storage')
seg_loc_storage[run_id] = concordant_segregating_loci
seg_loc_storage.close()

In [29]:
droppable_loci = [droppable for droppable in range(1478) if droppable not in concordant_segregating_loci]

In [30]:
droppable_loci

[0,
 7,
 9,
 11,
 17,
 18,
 19,
 22,
 28,
 33,
 35,
 41,
 43,
 44,
 47,
 48,
 49,
 51,
 55,
 57,
 58,
 60,
 64,
 65,
 69,
 72,
 73,
 75,
 78,
 81,
 83,
 84,
 85,
 87,
 89,
 92,
 93,
 94,
 102,
 105,
 110,
 111,
 112,
 113,
 117,
 118,
 125,
 131,
 133,
 135,
 137,
 142,
 143,
 148,
 150,
 151,
 154,
 156,
 157,
 160,
 161,
 162,
 164,
 166,
 167,
 169,
 176,
 178,
 179,
 180,
 183,
 184,
 187,
 188,
 189,
 194,
 195,
 197,
 199,
 200,
 201,
 203,
 204,
 205,
 206,
 207,
 210,
 213,
 216,
 217,
 222,
 226,
 228,
 230,
 231,
 233,
 234,
 236,
 237,
 238,
 239,
 242,
 243,
 244,
 245,
 248,
 251,
 260,
 267,
 269,
 271,
 273,
 276,
 281,
 286,
 287,
 289,
 290,
 295,
 296,
 298,
 306,
 309,
 320,
 322,
 323,
 324,
 325,
 326,
 330,
 332,
 337,
 340,
 343,
 347,
 351,
 357,
 358,
 360,
 364,
 365,
 368,
 369,
 370,
 371,
 372,
 373,
 374,
 376,
 377,
 385,
 386,
 391,
 396,
 397,
 403,
 405,
 406,
 407,
 409,
 414,
 415,
 416,
 417,
 419,
 422,
 424,
 425,
 427,
 439,
 444,
 446,
 455,
 45

In [None]:
imp.reload(analyze)

In [42]:
analyze.store_allele_effect_frequency_tables(sample_library, alleles, 
                                             qtl,
                                             geometric_allele_effects,
                                            run_id, 'geometric')

In [43]:
loci_conversions = shelve.open(run_id+'_loci_conversions')
saegus_to_tassel_loci = {}
tassel_to_saegus_loci = {}
for idx, locus in enumerate(concordant_segregating_loci):
    saegus_to_tassel_loci[locus] = idx
    tassel_to_saegus_loci[idx] = locus
loci_conversions['saegus_to_tassel'] = saegus_to_tassel_loci
loci_conversions['tassel_to_saegus'] = tassel_to_saegus_loci
loci_conversions.close()

In [44]:
exponential_allele_effects_table = analyze.generate_allele_effects_table(qtl, alleles, 
                                                exponential_allele_effects, saegus_to_tassel_loci)

TypeError: generate_allele_effects_table() takes 3 positional arguments but 4 were given

In [None]:
exponential_allele_effects_table

In [None]:
geometric_allele_effects_table = analyze.generate_allele_effects_table(qtl, alleles, 
                                                geometric_allele_effects, saegus_to_tassel_loci)

In [None]:
geometric_allele_effects_table

In [45]:
analyze.write_multiple_sample_analyzer(sample_library, sample_sizes, qtl, alleles, 
                                       geometric_allele_effects, 0.7,  concordant_segregating_loci, 
                                       run_id=run_id, sub_run_id='_geometric', 
                                       allele_frequency_hdf=run_id+'_storage.h5')

KeyError: 'No object named icqg/0/250 in the file'

In [None]:
int_to_snp = {0: 'A', 1: 'C', 2: 'G', 3: 'T', 4: '-', 5: '+'}
snp_to_int = {'A': 0, 'C': 1, '-': 4, 'G': 2, '+': 5, 'T': 3}
conv = shelve.open('synthesis_parameters')
conv['integer_to_snp'] = int_to_snp
conv['snp_to_integer'] = snp_to_int
conv.close()

In [None]:
conv['integer_to_snp'] = int_to_snp
conv['snp_to_integer'] = snp_to_int


In [None]:
int_to_snp = {0: 'A', 1: 'C', 2: 'G', 3: 'T', 4: '-', 5: '+'}
snp_to_int = {'A': 0, 'C': 1, '-': 4, 'G': 2, '+': 5, 'T': 3}

In [None]:
snp_to_int

In [3]:
cd C:\tassel\output

C:\tassel\output


In [5]:
seg_loc_storage = shelve.open('segregating_loci_storage')

In [46]:
list(seg_loc_storage)

NameError: name 'seg_loc_storage' is not defined

In [7]:
concordant_segregating_loci = seg_loc_storage['icqg']

In [23]:
import importlib as imp
imp.reload(analyze)

<module 'saegus.analyze' from 'C:\\Anaconda3\\lib\\site-packages\\saegus\\analyze.py'>

In [24]:
icqg = analyze.Study(run_id)

In [25]:
power_fpr_raw_data = icqg.collect_power_analysis_data(sample_sizes, number_of_replicates, concordant_segregating_loci, 'geometric')

In [31]:
power_fpr_raw_data[500][0].to_csv('icqg_geometric_0_500_super_table.txt', sep='\t')

In [35]:
for size in sample_sizes:
    for rep in range(50):
        name = 'icqg_geometric_' + str(rep) + '_' + str(size) + '_super_table.txt'
        power_fpr_raw_data[size][rep].to_csv(name, sep='\t')

In [47]:
mean_and_stdev = pd.DataFrame([geo_results.mean(), geo_results.std()], index=['mean', 'stdev']).T

In [49]:
mean_and_stdev.to_csv('icqg_mean_and_stdev_power_fpr.csv', sep='\t')

In [16]:
sutable

Unnamed: 0,Chr,df,F,p,add_effect,add_F,add_p,dom_effect,dom_F,dom_p,...,major_allele,major_frequency,locus,alpha_allele,alpha_frequency,alpha_effect,beta_allele,beta_frequency,beta_effect,difference
0,1.0,2,0.51896,0.59579,0.293510,0.923530,0.33750,0.309830,0.98767,0.32129,...,1.0,0.866,1,1,0.866,0.0,3,0.134,0.0,0.0
1,1.0,2,0.78707,0.45632,-0.104860,0.118500,0.73096,-0.120000,0.12514,0.72383,...,3.0,0.900,2,3,0.900,0.0,1,0.100,0.0,0.0
2,1.0,2,0.05185,0.94949,-0.018675,0.006630,0.93517,-0.067814,0.07424,0.78549,...,0.0,0.864,3,0,0.864,0.0,2,0.136,0.0,0.0
3,1.0,2,1.33858,0.26413,0.556010,2.584900,0.10917,-0.415990,1.22311,0.26984,...,2.0,0.938,4,2,0.938,0.0,0,0.062,0.0,0.0
4,1.0,2,0.01959,0.98060,-0.004205,0.000572,0.98093,-0.031043,0.02994,0.86277,...,0.0,0.758,5,0,0.758,0.0,2,0.242,0.0,0.0
5,1.0,2,0.08504,0.91850,-0.039359,0.047700,0.82729,0.023720,0.01514,0.90216,...,0.0,0.774,6,0,0.774,0.0,2,0.226,0.0,0.0
6,1.0,2,1.15873,0.31560,-0.201930,0.696130,0.40490,0.070870,0.07531,0.78398,...,0.0,0.868,8,0,0.868,0.0,2,0.132,0.0,0.0
7,1.0,2,0.17088,0.84302,-0.017539,0.014580,0.90398,0.070630,0.24341,0.62220,...,1.0,0.638,10,1,0.638,0.0,3,0.362,0.0,0.0
8,1.0,2,1.28007,0.27987,0.053460,0.140180,0.70843,0.181420,1.49021,0.22336,...,3.0,0.688,12,3,0.688,0.0,1,0.312,0.0,0.0
9,1.0,2,0.23555,0.79031,-0.035999,0.016840,0.89686,-0.098293,0.11047,0.73990,...,2.0,0.882,13,2,0.882,0.0,0,0.118,0.0,0.0


In [32]:
geo_results, geo_true_positives, geo_false_positives = icqg.calculate_power_fpr(power_fpr_raw_data, sample_sizes, 
                                                                             number_of_replicates, number_of_qtl)

In [34]:
geo_results.to_csv("icqg_geometric_power_fpr_results.txt", sep='\t')

In [None]:
qtl

In [None]:
saegus_to_tassel_loci[2]

In [None]:
popgen = (pop for pop in sample_library[0])

In [None]:
for pop in popgen:
    