In [22]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
from scipy import stats
from functools import reduce
from MyFunctions import read_fitness, read_bp, read_genes, read_robustness, perc_diff, name, mean_format, rank_sum_format

# --- Paths -------------------------------------------------------------------
# Exactly one INPUT_ROOT_DIR assignment should be active; switch by (un)commenting.
# Laptop
#INPUT_ROOT_DIR = "C://Users//davis//Dropbox//Freiburg Masters Semesters//Thesis//Results//Round02//"

# Desktop
INPUT_ROOT_DIR = "D://Dropbox_New//Dropbox//Freiburg Masters Semesters//Thesis//Results//Round02//"

# Everything this notebook writes goes below this directory
OUTPUT_ROOT_DIR = INPUT_ROOT_DIR + "Graphics//all_analysis//"

# Rolling-window lengths (in rows) used when smoothing the time series
WINDOW_SIZE = 10000
GENOME_WINDOW_SIZE = 5000

print ("Setting up environment...")

# Output sub-directories, relative to OUTPUT_ROOT_DIR
_output_subdirs = [
    # stat_fitness_global
    "fitness_global//perc_change//",
    "fitness_global//num_and_perc_change//",
    # stat_genes_global
    "genes_global//perc_change//",
    "genes_global//num_and_perc_change//",
    # stat_genes_best (for non-coding)
    "genes_best//num_and_perc_change//",
    "bp_best//num_and_perc_change//",
]

# Make sure that the output directories exist and create them if not.
# os.makedirs(..., exist_ok=True) also creates missing parent directories
# (os.mkdir would fail if e.g. "Graphics" did not exist yet) and does nothing
# when the directory is already there, so no separate exists() check is needed.
os.makedirs(OUTPUT_ROOT_DIR, exist_ok=True)
for _subdir in _output_subdirs:
    os.makedirs(OUTPUT_ROOT_DIR + _subdir, exist_ok=True)

# Separator string; not used in this cell (presumably for the statistics text
# reports written further down - TODO confirm)
LINE = "============================================="

# Global matplotlib font sizes for legends and tick labels
plt.rc('legend',fontsize=20)
plt.rc('xtick',labelsize=15)
plt.rc('ytick',labelsize=15)

print("done.")

Setting up environment...
done.


In [3]:
print("Reading in stat_fitness_global data...")

# Column names for the stat_fitness_glob.out tables (presumably the header order
# used by read_fitness - verify against MyFunctions)
fitness_global_names = [
    'generation',
    'pop_size',
    'fitness',
    'genome_size',
    'metabolic_error',
    'parents_metabolic_error',
    'metabolic_fitness',
    'secretion_error',
    'parents_secretion_error',
    'secretion_fitness',
    'amt_compound_present',
]

# Seed directories, in the order the frames are unpacked below
_fitness_global_seeds = ("seed01", "seed02", "seed03", "seed04", "seed05")


def _read_condition_fitness_global(condition_dir):
    """Read stat_fitness_glob.out for every seed of one condition.

    condition_dir is the condition's directory name below each seed directory
    (e.g. "control", "mut_up"). Returns a list of five read_fitness() results,
    ordered seed01..seed05.
    """
    return [
        read_fitness(INPUT_ROOT_DIR + seed_dir + "//" + condition_dir + "//stats//stat_fitness_glob.out")
        for seed_dir in _fitness_global_seeds
    ]


# Control
(df_seed01_control_fitness_global,
 df_seed02_control_fitness_global,
 df_seed03_control_fitness_global,
 df_seed04_control_fitness_global,
 df_seed05_control_fitness_global) = _read_condition_fitness_global("control")

# Mutation Up (directory "mut_up")
(df_seed01_mutation_up_fitness_global,
 df_seed02_mutation_up_fitness_global,
 df_seed03_mutation_up_fitness_global,
 df_seed04_mutation_up_fitness_global,
 df_seed05_mutation_up_fitness_global) = _read_condition_fitness_global("mut_up")

# Mutation Down (directory "mut_down")
(df_seed01_mutation_down_fitness_global,
 df_seed02_mutation_down_fitness_global,
 df_seed03_mutation_down_fitness_global,
 df_seed04_mutation_down_fitness_global,
 df_seed05_mutation_down_fitness_global) = _read_condition_fitness_global("mut_down")

# Selection Up
(df_seed01_selection_up_fitness_global,
 df_seed02_selection_up_fitness_global,
 df_seed03_selection_up_fitness_global,
 df_seed04_selection_up_fitness_global,
 df_seed05_selection_up_fitness_global) = _read_condition_fitness_global("selection_up")

# Selection Down
(df_seed01_selection_down_fitness_global,
 df_seed02_selection_down_fitness_global,
 df_seed03_selection_down_fitness_global,
 df_seed04_selection_down_fitness_global,
 df_seed05_selection_down_fitness_global) = _read_condition_fitness_global("selection_down")

# Population Up
(df_seed01_pop_up_fitness_global,
 df_seed02_pop_up_fitness_global,
 df_seed03_pop_up_fitness_global,
 df_seed04_pop_up_fitness_global,
 df_seed05_pop_up_fitness_global) = _read_condition_fitness_global("pop_up")

# Population Down
(df_seed01_pop_down_fitness_global,
 df_seed02_pop_down_fitness_global,
 df_seed03_pop_down_fitness_global,
 df_seed04_pop_down_fitness_global,
 df_seed05_pop_down_fitness_global) = _read_condition_fitness_global("pop_down")

print("done.")

Reading in stat_fitness_global data...
done.


In [21]:
# Text report for stat_fitness_global. The handle is left open on purpose here:
# it is not closed anywhere in this cell.
f = open(OUTPUT_ROOT_DIR+"fitness_global/STATS-fitness_global.txt","w")

# Each summary frame starts as a copy of the generation column of the first control run
_generation_column = df_seed01_control_fitness_global['generation']
df_means = pd.DataFrame(_generation_column.copy())
# Rows from position 450000 onwards (the name suggests the last 50k generations - TODO confirm run length)
df_means_last_50k = pd.DataFrame(_generation_column.iloc[450000:].copy())
df_perc_change = pd.DataFrame(_generation_column.copy())

seed_keys = ['seed01', 'seed02', 'seed03', 'seed04', 'seed05']
print("stat_fitness_global")
f.write("stat_fitness_global")

# Bookkeeping values; none of them is read again in this cell
invalid_headers = ['generation', 'secretion_error', 'parents_secretion_error', 'secretion_fitness', 'amt_compound_present']
num_headers = len(df_seed01_mutation_up_fitness_global.columns)-len(invalid_headers)
progress_counter = 1

# The five per-seed frames of every condition, ordered seed01..seed05
_control_runs = (df_seed01_control_fitness_global, df_seed02_control_fitness_global,
                 df_seed03_control_fitness_global, df_seed04_control_fitness_global,
                 df_seed05_control_fitness_global)
_mutation_up_runs = (df_seed01_mutation_up_fitness_global, df_seed02_mutation_up_fitness_global,
                     df_seed03_mutation_up_fitness_global, df_seed04_mutation_up_fitness_global,
                     df_seed05_mutation_up_fitness_global)
_mutation_down_runs = (df_seed01_mutation_down_fitness_global, df_seed02_mutation_down_fitness_global,
                       df_seed03_mutation_down_fitness_global, df_seed04_mutation_down_fitness_global,
                       df_seed05_mutation_down_fitness_global)
_selection_up_runs = (df_seed01_selection_up_fitness_global, df_seed02_selection_up_fitness_global,
                      df_seed03_selection_up_fitness_global, df_seed04_selection_up_fitness_global,
                      df_seed05_selection_up_fitness_global)
_selection_down_runs = (df_seed01_selection_down_fitness_global, df_seed02_selection_down_fitness_global,
                        df_seed03_selection_down_fitness_global, df_seed04_selection_down_fitness_global,
                        df_seed05_selection_down_fitness_global)
_pop_up_runs = (df_seed01_pop_up_fitness_global, df_seed02_pop_up_fitness_global,
                df_seed03_pop_up_fitness_global, df_seed04_pop_up_fitness_global,
                df_seed05_pop_up_fitness_global)
_pop_down_runs = (df_seed01_pop_down_fitness_global, df_seed02_pop_down_fitness_global,
                  df_seed03_pop_down_fitness_global, df_seed04_pop_down_fitness_global,
                  df_seed05_pop_down_fitness_global)


def _fitness_by_seed(runs):
    """Place the 'fitness' column of each run side by side, one column per entry of seed_keys."""
    return pd.concat([run['fitness'] for run in runs], axis=1, keys=seed_keys)


def _smoothed_fitness(run):
    """Return a frame holding the run's 'generation' column plus its rolling-mean 'fitness'."""
    smoothed = pd.DataFrame(run['generation'].copy())
    smoothed['fitness'] = run['fitness'].rolling(WINDOW_SIZE).mean()
    return smoothed


# Concatenate all five seeds for each condition to make calculating the mean easier
df_control_tmp = _fitness_by_seed(_control_runs)
df_mut_up_tmp = _fitness_by_seed(_mutation_up_runs)
df_mut_down_tmp = _fitness_by_seed(_mutation_down_runs)
df_pop_up_tmp = _fitness_by_seed(_pop_up_runs)
df_pop_down_tmp = _fitness_by_seed(_pop_down_runs)
df_selection_up_tmp = _fitness_by_seed(_selection_up_runs)
df_selection_down_tmp = _fitness_by_seed(_selection_down_runs)

# Per-seed fitness smoothed with a rolling mean over WINDOW_SIZE rows
(df_seed01_control_avg,
 df_seed02_control_avg,
 df_seed03_control_avg,
 df_seed04_control_avg,
 df_seed05_control_avg) = [_smoothed_fitness(run) for run in _control_runs]

(df_seed01_mutation_up_avg,
 df_seed02_mutation_up_avg,
 df_seed03_mutation_up_avg,
 df_seed04_mutation_up_avg,
 df_seed05_mutation_up_avg) = [_smoothed_fitness(run) for run in _mutation_up_runs]

(df_seed01_mutation_down_avg,
 df_seed02_mutation_down_avg,
 df_seed03_mutation_down_avg,
 df_seed04_mutation_down_avg,
 df_seed05_mutation_down_avg) = [_smoothed_fitness(run) for run in _mutation_down_runs]

(df_seed01_selection_up_avg,
 df_seed02_selection_up_avg,
 df_seed03_selection_up_avg,
 df_seed04_selection_up_avg,
 df_seed05_selection_up_avg) = [_smoothed_fitness(run) for run in _selection_up_runs]

(df_seed01_selection_down_avg,
 df_seed02_selection_down_avg,
 df_seed03_selection_down_avg,
 df_seed04_selection_down_avg,
 df_seed05_selection_down_avg) = [_smoothed_fitness(run) for run in _selection_down_runs]

(df_seed01_pop_up_avg,
 df_seed02_pop_up_avg,
 df_seed03_pop_up_avg,
 df_seed04_pop_up_avg,
 df_seed05_pop_up_avg) = [_smoothed_fitness(run) for run in _pop_up_runs]

(df_seed01_pop_down_avg,
 df_seed02_pop_down_avg,
 df_seed03_pop_down_avg,
 df_seed04_pop_down_avg,
 df_seed05_pop_down_avg) = [_smoothed_fitness(run) for run in _pop_down_runs]

# Mean across the seeds of each condition, then smoothed with the same rolling window.
# The tuple order fixes the column order of df_means.
for _condition, _by_seed in (
    ('control', df_control_tmp),
    ('mut_up', df_mut_up_tmp),
    ('mut_down', df_mut_down_tmp),
    ('pop_up', df_pop_up_tmp),
    ('pop_down', df_pop_down_tmp),
    ('selection_up', df_selection_up_tmp),
    ('selection_down', df_selection_down_tmp),
):
    df_means[_condition] = _by_seed.mean(axis=1).rolling(WINDOW_SIZE).mean()

print("done")

stat_fitness_global
done


In [34]:
print("\t\tGenerating fitness histogram...")

# One histogram panel per condition, stacked vertically with a shared y axis
fig_plot, ax_plot = plt.subplots(nrows=7,ncols=1,figsize=(20,30), sharey=True)
fig_plot.suptitle("Population Fitness Histogram", fontsize=32)

# Global default for axes titles (affects every figure drawn after this point)
matplotlib.rc('axes', titlesize=25)

# (df_means column, panel title), in top-to-bottom panel order
_histogram_panels = [
    ('control', "Control"),
    ('mut_up', "Mutation Up"),
    ('mut_down', "Mutation Down"),
    ('selection_up', "Selection Up"),
    ('selection_down', "Selection Down"),
    ('pop_up', "Population Up"),
    ('pop_down', "Population Down"),
]

# Draw the histograms first: DataFrame.hist sets the title of every axes it
# draws into to the column name, so titles applied before this call are lost.
df_means.hist(column=[_column for _column, _ in _histogram_panels], bins=50, ax=ax_plot)

# Now apply the human-readable titles and the y-axis label to each panel
for _panel_ax, (_, _panel_title) in zip(ax_plot, _histogram_panels):
    _panel_ax.set_title(_panel_title, fontsize=25)
    _panel_ax.set_ylabel("Count", fontsize=20)

fig_plot.savefig(OUTPUT_ROOT_DIR+"fitness_global/global_fitness_histogram.png")

# Release the figure so it does not stay alive in the kernel
fig_plot.clear()
plt.close(fig_plot)

print("Done.")

		Generating fitness histogram...
Done.
