# Comparison of all conditions
In this Jupyter notebook we average each statistic across the available seeds of every condition, compare the result with the control condition, and graph the results.

## Quick links:
[set up the environment](#setup_environment)

### stat_fitness_global
[read in the data](#stat_fitness_global_read) || [graph the data](#stat_fitness_global_graph)<br>

### stat_genes_global
[read in the data](#stat_genes_global_read) || [graph the data](#stat_genes_global_graph)<br>

### Robustness
[read in the data](#robustness_read) || [graph the data](#robustness_graph)<br>

### Evolvability
[read in the data](#robustness_read) || [graph the data](#evolvability_graph)<br>

### Statistics
[Mann-Whitney](#statistics)<br>


## Set up the environment

<a id='setup_environment'></a>

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os
from scipy import stats
from functools import reduce
from MyFunctions import read_fitness, read_bp, read_genes, read_robustness, perc_diff

# Root directory of the Round02 results. Switch between machines by
# (un)commenting the matching line.
# Laptop
#INPUT_ROOT_DIR = "C://Users//davis//Dropbox//Freiburg Masters Semesters//Thesis//Results//Round02//"

# Desktop
INPUT_ROOT_DIR = "D://Dropbox_New//Dropbox//Freiburg Masters Semesters//Thesis//Results//Round02//"

# All figures and statistics files produced by this notebook go below this directory
OUTPUT_ROOT_DIR = INPUT_ROOT_DIR + "Graphics//all_analysis//"
# Number of rows in the rolling window used to smooth the per-generation means
WINDOW_SIZE = 10000

print ("Setting up environment...")
# Make sure that the output directories exist and create them if not.
# os.makedirs also creates every missing parent (OUTPUT_ROOT_DIR itself and
# the fitness_global / genes_global directories), and exist_ok=True makes the
# cell safe to re-run.
for output_sub_dir in ("fitness_global//perc_change//", "genes_global//perc_change//"):
    os.makedirs(OUTPUT_ROOT_DIR + output_sub_dir, exist_ok=True)

# Separator line used when writing the Mann-Whitney result tables to the statistics files
LINE = "============================================="

print("done.")

Setting up environment...
done.


<a id='stat_fitness_global_read'></a>

# stat_fitness_global

## Read in the data for stat_fitness_global

In [3]:
print("Reading in stat_fitness_global data...")
# Column names matching the stat_fitness_global columns printed by the graphing cell
# (the list itself is not referenced in the cells visible here)
fitness_global_names = ['generation', 'pop_size', 'fitness', 'genome_size', 'metabolic_error','parents_metabolic_error', 'metabolic_fitness', 'secretion_error', 'parents_secretion_error', 'secretion_fitness', 'amt_compound_present']


def _load_fitness_global(seed, condition):
    """Read <INPUT_ROOT_DIR>/<seed>/<condition>/stats/stat_fitness_glob.out with read_fitness."""
    return read_fitness(INPUT_ROOT_DIR + seed + "//" + condition + "//stats//stat_fitness_glob.out")


# Control
df_seed01_control_fitness_global = _load_fitness_global("seed01", "control")
df_seed02_control_fitness_global = _load_fitness_global("seed02", "control")
df_seed03_control_fitness_global = _load_fitness_global("seed03", "control")
df_seed04_control_fitness_global = _load_fitness_global("seed04", "control")
df_seed05_control_fitness_global = _load_fitness_global("seed05", "control")

# Mutation Up
df_seed01_mutation_up_fitness_global = _load_fitness_global("seed01", "mut_up")
df_seed02_mutation_up_fitness_global = _load_fitness_global("seed02", "mut_up")
df_seed03_mutation_up_fitness_global = _load_fitness_global("seed03", "mut_up")
df_seed04_mutation_up_fitness_global = _load_fitness_global("seed04", "mut_up")
df_seed05_mutation_up_fitness_global = _load_fitness_global("seed05", "mut_up")

# Mutation Down
df_seed01_mutation_down_fitness_global = _load_fitness_global("seed01", "mut_down")
df_seed02_mutation_down_fitness_global = _load_fitness_global("seed02", "mut_down")
df_seed03_mutation_down_fitness_global = _load_fitness_global("seed03", "mut_down")
df_seed04_mutation_down_fitness_global = _load_fitness_global("seed04", "mut_down")
df_seed05_mutation_down_fitness_global = _load_fitness_global("seed05", "mut_down")

# Selection Up (seed05 is currently not loaded)
df_seed01_selection_up_fitness_global = _load_fitness_global("seed01", "selection_up")
df_seed02_selection_up_fitness_global = _load_fitness_global("seed02", "selection_up")
df_seed03_selection_up_fitness_global = _load_fitness_global("seed03", "selection_up")
df_seed04_selection_up_fitness_global = _load_fitness_global("seed04", "selection_up")
#df_seed05_selection_up_fitness_global = _load_fitness_global("seed05", "selection_up")

# Selection Down (seed05 is currently not loaded)
df_seed01_selection_down_fitness_global = _load_fitness_global("seed01", "selection_down")
df_seed02_selection_down_fitness_global = _load_fitness_global("seed02", "selection_down")
df_seed03_selection_down_fitness_global = _load_fitness_global("seed03", "selection_down")
df_seed04_selection_down_fitness_global = _load_fitness_global("seed04", "selection_down")
#df_seed05_selection_down_fitness_global = _load_fitness_global("seed05", "selection_down")

# Population Up (currently not loaded for any seed)
#df_seed01_pop_up_fitness_global = _load_fitness_global("seed01", "pop_up")
#df_seed02_pop_up_fitness_global = _load_fitness_global("seed02", "pop_up")
#df_seed03_pop_up_fitness_global = _load_fitness_global("seed03", "pop_up")
#df_seed04_pop_up_fitness_global = _load_fitness_global("seed04", "pop_up")
#df_seed05_pop_up_fitness_global = _load_fitness_global("seed05", "pop_up")

# Population Down
df_seed01_pop_down_fitness_global = _load_fitness_global("seed01", "pop_down")
df_seed02_pop_down_fitness_global = _load_fitness_global("seed02", "pop_down")
df_seed03_pop_down_fitness_global = _load_fitness_global("seed03", "pop_down")
df_seed04_pop_down_fitness_global = _load_fitness_global("seed04", "pop_down")
df_seed05_pop_down_fitness_global = _load_fitness_global("seed05", "pop_down")

print("done.")

Reading in stat_fitness_global data...
done.


<a id='stat_fitness_global_graph'></a>

## Graph out stat_fitness_global

In [4]:
# Graph stat_fitness_global.
# For every statistic column: average it across the loaded seeds of each
# condition, smooth the average with a rolling window, plot all conditions and
# their percent change relative to control, and write Mann-Whitney results
# (each tested condition vs. control) to a text file.

seed_keys = ['seed01', 'seed02', 'seed03', 'seed04', 'seed05']

# Get all of the generation numbers as a column
df_means = pd.DataFrame(df_seed01_control_fitness_global['generation'].copy())
df_perc_change = pd.DataFrame(df_seed01_control_fitness_global['generation'].copy())

print("stat_fitness_global")

# The context manager closes the statistics file even if plotting or a test
# raises part-way through the loop.
with open(OUTPUT_ROOT_DIR+"fitness_global/fitness_global_mann-whitney.txt","w") as f:
    # Terminate the header so the first result table starts on its own line
    f.write("stat_fitness_global\n")

    # For every column header in all of the fitness_global files
    for col_header in df_seed01_mutation_up_fitness_global.columns:
        if col_header == 'generation':
            continue

        print("\t"+col_header)
        # Concatenate the loaded seeds of each condition (one column per seed) to make
        # calculating the mean easier. The number of keys must equal the number of
        # Series: a mismatch was silently truncated by older pandas and is
        # deprecated/rejected by newer releases. pop_up is not loaded and therefore skipped.
        df_control_tmp = pd.concat([df_seed01_control_fitness_global[col_header], df_seed02_control_fitness_global[col_header], df_seed03_control_fitness_global[col_header], df_seed04_control_fitness_global[col_header], df_seed05_control_fitness_global[col_header]], axis=1, keys=seed_keys)
        df_mut_up_tmp = pd.concat([df_seed01_mutation_up_fitness_global[col_header], df_seed02_mutation_up_fitness_global[col_header], df_seed03_mutation_up_fitness_global[col_header], df_seed04_mutation_up_fitness_global[col_header], df_seed05_mutation_up_fitness_global[col_header]], axis=1, keys=seed_keys)
        df_mut_down_tmp = pd.concat([df_seed01_mutation_down_fitness_global[col_header], df_seed02_mutation_down_fitness_global[col_header], df_seed03_mutation_down_fitness_global[col_header], df_seed04_mutation_down_fitness_global[col_header], df_seed05_mutation_down_fitness_global[col_header]], axis=1, keys=seed_keys)
        df_pop_down_tmp = pd.concat([df_seed01_pop_down_fitness_global[col_header], df_seed02_pop_down_fitness_global[col_header], df_seed03_pop_down_fitness_global[col_header], df_seed04_pop_down_fitness_global[col_header], df_seed05_pop_down_fitness_global[col_header]], axis=1, keys=seed_keys)
        # Only seeds 01-04 are loaded for the two selection conditions
        df_selection_up_tmp = pd.concat([df_seed01_selection_up_fitness_global[col_header], df_seed02_selection_up_fitness_global[col_header], df_seed03_selection_up_fitness_global[col_header], df_seed04_selection_up_fitness_global[col_header]], axis=1, keys=seed_keys[:4])
        df_selection_down_tmp = pd.concat([df_seed01_selection_down_fitness_global[col_header], df_seed02_selection_down_fitness_global[col_header], df_seed03_selection_down_fitness_global[col_header], df_seed04_selection_down_fitness_global[col_header]], axis=1, keys=seed_keys[:4])

        # Control first: it is the baseline for the percent change and the
        # first line drawn on the figure.
        condition_frames = [
            ('control', df_control_tmp),
            ('mut_up', df_mut_up_tmp),
            ('mut_down', df_mut_down_tmp),
            ('pop_down', df_pop_down_tmp),
            ('selection_up', df_selection_up_tmp),
            ('selection_down', df_selection_down_tmp),
        ]

        # Calculate the mean across the seeds for each condition and also smooth
        # the results with a rolling window
        for condition, df_condition_tmp in condition_frames:
            df_means[condition] = df_condition_tmp.mean(axis=1).rolling(WINDOW_SIZE).mean()

        # Also calculate the percent change vs. control and add it to that DataFrame
        for condition, _ in condition_frames[1:]:
            df_perc_change[condition] = 100*perc_diff(df_means[condition], df_means['control'])

        # Plot all conditions to the figure
        fig = plt.figure(figsize=(30,20))
        ax = fig.add_subplot(111)
        for condition, _ in condition_frames:
            df_means.plot(x='generation', y=condition, label=condition, ax=ax)
        # Labels are applied after plotting because DataFrame.plot relabels the
        # x axis with the name of the x column.
        ax.set_title("stat_fitness_global " + col_header, fontsize=32)
        ax.set_xlabel("Generation", fontsize=20)
        ax.set_ylabel(col_header, fontsize=20)

        fig.savefig(OUTPUT_ROOT_DIR+"fitness_global/stat_fitness_global_mean_"+col_header+".png")
        fig.clear()
        plt.close(fig)

        # Also create a new figure for the percent change
        fig = plt.figure(figsize=(30,20))
        ax = fig.add_subplot(111)
        for condition, _ in condition_frames[1:]:
            df_perc_change.plot(x='generation', y=condition, label=condition, ax=ax)
        ax.set_title("Percent Change - stat_fitness_global " + col_header, fontsize=32)
        ax.set_xlabel("Generation", fontsize=20)
        ax.set_ylabel(col_header, fontsize=20)

        # Save the figure
        fig.savefig(OUTPUT_ROOT_DIR +"fitness_global/perc_change/stat_fitness_perc_change_"+col_header+".png")
        fig.clear()
        plt.close(fig)

        # Calculate the Mann-Whitney test for the tested conditions vs. the control condition.
        # rolling(WINDOW_SIZE) leaves the first WINDOW_SIZE-1 rows as NaN; drop them so
        # missing values are not fed into the rank test.
        # The selection conditions are not tested here.
        # NOTE(review): the samples are smoothed, consecutive generations and hence not
        # independent observations - confirm this is the intended input for the test.
        control_smoothed = df_means['control'].dropna()
        rank_sum_control, p_control = stats.mannwhitneyu(control_smoothed, control_smoothed, alternative='less', use_continuity=False)
        rank_sum_mut_up, p_mut_up = stats.mannwhitneyu(control_smoothed, df_means['mut_up'].dropna(), alternative='less', use_continuity=False)
        rank_sum_mut_down, p_mut_down = stats.mannwhitneyu(control_smoothed, df_means['mut_down'].dropna(), alternative='less', use_continuity=False)
        rank_sum_population_down, p_population_down = stats.mannwhitneyu(control_smoothed, df_means['pop_down'].dropna(), alternative='less', use_continuity=False)

        # Print out the results of the Mann-Whitney tests
        f.write(LINE)
        f.write("\n\t\t\t\t"+col_header)
        f.write("\n\t\t\trank sum\t\tp-value\n")
        f.write(LINE)
        f.write("\nControl \t\t" + str(rank_sum_control) + "\t%.15f" %p_control)
        f.write("\nMutation Up \t\t" + str(rank_sum_mut_up) + "\t%.15f" %p_mut_up)
        f.write("\nMutation Down \t\t" + str(rank_sum_mut_down) + "\t%.15f" %p_mut_down)
        f.write("\nPopulation Down \t" + str(rank_sum_population_down) + "\t%.15f" %p_population_down)
        f.write("\n"+LINE+"\n")

plt.close('all')
print("Graphing of stat_fitness_global complete!")



stat_fitness_global
	pop_size
	fitness
	genome_size
	metabolic_error
	parents_metabolic_error
	metabolic_fitness
	secretion_error
	parents_secretion_error
	secretion_fitness
	amt_compound_present
Graphing of stat_fitness_global complete!


### Delete unneeded DataFrames

In [5]:
# Delete the stat_fitness_global DataFrames that are no longer needed so the
# kernel does not keep them in memory for the rest of the notebook.
print ("Deleting unneeded DataFrames")
# Per-column temporaries and the smoothed means left over from the graphing loop
del df_means, df_control_tmp, df_mut_up_tmp, df_mut_down_tmp, df_pop_down_tmp, df_selection_up_tmp, df_selection_down_tmp
del df_seed01_control_fitness_global, df_seed02_control_fitness_global, df_seed03_control_fitness_global, df_seed04_control_fitness_global, df_seed05_control_fitness_global
del df_seed01_mutation_up_fitness_global, df_seed02_mutation_up_fitness_global, df_seed03_mutation_up_fitness_global, df_seed04_mutation_up_fitness_global, df_seed05_mutation_up_fitness_global
del df_seed01_mutation_down_fitness_global, df_seed02_mutation_down_fitness_global, df_seed03_mutation_down_fitness_global, df_seed04_mutation_down_fitness_global, df_seed05_mutation_down_fitness_global
# Only seeds 01-04 were read for the selection conditions; naming seed05 here
# would raise a NameError.
del df_seed01_selection_up_fitness_global, df_seed02_selection_up_fitness_global, df_seed03_selection_up_fitness_global, df_seed04_selection_up_fitness_global
del df_seed01_selection_down_fitness_global, df_seed02_selection_down_fitness_global, df_seed03_selection_down_fitness_global, df_seed04_selection_down_fitness_global
# pop_up was never read in, so there is nothing to delete for it
#del df_seed01_pop_up_fitness_global, df_seed02_pop_up_fitness_global, df_seed03_pop_up_fitness_global, df_seed04_pop_up_fitness_global, df_seed05_pop_up_fitness_global
del df_seed01_pop_down_fitness_global, df_seed02_pop_down_fitness_global, df_seed03_pop_down_fitness_global, df_seed04_pop_down_fitness_global, df_seed05_pop_down_fitness_global
print("Done")

Deleting unneeded DataFrames
Done


<a id='stat_genes_global_read'></a>

# stat_genes_global

## Read in the data

In [9]:
print("Reading in stat_genes_global")


# Column names matching the stat_genes_global columns printed by the graphing cell
# (the list itself is not referenced in the cells visible here)
genes_global_names = ['generation', 'num_coding_RNAs', 'num_non-coding_RNAs', 'avg_size_of_coding_RNAs', 'avg_size_of_non-coding_RNAs', 'num_functional_genes', 'num_non-functional_genes', 'avg_size_of_functional_genes', 'avg_size_of_non-functional_genes']


def _load_genes_global(seed, condition):
    """Read <INPUT_ROOT_DIR>/<seed>/<condition>/stats/stat_genes_glob.out with read_genes."""
    return read_genes(INPUT_ROOT_DIR + seed + "//" + condition + "//stats//stat_genes_glob.out")


# Control
df_seed01_control_genes_global = _load_genes_global("seed01", "control")
df_seed02_control_genes_global = _load_genes_global("seed02", "control")
df_seed03_control_genes_global = _load_genes_global("seed03", "control")
df_seed04_control_genes_global = _load_genes_global("seed04", "control")
df_seed05_control_genes_global = _load_genes_global("seed05", "control")

# Mutation Up
df_seed01_mutation_up_genes_global = _load_genes_global("seed01", "mut_up")
df_seed02_mutation_up_genes_global = _load_genes_global("seed02", "mut_up")
df_seed03_mutation_up_genes_global = _load_genes_global("seed03", "mut_up")
df_seed04_mutation_up_genes_global = _load_genes_global("seed04", "mut_up")
df_seed05_mutation_up_genes_global = _load_genes_global("seed05", "mut_up")

# Mutation Down
df_seed01_mutation_down_genes_global = _load_genes_global("seed01", "mut_down")
df_seed02_mutation_down_genes_global = _load_genes_global("seed02", "mut_down")
df_seed03_mutation_down_genes_global = _load_genes_global("seed03", "mut_down")
df_seed04_mutation_down_genes_global = _load_genes_global("seed04", "mut_down")
df_seed05_mutation_down_genes_global = _load_genes_global("seed05", "mut_down")

# Selection Up (seed05 is currently not loaded)
df_seed01_selection_up_genes_global = _load_genes_global("seed01", "selection_up")
df_seed02_selection_up_genes_global = _load_genes_global("seed02", "selection_up")
df_seed03_selection_up_genes_global = _load_genes_global("seed03", "selection_up")
df_seed04_selection_up_genes_global = _load_genes_global("seed04", "selection_up")
#df_seed05_selection_up_genes_global = _load_genes_global("seed05", "selection_up")

# Selection Down (seed04 and seed05 are currently not loaded)
df_seed01_selection_down_genes_global = _load_genes_global("seed01", "selection_down")
df_seed02_selection_down_genes_global = _load_genes_global("seed02", "selection_down")
df_seed03_selection_down_genes_global = _load_genes_global("seed03", "selection_down")
#df_seed04_selection_down_genes_global = _load_genes_global("seed04", "selection_down")
#df_seed05_selection_down_genes_global = _load_genes_global("seed05", "selection_down")

# Population Up (currently not loaded for any seed)
#df_seed01_pop_up_genes_global = _load_genes_global("seed01", "pop_up")
#df_seed02_pop_up_genes_global = _load_genes_global("seed02", "pop_up")
#df_seed03_pop_up_genes_global = _load_genes_global("seed03", "pop_up")
#df_seed04_pop_up_genes_global = _load_genes_global("seed04", "pop_up")
#df_seed05_pop_up_genes_global = _load_genes_global("seed05", "pop_up")

# Population Down
df_seed01_pop_down_genes_global = _load_genes_global("seed01", "pop_down")
df_seed02_pop_down_genes_global = _load_genes_global("seed02", "pop_down")
df_seed03_pop_down_genes_global = _load_genes_global("seed03", "pop_down")
df_seed04_pop_down_genes_global = _load_genes_global("seed04", "pop_down")
df_seed05_pop_down_genes_global = _load_genes_global("seed05", "pop_down")

print("done.")

Reading in stat_genes_global
done.


<a id='stat_genes_global_graph'></a>

## Graph results of stat_genes_global

In [10]:
# Graph stat_genes_global.
# For every statistic column: average it across the loaded seeds of each
# condition, smooth the average with a rolling window, plot all conditions and
# their percent change relative to control, and write Mann-Whitney results
# (each tested condition vs. control) to a text file.

seed_keys = ['seed01','seed02', 'seed03', 'seed04', 'seed05']

# Get all of the generation numbers as a column
df_means = pd.DataFrame(df_seed01_control_genes_global['generation'].copy())
df_perc_change = pd.DataFrame(df_seed01_control_genes_global['generation'].copy())

print("stat_genes_global")

# The context manager closes the statistics file even if plotting or a test
# raises part-way through the loop.
with open(OUTPUT_ROOT_DIR+"genes_global/genes_global_mann-whitney.txt","w") as f:
    f.write("stat_genes_global\n")

    # For every column header in all of the genes_global files
    for col_header in df_seed01_mutation_up_genes_global.columns:
        if col_header == 'generation':
            continue

        print("\t"+col_header)
        # Concatenate the loaded seeds of each condition (one column per seed) to make
        # calculating the mean easier. The number of keys must equal the number of
        # Series: a mismatch was silently truncated by older pandas and is
        # deprecated/rejected by newer releases. pop_up is not loaded and therefore skipped.
        df_control_tmp = pd.concat([df_seed01_control_genes_global[col_header], df_seed02_control_genes_global[col_header], df_seed03_control_genes_global[col_header], df_seed04_control_genes_global[col_header], df_seed05_control_genes_global[col_header]], axis=1, keys=seed_keys)
        df_mut_up_tmp = pd.concat([df_seed01_mutation_up_genes_global[col_header], df_seed02_mutation_up_genes_global[col_header], df_seed03_mutation_up_genes_global[col_header], df_seed04_mutation_up_genes_global[col_header], df_seed05_mutation_up_genes_global[col_header]], axis=1, keys=seed_keys)
        df_mut_down_tmp = pd.concat([df_seed01_mutation_down_genes_global[col_header], df_seed02_mutation_down_genes_global[col_header], df_seed03_mutation_down_genes_global[col_header], df_seed04_mutation_down_genes_global[col_header], df_seed05_mutation_down_genes_global[col_header]], axis=1, keys=seed_keys)
        df_pop_down_tmp = pd.concat([df_seed01_pop_down_genes_global[col_header], df_seed02_pop_down_genes_global[col_header], df_seed03_pop_down_genes_global[col_header], df_seed04_pop_down_genes_global[col_header], df_seed05_pop_down_genes_global[col_header]], axis=1, keys=seed_keys)
        # Only seeds 01-04 are loaded for selection_up and seeds 01-03 for selection_down
        df_selection_up_tmp = pd.concat([df_seed01_selection_up_genes_global[col_header], df_seed02_selection_up_genes_global[col_header], df_seed03_selection_up_genes_global[col_header], df_seed04_selection_up_genes_global[col_header]], axis=1, keys=seed_keys[:4])
        df_selection_down_tmp = pd.concat([df_seed01_selection_down_genes_global[col_header], df_seed02_selection_down_genes_global[col_header], df_seed03_selection_down_genes_global[col_header]], axis=1, keys=seed_keys[:3])

        # Control first: it is the baseline for the percent change and the
        # first line drawn on the figure.
        condition_frames = [
            ('control', df_control_tmp),
            ('mut_up', df_mut_up_tmp),
            ('mut_down', df_mut_down_tmp),
            ('pop_down', df_pop_down_tmp),
            ('selection_up', df_selection_up_tmp),
            ('selection_down', df_selection_down_tmp),
        ]

        # Calculate the mean across the seeds for each condition and also smooth
        # the results with a rolling window
        for condition, df_condition_tmp in condition_frames:
            df_means[condition] = df_condition_tmp.mean(axis=1).rolling(WINDOW_SIZE).mean()

        # Also calculate the percent change vs. control and add it to that DataFrame
        for condition, _ in condition_frames[1:]:
            df_perc_change[condition] = 100*perc_diff(df_means[condition], df_means['control'])

        # Plot all conditions to the figure
        fig = plt.figure(figsize=(30,20))
        ax = fig.add_subplot(111)
        for condition, _ in condition_frames:
            df_means.plot(x='generation', y=condition, label=condition, ax=ax)
        # Labels are applied after plotting because DataFrame.plot relabels the
        # x axis with the name of the x column.
        ax.set_title("stat_genes_global "+ col_header, fontsize=32)
        ax.set_xlabel("Generation", fontsize=20)
        ax.set_ylabel(col_header, fontsize=20)

        fig.savefig(OUTPUT_ROOT_DIR+"genes_global/stat_genes_global_mean_"+col_header+".png")
        fig.clear()
        plt.close(fig)

        # Also create a new figure for the percent change
        fig = plt.figure(figsize=(30,20))
        ax = fig.add_subplot(111)
        for condition, _ in condition_frames[1:]:
            df_perc_change.plot(x='generation', y=condition, label=condition, ax=ax)
        ax.set_title("Percent Change - " + col_header, fontsize=32)
        ax.set_xlabel("Generation", fontsize=20)
        ax.set_ylabel(col_header, fontsize=20)

        # Save the figure
        fig.savefig(OUTPUT_ROOT_DIR +"genes_global/perc_change/stat_genes_global_perc_change_"+col_header+".png")
        fig.clear()
        plt.close(fig)

        # Calculate the Mann-Whitney test for the tested conditions vs. the control condition.
        # rolling(WINDOW_SIZE) leaves the first WINDOW_SIZE-1 rows as NaN; drop them so
        # missing values are not fed into the rank test.
        # The selection conditions are not tested here.
        # NOTE(review): these calls rely on scipy's default `alternative` and
        # `use_continuity`, whereas the stat_fitness_global cell passes
        # alternative='less', use_continuity=False - confirm which test is intended.
        # NOTE(review): the samples are smoothed, consecutive generations and hence not
        # independent observations - confirm this is the intended input for the test.
        control_smoothed = df_means['control'].dropna()
        rank_sum_control, p_control = stats.mannwhitneyu(control_smoothed, control_smoothed)
        rank_sum_mut_up, p_mut_up = stats.mannwhitneyu(control_smoothed, df_means['mut_up'].dropna())
        rank_sum_mut_down, p_mut_down = stats.mannwhitneyu(control_smoothed, df_means['mut_down'].dropna())
        rank_sum_population_down, p_population_down = stats.mannwhitneyu(control_smoothed, df_means['pop_down'].dropna())

        # Print out the results of the Mann-Whitney tests
        f.write(LINE)
        f.write("\n\t\t\t\t"+col_header)
        f.write("\n\t\t\trank sum\t\tp-value\n")
        f.write(LINE)
        f.write("\nControl \t\t" + str(rank_sum_control) + "\t%.15f" %p_control)
        f.write("\nMutation Up \t\t" + str(rank_sum_mut_up) + "\t%.15f" %p_mut_up)
        f.write("\nMutation Down \t\t" + str(rank_sum_mut_down) + "\t%.15f" %p_mut_down)
        f.write("\nPopulation Down \t" + str(rank_sum_population_down) + "\t%.15f" %p_population_down)
        f.write("\n"+LINE+"\n")

plt.close('all')
print("Graphing of stat_genes_global complete!")

stat_genes_global
	num_coding_RNAs
	num_non-coding_RNAs
	avg_size_of_coding_RNAs
	avg_size_of_non-coding_RNAs
	num_functional_genes
	num_non-functional_genes
	avg_size_of_functional_genes
	avg_size_of_non-functional_genes
Graphing of stat_genes_global complete!


### Delete unneeded DataFrames

In [10]:
# Delete the stat_genes_global DataFrames that are no longer needed so the
# kernel does not keep them in memory for the rest of the notebook.
print ("Deleting unneeded DataFrames")
# Per-column temporaries and the smoothed means left over from the graphing loop
del df_means, df_control_tmp, df_mut_up_tmp, df_mut_down_tmp, df_pop_down_tmp, df_selection_up_tmp, df_selection_down_tmp
del df_seed01_control_genes_global, df_seed02_control_genes_global, df_seed03_control_genes_global, df_seed04_control_genes_global, df_seed05_control_genes_global
del df_seed01_mutation_up_genes_global, df_seed02_mutation_up_genes_global, df_seed03_mutation_up_genes_global, df_seed04_mutation_up_genes_global, df_seed05_mutation_up_genes_global
del df_seed01_mutation_down_genes_global, df_seed02_mutation_down_genes_global, df_seed03_mutation_down_genes_global, df_seed04_mutation_down_genes_global, df_seed05_mutation_down_genes_global
# Only seeds 01-04 (selection_up) and 01-03 (selection_down) were read; naming
# the other seeds here would raise a NameError.
del df_seed01_selection_up_genes_global, df_seed02_selection_up_genes_global, df_seed03_selection_up_genes_global, df_seed04_selection_up_genes_global
del df_seed01_selection_down_genes_global, df_seed02_selection_down_genes_global, df_seed03_selection_down_genes_global
# pop_up was never read in, so there is nothing to delete for it
#del df_seed01_pop_up_genes_global, df_seed02_pop_up_genes_global, df_seed03_pop_up_genes_global, df_seed04_pop_up_genes_global, df_seed05_pop_up_genes_global
del df_seed01_pop_down_genes_global, df_seed02_pop_down_genes_global, df_seed03_pop_down_genes_global, df_seed04_pop_down_genes_global, df_seed05_pop_down_genes_global
print("Done")

Deleting unneeded DataFrames
Done


<a id='statistics'></a>