In [1]:
%load_ext rpy2.ipython

from rpy2.robjects.packages import importr
import numpy as np
import pandas as pd
from chromax import Simulator
import tensorflow as tf
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from matplotlib.animation import ArtistAnimation
import matplotlib.gridspec as gridspec
from tqdm import tqdm
import io
from PIL import Image
import imageio
import os
os.environ["KERAS_BACKEND"] = "jax"
import matplotlib




In [2]:

def plot_means_and_variances(dataframes):
    """Plot the mean of each dataframe with its variance drawn as the error bar.

    Parameters:
    dataframes: sequence of objects exposing ``.mean()`` and ``.var()``. One
        point is drawn per entry, at x = the entry's position in the sequence.
    """
    # Gather each statistic across the sequence, then collapse to a flat 1-D array
    means = np.array([frame.mean() for frame in dataframes]).flatten()
    variances = np.array([frame.var() for frame in dataframes]).flatten()

    # Note: the error bar length is the variance itself, not the standard deviation
    plt.errorbar(range(len(dataframes)), means, yerr=variances, fmt='o')
    plt.show()


def generate_genetic_map(n_markers, n_chromosomes):
  """Build a random genetic map with one row per marker.

  Marker effects come from generate_marker_effects, positions are drawn
  uniformly from [0, 200), and every marker is labelled chromosome '1A'.

  Parameters:
  n_markers (int): Number of markers (rows) to generate.
  n_chromosomes: Accepted but not used by this function.

  Returns:
  DataFrame sorted by 'cM' with columns 'CHR.PHYS', 'cM', 'Yield'.
  """
  effects = generate_marker_effects(n_markers=n_markers)
  genetic_map = pd.DataFrame(effects, columns=['Yield'])
  genetic_map['cM'] = np.random.uniform(0, 200, len(genetic_map))
  genetic_map['CHR.PHYS'] = '1A'
  ordered = genetic_map.sort_values(by='cM')
  return ordered[['CHR.PHYS', 'cM', 'Yield']]

def generate_population(n_pop=100, n_markers=500):
    """
    Generate a random population of two-haplotype individuals.

    Every allele is drawn independently as True or False with equal
    probability, so each individual holds two boolean values per marker.

    Parameters:
    n_pop (int): Number of individuals.
    n_markers (int): Number of markers per individual.

    Returns:
    numpy boolean array of shape (n_pop, n_markers, 2)
    """
    shape = (n_pop, n_markers, 2)
    # A single draw; the previous unused 0/1/2 dosage draw only consumed
    # random state and never reached the caller.
    return np.random.choice([True, False], size=shape)


def generate_marker_effects(n_markers=500, mu=0, sigma=0.1):
    """
    Draw one effect size per marker from a normal distribution.

    Parameters:
    n_markers (int): How many effects to draw.
    mu (float): Centre of the normal distribution.
    sigma (float): Spread (standard deviation) of the normal distribution.

    Returns:
    numpy array holding n_markers sampled effects
    """
    return np.random.normal(loc=mu, scale=sigma, size=n_markers)


def select_random_individuals(arr, num_individuals):
    """
    Pick individuals at random along the first axis of an array.

    Indices are drawn with np.random.choice using its defaults, so the same
    individual can be picked more than once.

    Parameters:
    arr: array whose first axis enumerates individuals.
    num_individuals (int): Number of individuals to draw.

    Returns:
    array of the drawn individuals, with first-axis length num_individuals
    """
    # Random positions along the first axis
    idx = np.random.choice(arr.shape[0], size=num_individuals)

    # Index the population (this step was missing, so the function used to
    # fail with a NameError on an undefined variable)
    return arr[idx]

def select_mixed(population, random_split=.99):
  """Combine randomly drawn parents with selected parents into one array.

  Parameters:
  population: array; only its first-axis length is used, to size the two groups.
  random_split (float): Fraction of that length drawn at random; the remaining
      fraction is requested from Farm.Simulator.select.

  Returns:
  The random parents followed by the selected parents, concatenated on axis 0.

  NOTE(review): both groups are taken from Farm.current_population rather than
  from `population`, and `Farm` is not defined in the visible part of this
  notebook - confirm which object is intended.
  NOTE(review): both counts are truncated by int(), so they may not add up to
  the population size.
  """
  pop_size = population.shape[0]
  random_count = int(pop_size * random_split)
  select_count = int(pop_size * (1-random_split))

  random_part = select_random_individuals(Farm.current_population, random_count)
  selected_part = Farm.Simulator.select(Farm.current_population, k = select_count)
  return np.concatenate((random_part, selected_part), axis=0)

def plot_replicate_means_and_variances(replicate_data, start_index=None, end_index=None):
    """Overlay mean-with-variance error bars for several replicates on one axis.

    Parameters:
    replicate_data: sequence of replicates; each replicate is a sequence of
        objects exposing ``.mean()`` and ``.var()``.
    start_index: first position of each replicate to plot (defaults to 0).
    end_index: position to stop before (defaults to the length of the first
        replicate).
    """
    fig, ax = plt.subplots()

    # Resolve the slice bounds once; they apply to every replicate
    first = 0 if start_index is None else start_index
    last = len(replicate_data[0]) if end_index is None else end_index

    for replicate_no, frames in enumerate(replicate_data, start=1):
        window = frames[first:last]

        # Per-dataframe statistics, flattened to 1-D
        means = np.array([frame.mean() for frame in window]).flatten()
        variances = np.array([frame.var() for frame in window]).flatten()

        # The error bar length is the variance itself
        ax.errorbar(range(len(window)), means, yerr=variances, fmt='o', label=f'Replicate {replicate_no}')

    ax.legend()
    plt.show()


def plot_replicate_means(replicate_data):
    """Draw one line per replicate showing the mean of each of its dataframes.

    Parameters:
    replicate_data: sequence of replicates; each replicate is a sequence of
        objects exposing ``.mean()``.
    """
    fig, ax = plt.subplots()

    for replicate_no, frames in enumerate(replicate_data, start=1):
        # Means of every dataframe in this replicate, flattened to 1-D
        means = np.array([frame.mean() for frame in frames]).flatten()
        ax.plot(range(len(frames)), means, label=f'Replicate {replicate_no}')

    ax.legend()
    plt.show()

def parse_markerEffects(genetic_map, nChr):
    """Assign a 1-based chromosome number to every marker by position.

    Markers are split into consecutive runs of ``len(genetic_map) // nChr``
    markers; the i-th marker gets ``i // run_length + 1``.

    Parameters:
    genetic_map: any sized object; only its length is used.
    nChr: number of chromosomes to divide the markers into.

    Returns:
    list with one chromosome number per marker.

    Note: when the marker count is not a multiple of nChr, the trailing
    markers receive numbers greater than nChr.
    """
    n_markers = len(genetic_map)
    run_length = n_markers // nChr
    return [marker // run_length + 1 for marker in range(n_markers)]

def score_top(scores: pd.DataFrame, column: str, k: int):
    """Return the index labels of the k rows with the largest values in `column`."""
    # Highest values first, then keep the leading k labels
    ranked = scores.sort_values(by=column, ascending=False)
    return ranked.index[:k]


def score_top_percentile(scores: pd.DataFrame, column: str, percentile_min: float, percentile_max: float, k: int):
    """Sample k index labels (with replacement) from rows inside a quantile band.

    Rows are kept when their `column` value lies between the percentile_min and
    percentile_max quantiles of that column, both ends included.

    Raises:
    AssertionError: if percentile_max is not greater than percentile_min.
    """
    assert percentile_max > percentile_min, "Error: max percentile should be greater than min percentile"

    values = scores[column]
    lower, upper = values.quantile(percentile_min), values.quantile(percentile_max)

    # Rows whose value falls inside the inclusive band
    inside_band = (values >= lower) & (values <= upper)
    candidates = scores[inside_band].index

    return np.random.choice(candidates, k, replace=True)

def reshape_pop(maizeHaplo):
    """Regroup a 2-D haplotype matrix into an (individuals, markers, 2) array.

    Consecutive pairs of rows are treated as one individual: rows 2*i and
    2*i + 1 become out[i, :, 0] and out[i, :, 1].
    """
    n_rows, n_markers = maizeHaplo.shape[0], maizeHaplo.shape[1]
    # (rows, markers) -> (rows / 2, 2, markers)
    paired = maizeHaplo.reshape(int(n_rows / 2), 2, n_markers)
    # Move the pair axis last: (rows / 2, markers, 2)
    return paired.transpose((0, 2, 1))

def return_genetic_map_df(markerEffects, nChr, geneticMap):
    """Assemble the genetic-map DataFrame from marker effects and positions.

    Parameters:
    markerEffects: one effect per marker; stored in the 'Yield' column.
    nChr: number of chromosomes, forwarded to parse_markerEffects.
    geneticMap: one position per marker; stored in the 'cM' column.

    Returns:
    DataFrame with columns 'CHR.PHYS' (int chromosome number), 'Yield', 'cM'.
    """
    chromosome_of_marker = parse_markerEffects(markerEffects, nChr)
    # parse_markerEffects yields plain ints for an int nChr but length-1 arrays
    # when nChr itself is a length-1 array (presumably how rpy2 hands back an R
    # scalar - confirm). np.ravel accepts both, whereas x[0] failed on ints.
    chromosome_of_marker = [int(np.ravel(c)[0]) for c in chromosome_of_marker]
    return pd.DataFrame({'CHR.PHYS': chromosome_of_marker, 'Yield': markerEffects, 'cM': geneticMap})

import matplotlib.pyplot as plt

def plot_histogram(ax, probabilities, label=None, color=None):
    """Draw a half-transparent, density-normalised histogram of `probabilities` on `ax`.

    The title and both axis labels of `ax` are overwritten with fixed text.
    """
    ax.hist(probabilities, bins='auto', density=True, alpha=0.5, label=label, color=color)
    for apply_text, text in (
        (ax.set_title, 'Probability Distribution'),
        (ax.set_xlabel, 'Probability'),
        (ax.set_ylabel, 'Frequency'),
    ):
        apply_text(text)

def plot_bar(ax, probabilities, label=None, color=None):
    """Draw `probabilities` as half-transparent bars on `ax`, one bar per position.

    The title and both axis labels of `ax` are overwritten with fixed text.
    """
    ax.bar(range(len(probabilities)), probabilities, alpha=0.5, label=label, color=color)
    for apply_text, text in (
        (ax.set_title, 'Probability per Index'),
        (ax.set_xlabel, 'Index'),
        (ax.set_ylabel, 'Probability'),
    ):
        apply_text(text)

def plot_probabilities(probabilities1, probabilities2):
    """Compare two probability vectors side by side.

    The left panel overlays their histograms and the right panel overlays
    their per-index bars; the first vector is blue, the second red.
    """
    # One row, two panels: histogram on the left, bars on the right
    fig, axs = plt.subplots(1, 2, figsize=(10, 5))

    series = (
        (probabilities1, 'Probabilities 1', 'blue'),
        (probabilities2, 'Probabilities 2', 'red'),
    )
    for values, label, color in series:
        plot_histogram(axs[0], values, label=label, color=color)
        plot_bar(axs[1], values, label=label, color=color)

    for panel in (axs[0], axs[1]):
        panel.legend(loc='upper right')

    plt.tight_layout()  # keep the two panels from overlapping
    plt.show()





In [3]:
%%R
x <- seq(0, 2*pi, length.out=50)

In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
  libraries ‘/usr/local/lib/R/site-library’, ‘/usr/lib/R/site-library’ contain no packages


In [4]:
%R -o x

In [5]:
x

array([0.        , 0.12822827, 0.25645654, 0.38468481, 0.51291309,
       0.64114136, 0.76936963, 0.8975979 , 1.02582617, 1.15405444,
       1.28228272, 1.41051099, 1.53873926, 1.66696753, 1.7951958 ,
       1.92342407, 2.05165235, 2.17988062, 2.30810889, 2.43633716,
       2.56456543, 2.6927937 , 2.82102197, 2.94925025, 3.07747852,
       3.20570679, 3.33393506, 3.46216333, 3.5903916 , 3.71861988,
       3.84684815, 3.97507642, 4.10330469, 4.23153296, 4.35976123,
       4.48798951, 4.61621778, 4.74444605, 4.87267432, 5.00090259,
       5.12913086, 5.25735913, 5.38558741, 5.51381568, 5.64204395,
       5.77027222, 5.89850049, 6.02672876, 6.15495704, 6.28318531])

In [6]:
# %R install.packages("AlphaSimR")

In [7]:
%%R
library("AlphaSimR")

Loading required package: R6


In [8]:
%%R
# Settings for the simulated founder population. The same three values are
# later exported to Python with `%R -o`.
nInd = 50       # passed to runMacs as nInd
nChr = 2        # passed to runMacs as nChr
segSites = 20   # passed to runMacs as segSites (and later to SP$addTraitA)

# Simulate founder genomes with AlphaSimR's runMacs using its "MAIZE" species preset
founderGenomes = runMacs(nInd = nInd,
                         nChr = nChr,
                         segSites = segSites,
                         species = "MAIZE")


In [9]:
%%R
# Simulation parameters built from the founder genomes
SP = SimParam$new(founderGenomes)
# Add an additive trait; segSites is passed as the first argument
SP$addTraitA(segSites)
# SP$setVarE(h2=.02)
# Population object created from the founders
pop = newPop(founderGenomes, simParam=SP)
# Fit fastRRBLUP on the QTL (useQtl=TRUE) against genetic values (use='gv')
ans = fastRRBLUP(pop, simParam=SP, useQtl=TRUE, use='gv')
# Bare expression: only prints the additive effects; the value is captured on the next line
ans@gv[[1]]@addEff
markerEffects = slot(slot(ans, "gv")[[1]], "addEff")
# Haplotype and genotype matrices for the segregating sites
maizeHaplo = pullSegSiteHaplo(pop)
maizeGeno = pullSegSiteGeno(pop)
# Genetic map positions of each marker, flattened into a single vector
# NOTE(review): the original comment says these are cM positions - confirm the unit of SP$genMap
genMap = SP$genMap
geneticMap = unlist(genMap)

In [10]:
%R -o maizeHaplo
%R -o maizeGeno
%R -o markerEffects
%R -o geneticMap
%R -o nInd
%R -o nChr
%R -o segSites

In [11]:
from tensorflow import keras
from tensorflow.keras.layers import Dense, Input, Flatten, Reshape


import random

import random

def simplify_geneticmap(lst, qtl):
    """Overwrite a list of marker effects with a sparse, two-sided effect pattern.

    `qtl` randomly chosen positions receive a value drawn uniformly from
    [0.5, 1.0], another `qtl` distinct positions receive a value drawn
    uniformly from [-1.0, -0.5], and every other position is set to 0.

    Parameters:
    lst (list): Marker effects; modified in place.
    qtl (int): Number of positive and, separately, negative positions.

    Returns:
    The same list object that was passed in.

    Raises:
    ValueError: if 2 * qtl exceeds len(lst) (raised by random.sample).
    """
    n_markers = len(lst)

    # Draw all 2*qtl distinct positions at once and split them. This avoids
    # sampling from a set, which random.sample rejects on Python 3.11+.
    chosen = random.sample(range(n_markers), 2 * qtl)
    indexes_positive = set(chosen[:qtl])
    indexes_negative = set(chosen[qtl:])

    for i in range(n_markers):
        if i in indexes_positive:
            lst[i] = random.uniform(0.5, 1.0)  # positive effect
        elif i in indexes_negative:
            lst[i] = random.uniform(-0.5, -1.0)  # negative effect
        else:
            lst[i] = 0  # no effect

    return lst



import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def load_data(file_path):
    """
    Read the file at `file_path` with np.load and return what it contains.
    """
    loaded = np.load(file_path)
    return loaded

def sum_ploidy_values(data):
    """
    Collapse the third axis of `data` by summation.
    """
    ploidy_axis = 2
    return np.sum(data, axis=ploidy_axis)

def create_heatmap(data, title="Heatmap of Ploidy Values", xlabel="Marker Index", ylabel="Individual Index"):
    """
    Show a three-colour seaborn heatmap of `data` in a new 12x10 figure.

    Parameters:
    data: 2-D data accepted by sns.heatmap.
    title (str): Figure title.
    xlabel (str): Label for the x axis.
    ylabel (str): Label for the y axis.
    """
    # Three fixed colours
    # NOTE(review): the colour-bar labels below suggest these stand for the values 0, 1 and 2 - confirm
    cmap = sns.color_palette(["#1f77b4", "#ff7f0e", "#2ca02c"], as_cmap=True)

    plt.figure(figsize=(12, 10))
    # seaborn's own colour bar is disabled because one is added by hand below
    ax = sns.heatmap(data, cmap=cmap, cbar=False)

    # Manual colour bar built from the heatmap's first collection, with ticks relabelled 0 / 1 / 2
    cbar = ax.figure.colorbar(ax.collections[0])
    cbar.set_ticks([0.33, 1, 1.67])
    cbar.set_ticklabels(['0', '1', '2'])
    cbar.set_label('Summed Ploidy Value')

    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    plt.show()

In [67]:
def plot_population_heatmap(ax, population_data, marker_strength):
    """Draw the population as a heatmap on `ax`, with x tick labels coloured by marker strength.

    Parameters:
    ax: matplotlib axis to draw on.
    population_data: array that is summed over its third axis before plotting;
        its second axis supplies the x ticks (one per marker).
    marker_strength: one value per marker, mapped onto the 'RdYlGn' colormap
        through (strength + 1) / 2.
        NOTE(review): this mapping only spans the colormap when strengths lie
        in [-1, 1] - confirm against the caller.
    """
    # Collapse the last axis so each cell holds the sum over it
    summed_data = np.sum(population_data, axis=2)
    cmap = sns.color_palette(["#1f77b4", "#ff7f0e", "#2ca02c"])
    strength_cmap = matplotlib.colormaps.get_cmap('RdYlGn')
    sns.heatmap(summed_data, cmap=cmap, cbar_kws={'ticks': [0, 1, 2]}, ax=ax)

    # One tick per marker, centred on its column and labelled 1..n
    ax.set_xticks(np.arange(population_data.shape[1]) + 0.5)
    marker_labels = [f'{i+1}' for i in range(population_data.shape[1])]
    ax.set_xticklabels(marker_labels, rotation=0, ha='center')

    # Colour each tick label's background according to the strength of its marker
    for tick_label, strength in zip(ax.get_xticklabels(), marker_strength):
        tick_label.set_backgroundcolor(strength_cmap((strength + 1) / 2))  # (strength + 1) / 2 maps [-1, 1] onto [0, 1]
        tick_label.set_color('white')
        tick_label.set_fontweight('bold')
        # NOTE(review): set_bbox presumably supersedes the set_backgroundcolor call above - confirm
        tick_label.set_bbox(dict(facecolor=strength_cmap((strength + 1) / 2), edgecolor='none', boxstyle='round,pad=0.3'))

    ax.set_title("Heatmap of Population Genotype Dosages")
    ax.set_xlabel("Markers")
    ax.set_ylabel("Individuals")

def plot_allele_frequencies(ax, data, marker_strength, sort_by_allele_0=False, bar_width=0.8):
    """Draw, per marker, a stacked bar of the proportions of allele values 0 and 1.

    Parameters:
    ax: matplotlib axis to draw on.
    data: array of non-negative integer allele values; values are counted along
        axis 2 and the counts are summed over axis 0, leaving one row per marker.
    marker_strength: one value per marker; (strength + 1) / 2 picks the colour
        of the marker's label box from the 'RdYlGn' colormap.
    sort_by_allele_0 (bool): if True, bars are ordered by decreasing allele-0
        proportion. The x positions and printed labels are NOT reordered, so
        the labels then show the rank rather than the original marker number.
    bar_width (float): width of each bar.
    """
    # Count occurrences of each value along the last axis (at least 3 bins per cell)
    allele_counts = np.apply_along_axis(lambda x: np.bincount(x, minlength=3), axis=2, arr=data)
    # Sum over the first axis, then normalise each marker's counts to proportions
    total_allele_counts = allele_counts.sum(axis=0)
    allele_frequencies = total_allele_counts / total_allele_counts.sum(axis=1)[:, np.newaxis]

    # Only the first two value bins are plotted
    allele_0_proportions = allele_frequencies[:, 0]
    allele_1_proportions = allele_frequencies[:, 1]
    x_positions = np.arange(1, len(allele_0_proportions) + 1)
    normalized_strength = (np.array(marker_strength) + 1) / 2

    cmap = plt.cm.RdYlGn

    if sort_by_allele_0:
        # Descending allele-0 proportion
        sorted_indices = np.argsort(-allele_0_proportions)
        allele_0_proportions = allele_0_proportions[sorted_indices]
        allele_1_proportions = allele_1_proportions[sorted_indices]
        normalized_strength = normalized_strength[sorted_indices]

    for xpos, a0, a1, strength in zip(x_positions, allele_0_proportions, allele_1_proportions, normalized_strength):
        # Only the first bar carries legend labels, so each allele appears once in the legend
        ax.bar(xpos, a0, color='red', edgecolor='black', width=bar_width, label='Allele 0' if xpos == 1 else "")
        ax.bar(xpos, a1, bottom=a0, color='black', edgecolor='black', width=bar_width, label='Allele 1' if xpos == 1 else "")
        # Position label drawn just below the bar, boxed in the strength colour
        ax.text(xpos, -0.05, f'{xpos}', horizontalalignment='center', verticalalignment='center', 
                 color='white', fontsize=8, fontweight='bold', 
                 bbox=dict(facecolor=cmap(strength), edgecolor='none', boxstyle='round,pad=0.2'))

    # Extend the y range below zero to leave room for the boxed labels
    ax.set_ylim(-0.15, 1)
    ax.set_ylabel('Proportion')
    ax.set_xlabel('Marker Position')
    ax.set_title('Proportion of Alleles 0 and 1 at Each Marker' + (' (Sorted by Allele 0)' if sort_by_allele_0 else ''))
    ax.legend(loc='upper right')

import tempfile
import os

def create_frame(farm, episode):
    """Render one three-panel frame for `episode` and save it as a PNG.

    Panels, top to bottom: farm.view_policy, the population heatmap, and the
    allele-frequency bars for farm.current_population.

    Parameters:
    farm: object providing temp_dir (with a .name path), view_policy,
        current_population and marker_strength.
    episode: value used in the policy panel and in the output file name.

    Returns:
    Path of the saved file, 'frame_<episode>.png' inside farm.temp_dir.
    """
    temp_dir = farm.temp_dir
    fig = plt.figure(figsize=(10, 15))  # tall figure: three stacked panels

    gs = gridspec.GridSpec(3, 1, figure=fig)

    ax1 = fig.add_subplot(gs[0])
    farm.view_policy(ax1, episode)

    ax2 = fig.add_subplot(gs[1])
    plot_population_heatmap(ax2, farm.current_population, farm.marker_strength)

    ax3 = fig.add_subplot(gs[2])
    plot_allele_frequencies(ax3, farm.current_population, farm.marker_strength)

    # Save the figure as a png in the temporary directory
    filename = os.path.join(temp_dir.name, f'frame_{episode}.png')
    plt.savefig(filename)
    plt.close(fig)  # release the figure once it is on disk

    return filename  # Return the filename for future use
def select_parents(policy):
    """Keep the top half (rounded up) of the entries in each row of `policy`.

    Parameters:
    policy: 2-D tensor; the size of its second axis decides how many entries
        are kept.

    Returns:
    (values, indices) as produced by tf.math.top_k.
    """
    n_candidates = policy.shape[1]
    # Half of the entries, plus one more when the count is odd
    k = n_candidates // 2 + (1 if n_candidates % 2 != 0 else 0)
    values, indices = tf.math.top_k(policy, k)
    return values, indices


In [72]:
select_parents(actor_output)

(<tf.Tensor: shape=(1, 25), dtype=float32, numpy=
 array([[ 0.5799658 ,  0.54185057,  0.5192132 ,  0.4949935 ,  0.4750669 ,
          0.46124786,  0.38514245,  0.26339433,  0.24395137,  0.2260183 ,
          0.16272944,  0.15549074,  0.14651424,  0.12909198,  0.11385362,
          0.11339369,  0.09402908,  0.06812983,  0.04635124,  0.01456115,
          0.01100537, -0.0177543 , -0.03391749, -0.0550363 , -0.06925011]],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 25), dtype=int32, numpy=
 array([[ 3,  5, 34, 16, 48, 26, 37, 45, 30, 27,  1, 36, 20, 38,  6, 35,
         40, 19, 47, 29, 31, 23,  7, 10, 46]], dtype=int32)>)

In [81]:
import numpy as np
from tensorflow.keras.layers import Input, Flatten, Dense, Reshape
from tensorflow.keras import Model
import tensorflow as tf

def create_critic(n, m, d, population_dummy, scores_dummy, output_dummy):
    """Build and compile the critic network.

    The model takes three inputs, in this order: a population of shape
    (n, m, d), a score vector of shape (n,), and the actor output (shaped like
    output_dummy without its first axis). It produces one linear value.

    Parameters:
    n, m, d: sizes of the population input.
    population_dummy, scores_dummy: accepted but not used here.
    output_dummy: example actor output; only its shape is read.

    Returns:
    Keras model compiled with the 'adam' optimizer and mean-squared-error loss.
    """
    layers = tf.keras.layers

    # Inputs for the scores, the actor's output and the population
    scores_in = layers.Input(shape=(n,))
    action_in = layers.Input(shape=output_dummy.shape[1:])
    population_in = layers.Input(shape=(n, m, d))

    population_flat = Flatten()(population_in)
    scores_hidden = Dense(64, activation='relu')(scores_in)  # embed the scores
    action_flat = Flatten()(action_in)

    # Join the three branches, then two hidden layers and a single linear output
    hidden = layers.Concatenate()([population_flat, scores_hidden, action_flat])
    for _ in range(2):
        hidden = Dense(64, activation='relu')(hidden)
    value = Dense(1, activation='linear')(hidden)

    critic_model = tf.keras.models.Model([population_in, scores_in, action_in], value)
    critic_model.compile(optimizer='adam', loss='mean_squared_error')

    return critic_model

def create_actor(n, m, d, total_parents, population_dummy, scores_dummy):
    """Build and compile the actor network.

    The model takes two inputs, in this order: a population of shape (n, m, d)
    and a score vector of shape (n,). It produces n linear outputs.

    Parameters:
    n, m, d: sizes of the population input; n is also the output width.
    total_parents, population_dummy, scores_dummy: accepted but not used here.

    Returns:
    Keras model compiled with the 'adam' optimizer and mean-squared-error loss.
    """
    layers = tf.keras.layers

    # Inputs for the scores and the population
    scores_in = layers.Input(shape=(n,))
    population_in = layers.Input(shape=(n, m, d))

    population_flat = Flatten()(population_in)
    scores_hidden = Dense(64, activation='relu')(scores_in)  # embed the scores

    # Join both branches, then two hidden layers and n linear outputs
    hidden = layers.Concatenate()([population_flat, scores_hidden])
    for _ in range(2):
        hidden = Dense(64, activation='relu')(hidden)
    policy = Dense(n, activation='linear')(hidden)

    actor_model = tf.keras.models.Model([population_in, scores_in], policy)
    actor_model.compile(optimizer='adam', loss='mean_squared_error')

    return actor_model

import numpy as np


#dummy functions to  generate fake data to develop the training pipeline
def pop_gen(b, n, m, d):
    """Return a random integer array of zeros and ones with shape (b, n, m, d)."""
    shape = (b, n, m, d)
    return np.random.randint(2, size=shape)
def reward_gen():
    """Return one dummy reward drawn uniformly from [0, 1).

    The previous body returned the np.random.rand function object itself
    instead of calling it, so callers never received a number.
    """
    return np.random.rand()
def scores_gen(n):
    """Return dummy scores: uniform [0, 1) samples with shape (1, n)."""
    batch = 1
    return np.random.rand(batch, n)

# Dry run of one actor/critic update on random data, used to develop the training pipeline.
n = 50  # first size of the population input (the actor emits n outputs)
m = 30  # second size of the population input
d = 2   # third size of the population input
total_parents = n*2
population_dummy = pop_gen(1, n, m, d)  # Extra dimension for batch size
scores_dummy =  scores_gen(n) # Extra dimension for batch size
# Initialise the actor and critic models
actor_model = create_actor(n,m,d,total_parents, population_dummy, scores_dummy)
# Feed the dummy data through the network to get an example for creating critic model
actor_output = actor_model([population_dummy, scores_dummy])
critic_model = create_critic(n,m,d, population_dummy, scores_dummy,actor_output)


# How to take an action
policy = actor_output # the raw actor output is used directly as the policy
parent_values, parent_indices = select_parents(policy) # top half of the actor outputs
# parent_indices keeps the batch axis, so this fancy index also keeps a leading axis of size 1
selected_parents = population_dummy[0][parent_indices.numpy()] # grab parents from our current population
# How to calculate the reward
past_fitness = scores_gen(n)
new_fitness = scores_gen(n)
reward = new_fitness-past_fitness  # shape (1, n): one difference per score

# Update the actor: maximise the critic's value of the actor's own output
with tf.GradientTape() as tape:
    new_policy = actor_model([population_dummy, scores_dummy], training=True)  # compute new policy with actor
    actor_loss = -tf.reduce_mean(critic_model([population_dummy, scores_dummy, new_policy]))  # compute actor loss
# Get the gradients
actor_grad = tape.gradient(actor_loss, actor_model.trainable_variables)
# Update the weights
actor_model.optimizer.apply_gradients(zip(actor_grad, actor_model.trainable_variables))

# Update the critic
# NOTE(review): actor_output was computed before the actor update above, and
# reward is (1, n) while the critic emits a single value - confirm the
# intended target shape for this loss.
with tf.GradientTape() as tape:
    critic_value = critic_model([population_dummy, scores_dummy, actor_output], training=True)  # compute critic value
    critic_loss = tf.keras.losses.MSE(reward, critic_value)  # compute critic loss
# Get the gradients
critic_grad = tape.gradient(critic_loss, critic_model.trainable_variables)
# Update the weights
critic_model.optimizer.apply_gradients(zip(critic_grad, critic_model.trainable_variables))

<tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>

In [70]:
actor_output[0].shape

TensorShape([50])

In [None]:


# Transition to next state of environment (new population)
next_generation = farm.simulator.random_crosses(selected_parents, n_crosses = selected_parents.shape[0] * 2)

# Calculate reward
old_fitness = farm.simulator.GEBV(farm.current_population).mean()[0]
new_fitness = farm.simulator.GEBV(next_generation).mean()[0]
reward = new_fitness - old_fitness  # the reward for taking this action to generate next_generation


TensorShape([1, 1])

In [16]:
class BreedingProgram:
    """
    Breeding-program state bundled with a Simulator and actor/critic networks.

    Constructing an instance builds the simulator, the two Keras models and
    their optimizers, and records the scores of the initial population.
    """

    def __init__(self, initial_population, genetic_map, population_size, marker_count, chromosome_number, max_generation, heritability):
        """
        Initializes the breeding program.

        Parameters:
        initial_population: starting population; scored with the simulator's
            phenotype and GEBV methods.
        genetic_map: table with a 'Yield' column; also passed to Simulator.
        population_size (int): number of individuals (actor/critic input size n).
        marker_count (int): number of markers (actor/critic input size m).
        chromosome_number: accepted but not used by this method.
        max_generation: stored on the instance.
        heritability: passed to Simulator as h2.

        Side effects: creates a temporary directory and reads 'mypop.npy' from
        the working directory.
        """

        # Initialize the basic attributes
        self.population_size = population_size
        self.marker_count = marker_count
        self.initial_population = initial_population
        self.genetic_map = genetic_map
        self.max_generation = max_generation
        self.marker_strength = np.array(self.genetic_map['Yield'])

        # Initialize the simulator
        self.simulator = Simulator(genetic_map=self.genetic_map, h2=heritability)
        # NOTE(review): the return value is discarded and the path is hard-coded - confirm this call is needed
        self.simulator.load_population('mypop.npy')

        # Temporary directory (used by create_frame for the saved PNG frames)
        self.temp_dir = tempfile.TemporaryDirectory()

        # Initialize the current generation and history
        self.current_generation = 0
        self.history = []

        # Initialize the Actor and Critic models
        # (create_actor / create_critic ignore their *_dummy data arguments apart
        # from the shape of output_dummy)

        self.actor = create_actor(n=self.population_size,
                                  m=self.marker_count,
                                  d=2,
                                  total_parents = self.population_size,
                                  population_dummy = self.initial_population, 
                                  scores_dummy=self.simulator.phenotype(self.initial_population)
                                  )
        # Random inputs with a leading batch axis, pushed through the actor once
        # so the critic can be built from the shape of the actor's output
        population_dummy = np.random.rand(1, self.population_size,self.marker_count,2)  # Extra dimension for batch size
        scores_dummy = np.random.rand(1, self.population_size)  # Extra dimension for batch size
        output_dummy = self.actor([population_dummy, scores_dummy])
        self.critic = create_critic(n=self.population_size,
                                  m=self.marker_count,
                                  d=2,
                                  population_dummy = self.initial_population, 
                                  scores_dummy=self.simulator.phenotype(self.initial_population),
                                  output_dummy = output_dummy)
        

        # Separate Adam optimizers for each network, plus per-network history lists
        self.actor_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
        self.critic_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
        self.critic_history = []
        self.actor_history = []

        # Start the breeding program
        self._start_breeding_program()
        
    def _start_breeding_program(self):
        """
        Starts the breeding program.

        Sets the current population to the initial one, scores it with the
        simulator's GEBV method, and appends those scores to the history.
        """
        self.current_population = self.initial_population
        self.current_scores = self.simulator.GEBV(self.initial_population)
        self.history.append(self.current_scores)
 
    def run_episodes(self, num_episodes, num_cycles):
        """Placeholder: not implemented yet; does nothing and returns None."""
        pass

    def view_policy(self, ax, episode):
        """Draw the actor's output for the current population as a heatmap on `ax`.

        Parameters:
        ax: matplotlib axis to draw on.
        episode: value shown in the 'Episode: ...' annotation above the axis.

        NOTE(review): create_actor builds a model with two inputs (population
        and scores) and n outputs, but only the population is passed here and
        policy[0] is handed to sns.heatmap - confirm this still matches the
        current actor.
        """
        # Add a leading batch axis of size 1
        current_pop = self.current_population.reshape(1,*self.current_population.shape)
        policy = self.actor(current_pop)
        sns.heatmap(policy[0], ax=ax)
        ax.set_xlabel('Individual ID')
        ax.set_ylabel('Parent Slot ID')
        ax.annotate('Episode: {}'.format(episode), xy=(0.5, 1.05), xycoords='axes fraction', fontsize=12, ha='center')






# Build the starting population and genetic map from the objects exported by the R cells
initial_population =  reshape_pop(maizeHaplo) 
genetic_map = return_genetic_map_df(markerEffects, nChr, geneticMap)
# Replace the estimated effects with a sparse pattern: 5 positive and 5 negative markers, the rest 0
genetic_map['Yield'] = simplify_geneticmap(list(genetic_map['Yield']),5)
# Same reshape as initial_population above; this copy is written to disk
reshapeHaplo = reshape_pop(maizeHaplo)
# np.save adds the '.npy' suffix, giving the 'mypop.npy' that BreedingProgram.__init__ loads
np.save('mypop', reshapeHaplo)
print(f'reshape haplo {reshapeHaplo.shape}')
# The R-exported values are converted to plain ints before use
population_size = int(nInd)
marker_count = int((segSites * nChr))
chromosome_number = int(nChr)
max_generation = 10
heritability = .95
mean_score_list=[]
critic_loss_list=[]

farm  = BreedingProgram(initial_population, genetic_map, population_size, marker_count, chromosome_number, max_generation, heritability)

No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


reshape haplo (50, 40, 2)


In [None]:
with tf.GradientTape() as tape:
    actor_output = farm.actor([current_pop, scores_dummy])  # policy
    critic_output = farm.critic([current_pop, scores_dummy, actor_output])  # value
    chosen_actions_prob = tf.math.log(actor_output)  # log probability of the chosen actions
    actor_loss = -tf.reduce_mean(chosen_actions_prob * advantages)  # actor loss

grads = tape.gradient(actor_loss, farm.actor.trainable_variables)
optimizer.apply_gradients(zip(grads, farm.actor.trainable_variables))


In [None]:
from tensorflow.keras.losses import MeanSquaredError
critic_loss_function = MeanSquaredError()


In [None]:
@tf.function
def update_actor_and_critic(actor_model, critic_model, states, advantages, critic_target):
    """Apply one gradient step to the actor and one to the critic.

    Parameters:
    actor_model: model called on `states` to produce the policy output.
    critic_model: model called on [states, actor_output] to produce the value.
    states: input batch for the actor.
    advantages: weights applied to the log of the actor output in the actor loss.
    critic_target: target passed to critic_loss_function.

    Relies on the notebook-level names `optimizer` and `critic_loss_function`.

    NOTE(review): a later cell redefines this function with an additional
    `scores` parameter; that definition replaces this one once it is run.
    NOTE(review): tf.math.log yields NaN for negative actor outputs - confirm
    the actor output is strictly positive before using this loss.
    """
    # persistent=True is required: gradient() is called twice on this tape,
    # which a default (non-persistent) tape rejects with a RuntimeError.
    with tf.GradientTape(persistent=True) as tape:
        actor_output = actor_model(states)
        critic_output = critic_model([states, actor_output])
        actor_loss = -tf.reduce_mean(advantages * tf.math.log(actor_output))
        critic_loss = critic_loss_function(critic_target, critic_output)

    actor_grads = tape.gradient(actor_loss, actor_model.trainable_variables)
    critic_grads = tape.gradient(critic_loss, critic_model.trainable_variables)
    del tape  # release the resources held by the persistent tape

    optimizer.apply_gradients(zip(actor_grads, actor_model.trainable_variables))
    optimizer.apply_gradients(zip(critic_grads, critic_model.trainable_variables))


In [60]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError

# Define the optimizer and the loss function
optimizer = Adam(learning_rate=0.001)
critic_loss_function = MeanSquaredError()

# Define the actor and critic update steps
@tf.function
def update_actor_and_critic(actor_model, critic_model, states, scores, advantages, critic_target):
    """Apply one gradient step to the actor and one to the critic.

    Parameters:
    actor_model: model called on [states, scores] to produce the policy output.
    critic_model: model called on [states, scores, actor_output] to produce the value.
    states: population batch fed to both models.
    scores: score batch fed to both models.
    advantages: weights applied to the log of the actor output in the actor loss.
    critic_target: target passed to critic_loss_function.

    Relies on the notebook-level names `farm` (for its two optimizers) and
    `critic_loss_function`.

    NOTE(review): tf.math.log yields NaN for negative actor outputs - confirm
    the actor output is strictly positive before using this loss.
    """
    # persistent=True is required: gradient() is called twice on this tape,
    # which a default (non-persistent) tape rejects with a RuntimeError.
    with tf.GradientTape(persistent=True) as tape:
        actor_output = actor_model([states, scores])  # policy
        critic_output = critic_model([states, scores, actor_output])  # value
        actor_loss = -tf.reduce_mean(advantages * tf.math.log(actor_output))
        critic_loss = critic_loss_function(critic_target, critic_output)

    actor_grads = tape.gradient(actor_loss, actor_model.trainable_variables)
    critic_grads = tape.gradient(critic_loss, critic_model.trainable_variables)
    farm.actor_optimizer.apply_gradients(zip(actor_grads, actor_model.trainable_variables))
    farm.critic_optimizer.apply_gradients(zip(critic_grads, critic_model.trainable_variables))
    del tape  # release the resources held by the persistent tape

# Inputs to actor/critic
current_pop = farm.current_population.reshape(1, *farm.current_population.shape)
current_scores = farm.simulator.phenotype(farm.current_population).to_numpy()
current_scores = current_scores.reshape(1, *current_scores.shape)
#
#.reshape(1, *farm.current_population.shape)
#
#


# Run a single episode
# Forward pass
actor_output = farm.actor([dummy_pop, scores_dummy])  # policy
critic_output = farm.critic([current_pop, scores_dummy, actor_output])  # value


AttributeError: Exception encountered when calling layer 'model_2' (type Functional).

'tuple' object has no attribute 'rank'

Call arguments received by layer 'model_2' (type Functional):
  • inputs=['Array([[[[0, 0],\n         [0, 1],\n         [1, 1],\n         ...,\n         [0, 0],\n         [0, 0],\n         [0, 0]],\n\n        [[0, 1],\n         [0, 0],\n         [1, 1],\n         ...,\n         [0, 0],\n         [0, 1],\n         [0, 1]],\n\n        [[0, 0],\n         [0, 1],\n         [1, 0],\n         ...,\n         [0, 0],\n         [0, 1],\n         [0, 0]],\n\n        ...,\n\n        [[0, 0],\n         [0, 1],\n         [1, 0],\n         ...,\n         [0, 0],\n         [0, 0],\n         [0, 0]],\n\n        [[0, 0],\n         [1, 1],\n         [0, 0],\n         ...,\n         [0, 0],\n         [0, 1],\n         [0, 0]],\n\n        [[0, 0],\n         [0, 0],\n         [1, 1],\n         ...,\n         [0, 0],\n         [0, 0],\n         [0, 0]]]], dtype=int32)', 'tf.Tensor(shape=(1, 50), dtype=float64)']
  • training=None
  • mask=None

In [None]:

# Parse the actor_output to take an action
# (depends on actor_output, critic_output, current_pop and current_scores left behind by earlier cells)
parent_values, parent_indices = select_parents(actor_output)
# parent_indices keeps the batch axis; the trailing [0] drops it again after indexing
selected_parents = farm.current_population[parent_indices.numpy()][0]

# Transition to next state of environment (new population): twice as many crosses as selected parents
next_generation = farm.simulator.random_crosses(selected_parents, n_crosses = selected_parents.shape[0] * 2)

# Calculate reward as the change in mean GEBV between the two populations
old_fitness = farm.simulator.GEBV(farm.current_population).mean()[0]
new_fitness = farm.simulator.GEBV(next_generation).mean()[0]
reward = new_fitness - old_fitness  # the reward for taking this action to generate next_generation

# Advantage: observed reward minus the critic's prediction
critic_predicted_reward = critic_output.numpy()[0][0]
advantages = reward - critic_predicted_reward

# Update actor and critic
# NOTE(review): current_scores is flattened to 1-D here, while the forward passes elsewhere feed (1, n) scores - confirm
update_actor_and_critic(farm.actor, farm.critic, current_pop, current_scores.reshape(-1), advantages, reward)

# Update current population and scores for the next episode
farm.current_population = next_generation
# NOTE(review): the score refresh below is disabled, so current_scores keeps describing the previous population
#current_scores = farm.simulator.phenotype(next_generation).to_numpy()


In [None]:
current_pop = farm.current_population.reshape(1, *farm.current_population.shape)

actor_output = farm.actor([current_pop, current_scores])  # policy


In [None]:
current_pop.shape

In [None]:
current_scores.shape

In [27]:
# a single episode 
current_pop = farm.current_population.reshape(1, *farm.current_population.shape)
current_scores = farm.simulator.phenotype(farm.current_population).to_numpy().flatten()

In [32]:
farm.actor([current_pop, scores_dummy])

<tf.Tensor: shape=(1, 50), dtype=float32, numpy=
array([[ 6.5104151e-01, -2.8516501e-01, -3.4279385e-01,  2.4405798e-01,
        -2.9868206e-01,  6.2417991e-02, -3.4670165e-01,  1.5136723e-01,
         1.8660007e-01, -4.3060362e-01,  1.0519144e-01, -2.4274936e-01,
         7.1200669e-01, -4.9227658e-03, -8.6381203e-01, -8.2548894e-04,
        -6.6038392e-02,  1.6787472e-01,  4.1797993e-01, -6.0664785e-01,
        -3.4991795e-01, -1.4222504e+00, -2.2188221e-01, -1.8743233e-01,
        -2.1070190e-01, -8.4726505e-02, -2.2246145e-02,  8.3695501e-01,
         8.6771190e-01, -7.5732344e-01,  1.0696792e-01,  8.3783931e-01,
         3.1456056e-01,  2.3440292e-01,  6.4454299e-01,  4.6602714e-01,
        -7.4099526e-02, -4.2517554e-02, -2.0585846e-02,  1.0848941e-01,
         4.1412947e-01,  1.4267309e-01,  4.5503116e-01, -9.3657285e-01,
        -3.9549991e-01,  5.9971593e-02,  5.6688387e-02,  1.8309337e-01,
        -4.4215167e-01, -7.1446031e-01]], dtype=float32)>

In [36]:
scores_dummy.shape

(1, 50)

In [43]:
farm.actor([current_pop,])

<tf.Tensor: shape=(1, 50), dtype=float32, numpy=
array([[ 0.54233706, -0.28029063, -0.44808167,  0.19406004, -0.22346362,
         0.01603362, -0.44298393,  0.14528964,  0.0976283 , -0.41562054,
         0.04504631, -0.25273275,  0.62266946,  0.00256989, -0.7960678 ,
        -0.03216837, -0.03567402,  0.0985531 ,  0.36002424, -0.6431509 ,
        -0.30197492, -1.3364706 , -0.28009248, -0.12712568, -0.199123  ,
        -0.1536542 , -0.09904246,  0.66717166,  0.6916788 , -0.6355116 ,
         0.07733408,  0.7528592 ,  0.24210514,  0.16027087,  0.5271198 ,
         0.5624838 , -0.15323204, -0.00502504,  0.02770197,  0.08231469,
         0.46222824,  0.18528374,  0.46578988, -0.81121594, -0.3826443 ,
        -0.04107046,  0.13703369,  0.14923783, -0.47860432, -0.62885785]],
      dtype=float32)>

In [21]:

# Forward pass through both networks
# NOTE(review): scores_dummy (random values from an earlier cell) is fed in place of the population's real scores - confirm
actor_output = farm.actor([current_pop, scores_dummy]) # policy
critic_output = farm.critic([current_pop, scores_dummy, actor_output]) # value

# Parse the actor_output to take an action
parent_values, parent_indices = select_parents(actor_output)
# parent_indices keeps the batch axis; the trailing [0] drops it again after indexing
selected_parents = farm.current_population[parent_indices.numpy()][0]

# Transition to next state of environment (new population): twice as many crosses as selected parents
next_generation = farm.simulator.random_crosses(selected_parents, n_crosses = selected_parents.shape[0] * 2)

# Calculate reward as the change in mean GEBV between the two populations
old_fitness = farm.simulator.GEBV(farm.current_population).mean()[0]
new_fitness = farm.simulator.GEBV(next_generation).mean()[0]
reward = new_fitness - old_fitness # the reward for taking this action to generate next_generation

# Advantage: observed reward minus the critic's prediction
critic_predicted_reward = critic_output.numpy()[0][0]
advantages = reward - critic_predicted_reward



In [23]:
scores_dummy

array([[0.96790104, 0.57597318, 0.77145901, 0.36883692, 0.41810905,
        0.84119367, 0.73621826, 0.23072492, 0.27568311, 0.7827208 ,
        0.67130558, 0.1994031 , 0.4780718 , 0.67408037, 0.10682971,
        0.19155974, 0.52064394, 0.70057062, 0.24263135, 0.54435504,
        0.76543685, 0.13545422, 0.60136103, 0.96635435, 0.05361849,
        0.30705308, 0.54165819, 0.60590054, 0.39387241, 0.15398243,
        0.48752292, 0.1426055 , 0.09996266, 0.93993927, 0.98191053,
        0.03446852, 0.05859439, 0.76646634, 0.37575338, 0.11958848,
        0.56648797, 0.6571133 , 0.41338017, 0.7346084 , 0.82894075,
        0.10326268, 0.86270544, 0.51344379, 0.19989252, 0.14959542]])

array([ 0.55843306,  0.07487825,  1.9057657 ,  0.02157572, -0.23536834,
       -0.6472284 ,  0.1889464 , -1.3179084 , -0.6954651 , -2.8239932 ,
       -0.8645679 , -0.07158399,  0.22153953, -1.2799115 ,  0.49403834,
        0.14468414, -2.5399718 , -1.2592124 , -0.4882877 , -1.4522421 ,
       -1.1792893 , -0.7705883 , -0.69329464, -0.19911563, -0.9629164 ,
        1.4614671 , -1.0485626 , -1.8964022 ,  1.0860348 , -1.0965081 ,
       -2.6374998 ,  0.9308336 , -0.46369982,  1.0218074 , -1.1255081 ,
       -1.0093731 ,  1.1463205 , -0.864039  , -1.4443319 ,  1.994973  ,
        0.28011262,  1.2341009 , -1.280933  , -1.5605255 , -0.7278145 ,
        0.5520846 , -1.5805368 , -1.0526664 , -1.272587  ,  1.017198  ],
      dtype=float32)

In [None]:
optimizer = farm.actor_optimizer

In [None]:
# NOTE(review): `tape` and `actor_loss` are not created by any cell above this one in this
# part of the notebook; they are only defined in the GradientTape cell further down, so this
# cell depends on out-of-order execution. Prefer the self-contained cell below.
grads = tape.gradient(actor_loss, farm.actor.trainable_variables)
optimizer.apply_gradients(zip(grads, farm.actor.trainable_variables))


The actor loss is the negative average of the log probabilities of the selected actions, weighted by the advantage. The negative sign is used because we want to perform gradient ascent to maximize the expected reward, but TensorFlow's optimizers perform gradient descent to minimize the loss.

In [None]:
import tensorflow as tf

# Negative mean of advantage-weighted log outputs (minimising this maximises the weighted log term).
# NOTE(review): the actor outputs printed earlier in this notebook contain negative entries,
# for which tf.math.log yields NaN — confirm the actor emits strictly positive values.
actor_loss = -tf.reduce_mean(advantages * tf.math.log(actor_output))

In [None]:
# Single gradient step on the actor.
# Relies on `farm`, `current_pop`, `scores_dummy` and `advantages` from earlier cells.
optimizer = farm.actor_optimizer

with tf.GradientTape() as tape:
    # The forward pass has to happen inside the tape: an `actor_output` computed in an
    # earlier cell is not connected to the recorded operations, so tape.gradient would
    # return None for every actor variable and apply_gradients would fail.
    actor_output = farm.actor([current_pop, scores_dummy])
    # NOTE(review): the actor outputs printed earlier in this notebook contain negative
    # entries, for which tf.math.log yields NaN — confirm the actor emits strictly
    # positive values (e.g. probabilities) before relying on this loss.
    actor_loss = -tf.reduce_mean(advantages * tf.math.log(actor_output))
grads = tape.gradient(actor_loss, farm.actor.trainable_variables)
optimizer.apply_gradients(zip(grads, farm.actor.trainable_variables))

In [None]:
# Dimensions for the initial population + genetic map.
# n comes from nInd, m from segSites * nChr; d is fixed at 2
# (presumably the two-allele axis of the population array — TODO confirm).
n, m, d = int(nInd), int(segSites * nChr), 2
total_parents = 2 * n  # pop size per cycle



def calculate_true_reward(simulator, current_population, new_population):
    """Return the change in mean GEBV from the current population to the new one.

    Args:
        simulator: object exposing ``GEBV(population)`` whose result has ``.mean()``.
        current_population: population before the action.
        new_population: population produced by the action.

    Returns:
        ``GEBV(new_population).mean() - GEBV(current_population).mean()``.
    """
    mean_after = simulator.GEBV(new_population).mean()
    mean_before = simulator.GEBV(current_population).mean()
    return mean_after - mean_before

def calculate_critic_loss(true_reward, critic_output):
    """Squared error between the observed reward and the critic's prediction.

    Works element-wise on anything supporting ``-`` and ``**``; no reduction is applied.
    """
    residual = true_reward - critic_output
    return residual ** 2
    



def calculate_actor_loss(actor_output, selected_array, advantages):
    """Advantage-weighted negative log-likelihood of the selected entries.

    Args:
        actor_output: actor output tensor; its element-wise log is taken, so values
            are expected to be positive.
        selected_array: indices gathered along axis 1 of the log output.
        advantages: weight multiplied into the gathered log values.

    Returns:
        Scalar tensor: minus the sum of (gathered log values * advantages).
    """
    chosen_log_probs = tf.gather(tf.math.log(actor_output), selected_array, axis=1)
    weighted = chosen_log_probs * advantages
    return -tf.reduce_sum(weighted)


@tf.function
def actor_train_step(actor_model, optimizer, loss, example_population, selected_array, advantages):
    """Run one optimizer step on the actor and return the loss.

    Args:
        actor_model: callable model exposing ``trainable_variables``.
        optimizer: optimizer whose ``apply_gradients`` is used for the update.
        loss: callable ``loss(predictions, selected_array, advantages)``.
        example_population: input passed straight to ``actor_model``.
        selected_array: forwarded to ``loss``.
        advantages: forwarded to ``loss``.

    Returns:
        The loss value computed for this step.
    """
    # The forward pass and the loss are both recorded so gradients reach the actor weights.
    with tf.GradientTape() as tape:
        policy_out = actor_model(example_population)
        step_loss = loss(policy_out, selected_array, advantages)
    # Variables are looked up only after the forward pass, as in the original ordering.
    weights = actor_model.trainable_variables
    optimizer.apply_gradients(zip(tape.gradient(step_loss, weights), weights))
    return step_loss



@tf.function
def critic_train_step(critic_model, optimizer, example_population, actor_output, past_fitness):
    """Run one optimizer step on the critic and return its squared error.

    Args:
        critic_model: callable model taking ``[example_population, actor_output]`` and
            exposing ``trainable_variables``.
        optimizer: optimizer whose ``apply_gradients`` is used for the update.
        example_population: first critic input.
        actor_output: second critic input.
        past_fitness: regression target for the critic's prediction.

    Returns:
        The element-wise squared error ``(past_fitness - prediction) ** 2``; it is not
        reduced to a scalar here.
    """
    with tf.GradientTape() as tape:
        value_estimate = critic_model([example_population, actor_output])
        squared_error = (past_fitness - value_estimate) ** 2
    # Variables are looked up only after the forward pass, as in the original ordering.
    weights = critic_model.trainable_variables
    optimizer.apply_gradients(zip(tape.gradient(squared_error, weights), weights))
    return squared_error

In [None]:

# NOTE(review): this cell references `self` at cell level, so it appears to be an excerpt of a
# method body kept for reference; it will not run on its own unless `self`, `new_population`,
# `critic_output`, `current_pop`, `selected_array`, `actor_output` and `mean_score_list` exist.

# Target is the mean GEBV of the new population; advantage is that target minus the
# first element of the critic output.
future_rewards = self.simulator.GEBV(new_population).mean()[0]
advantages = future_rewards - critic_output.numpy()[0][0]

# One update each for actor and critic; the critic regresses onto future_rewards.
actor_loss = actor_train_step(self.actor, self.actor_optimizer, calculate_actor_loss, current_pop, selected_array, advantages)
critic_loss = critic_train_step(self.critic, self.critic_optimizer, current_pop, actor_output, past_fitness=future_rewards)

# Bookkeeping: mean GEBV of the population *before* it is replaced, plus both losses.
mean_score_list.append(self.simulator.GEBV(self.current_population).mean()[0])
self.critic_history.append(critic_loss.numpy()[0][0])
self.actor_history.append(actor_loss)

# Advance the environment: the new population becomes current and its GEBVs are recorded.
self.current_population = np.array(new_population)
self.current_scores = self.simulator.GEBV(self.current_population)
self.history.append(self.current_scores)

In [None]:
# Add a leading axis of size 1 in front of the population array before feeding the actor.
current_pop = farm.current_population.reshape(1, *farm.current_population.shape)
# NOTE(review): current_scores is computed here but the actor below is fed scores_dummy,
# not current_scores — confirm which one is intended.
current_scores = farm.simulator.phenotype(farm.current_population).to_numpy()
actor_output = farm.actor([current_pop, scores_dummy])

In [None]:
actor_output

In [None]:
selected_parents = select_parents(actor_output)[1].numpy()

In [None]:
selected_parents

In [None]:
# Cross the selected parents, requesting two crosses per selected parent.
# `selected_parents` is an index array with a leading batch axis (hence the [0] after
# indexing), so len(selected_parents) is the batch size, not the number of parents;
# the parent count is the size of the last axis.
next_generation = farm.simulator.random_crosses(farm.current_population[selected_parents][0], n_crosses=selected_parents.shape[-1] * 2)

In [None]:
# NOTE(review): `values` and `indices` are not assigned in the surrounding cells; presumably
# they are the pair returned by select_parents(actor_output) — TODO confirm / assign explicitly.
print("Top 50% values: ", values.numpy())
print("Their indices: ", indices.numpy())


In [None]:
actor_output[0].shape

In [None]:
[select_parent(x) for x in actor_output[0]]

In [None]:

# Build a critic with create_critic (defined elsewhere) from the dimensions n, m, d and the
# dummy inputs, then run it once on those same inputs and print the result.
critic_model = create_critic(n, m, d, population_dummy, scores_dummy, actor_output)
critic_output = critic_model([population_dummy, scores_dummy, actor_output])
print(critic_output)


The actor loss is the negative average of the log probabilities of the selected actions, weighted by the advantage. The negative sign is used because we want to perform gradient ascent to maximize the expected reward, but TensorFlow's optimizers perform gradient descent to minimize the loss.

In [None]:
def truncate_cycle(x):
    """Advance ``x`` by one generation of truncation selection, in place.

    Selects half of the current population (``k = size // 2``) through the simulator,
    crosses the selected individuals to produce as many offspring as the current
    population size, appends the offspring GEBVs to ``x.history`` and makes the
    offspring the current population.

    Args:
        x: object exposing ``current_population``, ``simulator`` and ``history``.
    """
    pop_size = x.current_population.shape[0]
    selected = x.simulator.select(x.current_population, k=pop_size // 2)
    offspring = x.simulator.random_crosses(selected, n_crosses=pop_size)
    x.history.append(x.simulator.GEBV(offspring))
    x.current_population = offspring

In [None]:
# Truncation-selection baseline: 10 cycles, then one box per history entry.
# NOTE(review): this is a plain assignment, so trunk_farm and farm are the same object and
# the cycles below also change farm.history / farm.current_population — confirm this is intended.
trunk_farm = farm
for i in range(10):
    truncate_cycle(trunk_farm)

# One flattened score vector per history entry; transposed so each entry becomes a column.
data = [scores.to_numpy().flatten() for scores in trunk_farm.history]
df = pd.DataFrame(data).transpose()
plt.figure(figsize=(12, 6))
sns.boxplot(data=df)
plt.show()


In [None]:
len(farm.history)

In [None]:
my_frames = farm.run_episodes(150,10)

In [None]:
# Load every frame returned by run_episodes and write them out as a single GIF.
images = [imageio.imread(frame_path) for frame_path in my_frames]
imageio.mimsave('training_animation.gif', images, loop=1)


In [None]:
# Mean score of every entry in farm.history, with a rolling average overlaid.
import matplotlib.pyplot as plt
import pandas as pd

# Elsewhere in this notebook the result of `.mean()` on a history entry is indexed with [0],
# i.e. it is not a scalar. Take the same element here so the Series below is numeric and
# the rolling mean can be computed.
mean_score_list = [x.mean()[0] for x in farm.history]
mean_score_series = pd.Series(mean_score_list)

# Rolling average over 100 history entries (NaN until 100 entries are available).
rolling_mean = mean_score_series.rolling(window=100).mean()

# The plotted quantity is the mean score per history entry, not the critic loss.
plt.figure(figsize=(10, 5))
plt.plot(mean_score_series, label='Original')
plt.plot(rolling_mean, 'r-', label='Rolling Average')
plt.title("history step # vs mean GEBV")
plt.xlabel("history step")
plt.ylabel("mean GEBV")
plt.legend(loc='upper left')
plt.show()


In [None]:
# Distribution of scores at every entry of farm.history, one box per entry.
# Each entry is flattened to a 1-D vector; transposing puts one entry in each column.
data = [scores.to_numpy().flatten() for scores in farm.history]
df = pd.DataFrame(data).transpose()
plt.figure(figsize=(12, 6))
sns.boxplot(data=df)
plt.show()


In [None]:
plt.plot(farm.critic_history)

In [None]:
plt.plot(farm.actor_history)

In [None]:
plt.plot([x.mean()[0] for x in farm.history])