In [None]:
%load_ext rpy2.ipython

In [None]:
from rpy2.robjects.packages import importr
import rpy2.robjects as robjects

In [None]:
import keras_core as keras
from keras_core import layers

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from chromax import Simulator, sample_data
import pathlib

In [None]:

def plot_means_and_variances(dataframes):
    """
    Plot the mean of each dataframe against its position, with the variance as error bar.

    Parameters:
    dataframes: sequence of objects exposing ``.mean()`` and ``.var()``
        (e.g. pandas DataFrames). Their results are flattened to 1-D, so
        each dataframe is expected to contribute a single value.

    Returns:
    None. The figure is shown immediately with ``plt.show()``.
    """
    # Per-dataframe statistics, flattened so they line up with the x positions.
    means = np.array([frame.mean() for frame in dataframes]).flatten()
    variances = np.array([frame.var() for frame in dataframes]).flatten()

    positions = range(len(dataframes))

    # Note: the error bars show the variance itself, not the standard deviation.
    plt.errorbar(positions, means, yerr=variances, fmt='o')
    plt.show()


def generate_genetic_map(n_markers, n_chromosomes):
    """
    Build a random genetic map as a DataFrame.

    Parameters:
    n_markers (int): Number of markers (rows) to generate.
    n_chromosomes: Accepted but not used; every marker is labelled '1A'.

    Returns:
    DataFrame with columns 'CHR.PHYS', 'cM', 'Yield', sorted by 'cM'.
    'Yield' comes from generate_marker_effects and 'cM' is uniform on [0, 200).
    """
    # Effects are drawn before positions so the RNG is consumed in a fixed order.
    genetic_map = pd.DataFrame(generate_marker_effects(n_markers=n_markers), columns=['Yield'])
    genetic_map['cM'] = np.random.uniform(0, 200, len(genetic_map))
    genetic_map['CHR.PHYS'] = '1A'
    return genetic_map.sort_values(by='cM')[['CHR.PHYS', 'cM', 'Yield']]

def generate_population(n_pop=100, n_markers=500):
    """
    Generate a random boolean population array.

    Parameters:
    n_pop (int): Number of individuals.
    n_markers (int): Number of markers per individual.

    Returns: boolean numpy array of shape (n_pop, n_markers, 2); every entry is
    drawn independently and uniformly from {True, False}. The trailing axis of
    length 2 presumably holds the two haplotypes of each individual.
    """
    # A 0/1/2 genotype matrix used to be sampled here but was never returned;
    # it only burned random draws, so it has been removed.
    return np.random.choice([True, False], size=(n_pop, n_markers, 2))


def generate_marker_effects(n_markers=500, mu=0, sigma=0.1):
    """
    Draw one effect per marker from a normal distribution.

    Parameters:
    n_markers (int): How many effects to draw.
    mu (float): Mean of the normal distribution.
    sigma (float): Standard deviation of the normal distribution.

    Returns:
    1-D numpy array of length n_markers.
    """
    return np.random.normal(loc=mu, scale=sigma, size=n_markers)

import numpy as np

def select_random_individuals(arr, num_individuals):
    """
    Pick individuals at random along the first axis of an array.

    Parameters:
    arr: numpy array whose first axis indexes individuals.
    num_individuals (int): Number of individuals to draw.

    Returns:
    numpy array of shape (num_individuals, *arr.shape[1:]). Sampling is done
    with replacement (the np.random.choice default), so an individual can
    appear more than once.
    """
    # Random row indices along the first axis.
    idx = np.random.choice(arr.shape[0], size=num_individuals)

    # The selection itself was missing, leaving the returned name undefined.
    random_individuals = arr[idx]

    return random_individuals

def select_mixed(population, random_split=.99):
    """
    Combine randomly drawn parents with parents chosen by the simulator.

    Parameters:
    population: array whose first dimension is the population size; only that
        size is read from it.
    random_split (float): Fraction of the population size drawn at random; the
        remaining fraction is requested from ``Farm.Simulator.select``.

    Returns:
    The random parents followed by the selected parents, concatenated on axis 0.

    NOTE(review): parents are taken from the global ``Farm.current_population``
    rather than from ``population``, and no ``Farm`` object is defined in the
    visible part of this notebook — confirm where it is expected to come from.
    """
    pop_size = population.shape[0]
    random_count = int(pop_size * random_split)
    select_count = int(pop_size * (1-random_split))

    # Random parents first, then the simulator's picks.
    parent_groups = (
        select_random_individuals(Farm.current_population, random_count),
        Farm.Simulator.select(Farm.current_population, k = select_count),
    )
    return np.concatenate(parent_groups, axis=0)

def plot_replicate_means_and_variances(replicate_data, start_index=None, end_index=None):
    """
    Plot, per replicate, the mean of each dataframe with its variance as error bar.

    Parameters:
    replicate_data: sequence of replicates, each a sliceable sequence of
        objects exposing ``.mean()`` and ``.var()``.
    start_index (int, optional): First position to plot; defaults to 0.
    end_index (int, optional): Slice end (exclusive); defaults to the length
        of the first replicate.

    Returns:
    None. The figure is shown with ``plt.show()``.
    """
    fig, ax = plt.subplots()

    first = 0 if start_index is None else start_index
    last = len(replicate_data[0]) if end_index is None else end_index

    for replicate_number, frames in enumerate(replicate_data, start=1):
        window = frames[first:last]

        means = np.array([frame.mean() for frame in window]).flatten()
        variances = np.array([frame.var() for frame in window]).flatten()

        # x positions restart at 0 for the selected window, whatever start_index is.
        ax.errorbar(range(len(window)), means, yerr=variances, fmt='o', label=f'Replicate {replicate_number}')

    ax.legend()
    plt.show()


def plot_replicate_means(replicate_data):
    """
    Draw one line per replicate showing the mean of each of its dataframes.

    Parameters:
    replicate_data: sequence of replicates, each a sequence of objects
        exposing ``.mean()``.

    Returns:
    None. The figure is shown with ``plt.show()``.
    """
    fig, ax = plt.subplots()

    for replicate_number, frames in enumerate(replicate_data, start=1):
        # One mean per dataframe, flattened to a 1-D series.
        means = np.array([frame.mean() for frame in frames]).flatten()
        ax.plot(range(len(frames)), means, label=f'Replicate {replicate_number}')

    ax.legend()
    plt.show()

def parse_markerEffects(genetic_map, nChr):
    """
    Assign a 1-based chromosome number to every marker position.

    Markers are split into consecutive blocks of ``len(genetic_map) // nChr``.

    Parameters:
    genetic_map: any sized object; only its length is used.
    nChr: number of chromosomes.

    Returns:
    List with one chromosome number per marker. The element type follows the
    arithmetic on ``nChr``: plain ints for an int, one-element arrays when
    ``nChr`` is itself a one-element numpy array.

    Caveats:
    - If the length is not a multiple of nChr, trailing markers get numbers
      above nChr.
    - If the length is non-zero but smaller than nChr, the block size is 0 and
      the division raises ZeroDivisionError.
    """
    n_markers = len(genetic_map)
    block_size = n_markers // nChr
    return [position // block_size + 1 for position in range(n_markers)]
import jax.numpy as jnp
import numpy as np

def score_top(scores: pd.DataFrame, column: str, k: int):
    """
    Return the index labels of the k rows with the highest values in `column`.

    Parameters:
    scores (DataFrame): Table holding the scores.
    column (str): Column to rank by.
    k (int): Number of rows to keep.

    Returns:
    pandas Index with the labels of the top k rows, best first.
    """
    ranked = scores.sort_values(by=column, ascending=False)
    return ranked.index[:k]


def score_top_percentile(scores: pd.DataFrame, column: str, percentile_min: float, percentile_max: float, k: int):
    """
    Sample k index labels from the rows whose score lies within a quantile band.

    Parameters:
    scores (DataFrame): Table holding the scores.
    column (str): Column to filter on.
    percentile_min (float): Lower quantile passed to ``Series.quantile``.
    percentile_max (float): Upper quantile; must be greater than percentile_min.
    k (int): Number of labels to draw.

    Returns:
    numpy array of k index labels, drawn with replacement from the rows whose
    value lies between the two quantiles (both ends inclusive).

    Raises:
    AssertionError: if percentile_max is not greater than percentile_min.
    """
    assert percentile_max > percentile_min, "Error: max percentile should be greater than min percentile"

    values = scores[column]
    lower, upper = values.quantile(percentile_min), values.quantile(percentile_max)

    in_band = (values >= lower) & (values <= upper)
    candidates = scores[in_band].index

    return np.random.choice(candidates, k, replace=True)

def reshape_pop(maizeHaplo):
    """
    Regroup a 2-D (rows, markers) array into (rows / 2, markers, 2).

    Each pair of consecutive rows becomes one entry along the first axis, with
    the two rows stacked on the last axis. Presumably the rows are haplotypes
    and each pair belongs to one diploid individual — confirm against the caller.

    Parameters:
    maizeHaplo: 2-D numpy array with an even number of rows.

    Returns:
    numpy array of shape (rows / 2, markers, 2).
    """
    n_rows, n_markers = maizeHaplo.shape[0], maizeHaplo.shape[1]
    paired = maizeHaplo.reshape(int(n_rows / 2), 2, n_markers)
    return paired.transpose((0, 2, 1))

def return_genetic_map_df(markerEffects, nChr, geneticMap):
    """
    Assemble the genetic-map DataFrame.

    Parameters:
    markerEffects: one effect per marker; becomes the 'Yield' column.
    nChr: number of chromosomes, as a plain number or a one-element array.
    geneticMap: one position per marker; becomes the 'cM' column.

    Returns:
    DataFrame with columns 'CHR.PHYS' (int chromosome number from
    parse_markerEffects), 'Yield' and 'cM'.
    """
    chromosomes = parse_markerEffects(markerEffects, nChr)
    # parse_markerEffects yields plain numbers for an int nChr but one-element
    # arrays when nChr is itself an array; np.ravel accepts both, whereas
    # indexing with [0] failed on plain ints.
    chromosomes = [int(np.ravel(value)[0]) for value in chromosomes]
    return pd.DataFrame({'CHR.PHYS': chromosomes, 'Yield': markerEffects, 'cM': geneticMap})



In [None]:
%%R
# Build an R vector of 50 evenly spaced values from 0 to 2*pi.
x <- seq(0, 2*pi, length.out=50)

In [None]:
# Copy the R variable `x` into the Python namespace (-o marks an output of the R call).
%R -o x

In [None]:
# Display `x` as it arrived on the Python side.
x

In [None]:
# %R install.packages("AlphaSimR")

In [None]:
%%R
# Attach AlphaSimR in the embedded R session; the following R cells call its functions.
library("AlphaSimR")

In [None]:
%%R
# Simulation size. These three values are exported to Python further down and
# determine the population size (nInd) and marker count (segSites * nChr).
nInd = 50
nChr = 3
segSites = 10

# Simulate founder genomes with the built-in "MAIZE" settings of runMacs.
founderGenomes = runMacs(nInd = nInd,
                         nChr = nChr,
                         segSites = segSites,
                         species = "MAIZE")


In [None]:
%%R
# Simulation parameters built on the founder genomes.
SP = SimParam$new(founderGenomes)
# Add one additive trait; segSites is passed as the first argument
# (presumably the number of QTL per chromosome — confirm against the AlphaSimR docs).
SP$addTraitA(segSites)
# SP$setVarE(h2=.02)
pop = newPop(founderGenomes, simParam=SP)
# Fit RR-BLUP using the QTL genotypes (useQtl=TRUE) against genetic values (use='gv').
ans = fastRRBLUP(pop, simParam=SP, useQtl=TRUE, use='gv')
# Show the additive effects of the first fitted trait, then keep them for export.
ans@gv[[1]]@addEff
markerEffects = slot(slot(ans, "gv")[[1]], "addEff")
# Haplotype and genotype matrices at the segregating sites.
maizeHaplo = pullSegSiteHaplo(pop)
maizeGeno = pullSegSiteGeno(pop)
# Genetic-map position of each marker, flattened across chromosomes into one vector.
# NOTE(review): labelled cM downstream; AlphaSimR maps may be in Morgans — confirm the unit.
genMap = SP$genMap
geneticMap = unlist(genMap)

In [None]:
# Export the simulated data and the size parameters from R to Python.
# NOTE(review): the scalars nInd/nChr/segSites presumably arrive as one-element
# arrays rather than Python ints (later cells wrap them in int()) — confirm.
%R -o maizeHaplo
%R -o maizeGeno
%R -o markerEffects
%R -o geneticMap
%R -o nInd
%R -o nChr
%R -o segSites

In [None]:
class BreedingProgram:
    """
    Holds the state of a simulated breeding program: the founder population,
    a chromax ``Simulator`` and the history of per-generation scores.

    No reinforcement-learning agent is created or used by this class.
    """

    def __init__(self, initial_population, genetic_map, population_size, marker_count, chromosome_number, max_generation, heritability):
        """
        Initializes the breeding program and scores the initial population.

        Parameters:
        initial_population: array passed through ``reshape_pop`` and scored as generation 0.
        genetic_map: genetic map handed to the ``Simulator``.
        population_size (int): Stored as-is.
        marker_count (int): Stored as-is.
        chromosome_number (int): Stored as-is.
        max_generation (int): Stored as-is.
        heritability (float): Passed to the ``Simulator`` as ``h2``.
        """

        # Initialize the basic attributes
        self.population_size = population_size
        self.marker_count = marker_count
        # Previously this argument was accepted but silently dropped.
        self.chromosome_number = chromosome_number
        self.initial_population = initial_population
        self.genetic_map = genetic_map
        self.max_generation = max_generation

        # Initialize the simulator
        self.simulator = Simulator(genetic_map=self.genetic_map, h2=heritability)
        # NOTE(review): the result of this call is discarded, and the file name
        # is hard-coded. If load_population returns the population instead of
        # storing it on the simulator, this line has no lasting effect — confirm.
        self.simulator.load_population('mypop.npy')

        # Initialize the current generation and history
        self.current_generation = 0
        self.history = []

        # Start the breeding program
        self._start_breeding_program()

    def _start_breeding_program(self):
        """
        Sets the current population to the initial one, scores it with the
        simulator's GEBV and records those scores as the first history entry.
        """
        self.current_population = self.initial_population
        # NOTE(review): reshape_pop pairs up consecutive rows, which suits a
        # haplotype matrix with two rows per individual — confirm that
        # initial_population has that layout.
        self.current_scores = self.simulator.GEBV(reshape_pop(self.initial_population))
        self.history.append(self.current_scores)





# NOTE(review): this is the genotype matrix (pullSegSiteGeno), yet BreedingProgram
# passes its initial population through reshape_pop, which pairs up consecutive
# rows — confirm that the haplotype matrix was not intended here.
initial_population = maizeGeno 
# nChr is passed exactly as exported from R (not wrapped in int()).
genetic_map = return_genetic_map_df(markerEffects, nChr, geneticMap)
# Regroup the haplotype rows and write them to disk. np.save appends ".npy",
# producing the 'mypop.npy' file that BreedingProgram.__init__ loads, so this
# must run before the constructor call below.
reshapeHaplo = reshape_pop(maizeHaplo)
np.save('mypop', reshapeHaplo)


# Plain-int versions of the R simulation parameters.
population_size = int(nInd)
marker_count = int((segSites * nChr))
chromosome_number = int(nChr)
# Passed to BreedingProgram as max_generation and heritability respectively.
generation_max = 10
heritability = .5

farm = BreedingProgram(initial_population, genetic_map, population_size, marker_count, chromosome_number, generation_max, heritability)

In [None]:
# Separate Simulator built from the same genetic map; not attached to `farm`.
mysim =  Simulator(genetic_map=genetic_map, h2=.5)

In [212]:
# NOTE: this rebinds `keras` to tensorflow.keras for the rest of the notebook,
# replacing the keras_core alias imported at the top.
from tensorflow import keras
# Reshape is used by the actor below and was missing from this import,
# which raised NameError on a fresh kernel.
from tensorflow.keras.layers import Dense, Input, Flatten, Reshape

#create init population + genetic map
n = int(nInd)                 # population size
m = int((segSites * nChr))    # markers per individual
total_parents = 100           # number of parent slots the actor fills

# Define the actor model
# Input: one population of shape (n, m).
# Output: (total_parents, n) — for every parent slot, a softmax distribution
# over the n individuals (softmax runs along the last axis).
actor_input = keras.layers.Input(shape=(n, m))
x = Flatten()(actor_input)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(n * total_parents, activation='linear')(x)
x = Reshape((total_parents, n))(x)
actor_output = keras.layers.Softmax(axis=-1)(x)
actor_model = keras.models.Model(actor_input, actor_output)
actor_model.compile(optimizer='adam', loss='categorical_crossentropy')

# Define the critic model
# Inputs: the population (n, m) and the actor's parent distributions (total_parents, n).
# Output: n linear values.
critic_input1 = keras.layers.Input(shape=(n, m))
critic_input2 = keras.layers.Input(shape=(total_parents, n))

x1 = Flatten()(critic_input1)
x1 = Dense(64, activation='relu')(x1)

x2 = Flatten()(critic_input2)
x2 = Dense(64, activation='relu')(x2)

combined = keras.layers.concatenate([x1, x2])

x3 = Dense(64, activation='relu')(combined)
critic_output = Dense(n, activation='linear')(x3)

critic_model = keras.models.Model([critic_input1, critic_input2], critic_output)
critic_model.compile(optimizer='adam', loss='mse')




In [213]:
import numpy as np

# Create a random example
# A batch of one: uniform random values of shape (1, n, m), not real genotypes.
example_population = np.random.rand(1, n, m)
# Send the example through the actor network
# NOTE: this rebinds `actor_output`, which the model-definition cell used for
# the symbolic output layer, to the prediction result.
actor_output = actor_model.predict(example_population)
print(f'actor model output : {actor_output.shape}')
print(f'example population shape, single sample : {example_population.shape}')

# Predict
# The critic takes the population together with the actor's output.
# `critic_output` is likewise rebound from the output layer to the prediction.
critic_output = critic_model.predict([ example_population, actor_output])

# Print the output
print(f'critic model output: {critic_output.shape}')
# First five critic values for the single batch item.
print(critic_output[0][0:5])

actor model output : (1, 100, 50)
example population shape, single sample : (1, 50, 30)
critic model output: (1, 50)
[-0.14453392 -0.21117567 -0.61733145  0.11062396  0.07448893]


In [195]:
# mid: the softmax distribution over individuals for parent slot 0 (last axis).
mid = actor_output[0,0,:]
# lat: the probability given to individual 0 in every parent slot. This runs
# across the slot axis, so it is not a normalised distribution.
lat = actor_output[0,:,0]

# NOTE(review): with an actor output of (1, 100, 50) these lengths would be 50
# and 100; the recorded (50,) (50,) output presumably predates the current
# model shape — re-run to confirm.
print(mid.shape)
print(lat.shape)

(50,)
(50,)


In [None]:
# Show the population size n and marker count m used to shape the networks.
print(n,m)

In [None]:
# NOTE(review): a Keras Model is not subscriptable as far as I know, so this
# line likely raises TypeError. If the first layer was meant,
# `actor_model.layers[0]` is the usual spelling — fix or delete this cell.
actor_model[0]

In [None]:
# Probability assigned to individual 0 in each parent slot (first batch item).
actor_output[0,:,0]

In [None]:
# Shape of the random test batch: (1, n, m).
example_population.shape

In [None]:
# Next steps
# 1) Take the actor model's output -> for each pair, grab the two parents -> use chromax to simulate offspring -> build the new population
# 2) Probably calculate the fitness / loss from the critic model's prediction
# ...
# 4) Write the training loop

In [None]:
# Shape of the actor prediction: (batch, total_parents, n).
actor_output.shape

In [None]:
# Must decide how to format/process the output consistently.
# We get 100 lists of 50 values.
# Each value is the probability of selecting that index as the parent for that slot.
# Parents are paired by consecutive slots: 1/2, 3/4, 5/6, ...
# If the same parent lands in both slots of a pair, do we make a DH line?



In [None]:
import matplotlib.pyplot as plt

def plot_histogram(ax, probabilities, label=None, color=None):
    """
    Draw a normalised, semi-transparent histogram of `probabilities` on `ax`.

    Parameters:
    ax: matplotlib Axes (or compatible object) to draw on.
    probabilities: values to bin; bin edges are chosen automatically.
    label (str, optional): Legend label for this series.
    color (optional): Bar colour.
    """
    hist_style = dict(bins='auto', density=True, alpha=0.5, label=label, color=color)
    ax.hist(probabilities, **hist_style)

    ax.set_title('Probability Distribution')
    ax.set_xlabel('Probability')
    ax.set_ylabel('Frequency')

def plot_bar(ax, probabilities, label=None, color=None):
    """
    Draw one semi-transparent bar per entry of `probabilities` on `ax`.

    Parameters:
    ax: matplotlib Axes (or compatible object) to draw on.
    probabilities: sized sequence of bar heights; bar i sits at x = i.
    label (str, optional): Legend label for this series.
    color (optional): Bar colour.
    """
    positions = range(len(probabilities))
    bar_style = dict(alpha=0.5, label=label, color=color)
    ax.bar(positions, probabilities, **bar_style)

    ax.set_title('Probability per Index')
    ax.set_xlabel('Index')
    ax.set_ylabel('Probability')

def plot_probabilities(probabilities1, probabilities2):
    """
    Compare two probability vectors side by side.

    The left panel overlays their histograms and the right panel overlays
    their per-index bars; the first series is blue and the second red.

    Parameters:
    probabilities1: first probability vector.
    probabilities2: second probability vector.

    Returns:
    None. The figure is shown with ``plt.show()``.
    """
    # One row, two columns: histogram on the left, bars on the right.
    fig, (hist_ax, bar_ax) = plt.subplots(1, 2, figsize=(10, 5))

    series = (
        (probabilities1, 'Probabilities 1', 'blue'),
        (probabilities2, 'Probabilities 2', 'red'),
    )
    for values, name, colour in series:
        plot_histogram(hist_ax, values, label=name, color=colour)
        plot_bar(bar_ax, values, label=name, color=colour)

    for axis in (hist_ax, bar_ax):
        axis.legend(loc='upper right')

    # Keep the two panels from overlapping before showing the figure.
    plt.tight_layout()
    plt.show()


In [None]:
# Compare the actor's distributions for parent slots 0 and 1 (first batch item).
plot_probabilities(actor_output[:,0,:][0], actor_output[:,1,:][0])

In [None]:
# Length of the slice along the parent-slot axis for individual 0 (first batch item).
actor_output[:,:,0][0].shape

In [None]:
def select_parent(probabilities):
    """
    Sample one position from `probabilities`, treated as a discrete distribution.

    Parameters:
    probabilities: 1-D sequence of probabilities that sum to 1.

    Returns:
    The sampled position, in [0, len(probabilities)).
    """
    candidates = np.arange(len(probabilities))
    return np.random.choice(candidates, p=probabilities)

# Sample one individual per parent slot from that slot's softmax distribution.
# Despite the name, this is a flat list with one index per slot.
selected_pairs = [select_parent(x) for x in actor_output[0,:,:]]
# Pair consecutive slots: (slot 0, slot 1), (slot 2, slot 3), ...
index_pairs = list(zip(selected_pairs[::2], selected_pairs[1::2]))

In [None]:
print(actor_output[0,:,:].shape) # (total_parents, n); (100, 50) for the recorded actor output
# Sample one parent index per slot -> total_parents indices
selected_pairs = [select_parent(x) for x in actor_output[0,:,:]]
print(len(selected_pairs)) # total_parents; 100 for the recorded actor output

In [None]:
# Individual 0's probability in each parent slot (first batch item).
actor_output[0,:,0]

In [None]:
# NOTE(review): actor_output[0,:,0] runs along the parent-slot axis, but the
# softmax normalises along the last (individual) axis. This slice therefore
# need not sum to 1, in which case np.random.choice raises ValueError.
# A per-slot distribution is actor_output[0, slot, :].
indices = np.arange(len(actor_output[0,:,0]))
np.random.choice(indices,p=actor_output[0,:,0])