In [1]:
import torch
from typing import Tuple
import numpy as np
import pandas as pd

In [2]:
def generate_population(num_populations: int, num_individuals: int, num_markers: int, ploidy: int) -> torch.Tensor:
    """
    Build a random founder population of binary allele calls.

    Parameters:
    num_populations (int): How many independent populations to create.
    num_individuals (int): How many individuals each population holds.
    num_markers (int): How many genetic markers each individual carries.
    ploidy (int): How many chromosome copies exist per marker (e.g., 2 for diploid).

    Returns:
    torch.Tensor: An int8 tensor of shape (num_populations, num_individuals, num_markers, ploidy)
                  whose entries are randomly drawn 0s and 1s.
    """
    shape = (num_populations, num_individuals, num_markers, ploidy)
    # randint's upper bound is exclusive, so high=2 can only produce 0 or 1.
    return torch.randint(low=0, high=2, size=shape, dtype=torch.int8)


# Smoke test: a freshly generated population has the requested shape, dtype and alphabet
num_populations, num_individuals, num_markers, ploidy = 3, 50, 100, 2

test_tensor = generate_population(num_populations, num_individuals, num_markers, ploidy)
assert tuple(test_tensor.shape) == (num_populations, num_individuals, num_markers, ploidy)
assert test_tensor.dtype == torch.int8
# Every entry must be exactly 0 or exactly 1
assert bool(((test_tensor == 0) | (test_tensor == 1)).all()), "Tensor should only contain 0s and 1s"

In [3]:
def generate_map(number_markers: int) -> pd.DataFrame:
    """The genetic map represents the rules of the game. It assigns the truth value to each marker.

    Marker effects start as Poisson counts (rate drawn at random from 1..9), are divided by the
    largest count and then mapped linearly onto [-1, 1]. If no positive count was drawn (empty map,
    or every draw was 0) the division is skipped, so every marker gets the lower bound -1 instead
    of NaN.

    :param number_markers: the total number of markers to include in this genetic map. must correspond to population shape
    :type number_markers: int
    :return: one row per marker with columns ``'CHR.PHYS'`` (always ``'1A'``), ``'Yield'``
             (marker effect in [-1, 1]) and ``'cM'`` (evenly spaced from 0 to 1, i.e. 0..100 scaled by 0.01)
    :rtype: pd.DataFrame
    """
    # 'chr' will always be '1A' for every marker
    chr_array = ['1A'] * number_markers

    # 'yield': raw marker strengths are Poisson counts; the rate itself is a random integer in [1, 9]
    poisson_values = np.random.poisson(np.random.randint(1, 10), size=number_markers)

    # Scale the Poisson values to the range [0, 1].
    # np.max fails on an empty array and an all-zero draw would give 0/0 = NaN, so both
    # degenerate cases keep the (all-zero) counts unscaled.
    peak = poisson_values.max() if poisson_values.size else 0
    if peak > 0:
        scaled_poisson_values = poisson_values / peak
    else:
        scaled_poisson_values = np.zeros(poisson_values.shape, dtype=float)

    # Stretch and shift the values to the range [-1, 1]
    yield_array = (scaled_poisson_values * 2) - 1

    # 'cM': create an array for number_markers length evenly sampled between 0 and 100
    cM_array = np.linspace(0, 100, num=number_markers)

    # Create the DataFrame with the auto-generated data (cM is stored scaled by 0.01)
    df = pd.DataFrame({'CHR.PHYS': chr_array, 'Yield': yield_array, 'cM': cM_array*.01})

    return df

# Build a 100-marker genetic map at notebook scope.
# NOTE(review): the name `map` shadows Python's built-in `map` for every cell run after this one;
# consider renaming (e.g. `genetic_map`) once all cells that read it have been checked.
num_markers = 100
map = generate_map(num_markers)

In [4]:
def generate_scores(test_pop: torch.Tensor, test_map: pd.DataFrame) -> torch.Tensor:
    """
    Generate scores for each individual in each population based on the genetic markers and yield values.

    An individual's score is the sum over markers of (number of 1-alleles at the marker) * (marker yield).

    Parameters:
    test_pop (torch.Tensor): A tensor representing the population with shape (num_populations, num_individuals, num_markers, ploidy).
    test_map (pd.DataFrame): A DataFrame containing the 'Yield' values for each marker, one row per marker.

    Returns:
    torch.Tensor: A float32 tensor of shape (num_populations, num_individuals) representing the scores of each
                  individual, on the same device as test_pop.
    """
    # Collapse the ploidy axis: per-marker count of 1-alleles for every individual
    allele_counts = test_pop.sum(dim=-1).to(torch.float32)

    # Build the per-marker weights on the population's device so matmul never mixes devices
    yield_values = torch.tensor(test_map['Yield'].values, dtype=torch.float32, device=test_pop.device)

    # (populations, individuals, markers) @ (markers,) -> (populations, individuals)
    return torch.matmul(allele_counts, yield_values)

In [5]:
import torch

def get_top_indices(scores: torch.Tensor, top_x: int) -> torch.Tensor:
    """
    Find, for every population, which individuals hold the top_x highest scores.

    Parameters:
    scores (torch.Tensor): A tensor of shape (num_populations, num_individuals) holding one score per individual.
    top_x (int): How many of the best individuals to report per population.

    Returns:
    torch.Tensor: A tensor of shape (num_populations, top_x) with the individual indices,
                  ordered from highest to lowest score within each population.
    """
    # topk yields a (values, indices) pair; only the positions are needed here
    ranked = scores.topk(top_x, dim=1, largest=True, sorted=True)
    return ranked.indices


In [6]:
def select_top_individuals(test_pop: torch.Tensor, top_index: torch.Tensor) -> torch.Tensor:
    """
    Select the top individuals from each population based on the top indices.

    Row p of top_index is applied only to population p. Indexing is done per population, so
    non-contiguous inputs are accepted and an index outside a population's range raises
    IndexError rather than reaching into a neighbouring population.

    Parameters:
    test_pop (torch.Tensor): A tensor of shape (num_populations, num_individuals, num_markers, ploidy)
                             representing the populations.
    top_index (torch.Tensor): A tensor of shape (num_populations, top_x) containing the indices of the top
                              individuals in each population.

    Returns:
    torch.Tensor: A tensor of shape (num_populations, top_x, num_markers, ploidy) containing the top individuals.
    """
    # Unpacking also enforces that top_index is 2-D
    num_populations, _ = top_index.shape

    # Integer-array indexing wants long indices living next to the data
    individual_idx = top_index.to(device=test_pop.device, dtype=torch.long)

    # Column vector of population ids; broadcasts against (num_populations, top_x)
    population_idx = torch.arange(num_populations, device=test_pop.device).unsqueeze(1)

    # Pairs (p, top_index[p, j]) pick whole individuals; trailing marker/ploidy axes come along
    return test_pop[population_idx, individual_idx]


In [16]:
import torch

def generate_offspring(selected_parents: torch.Tensor, total_offspring: int) -> torch.Tensor:
    """
    Generate offspring tensors by randomly inheriting one ploidy value from each of the parents.

    For every offspring two parents are drawn at random (with replacement, so both draws may pick
    the same individual) from that offspring's own population. At each marker the offspring's
    first chromosome copy is a randomly chosen copy of the first parent and its second copy is a
    randomly chosen copy of the second parent; the copy choice is made independently per marker.
    All draws are made in bulk instead of per marker in Python loops.

    Parameters:
    selected_parents (torch.Tensor): A tensor of shape (population, individual, marker, ploidy)
                                     representing the selected parents. ploidy must be 2, because
                                     each offspring receives exactly one copy from each of two parents.
    total_offspring (int): The total number of offspring to generate for each population.

    Returns:
    torch.Tensor: An int8 tensor representing the offspring with shape (population, total_offspring, marker, ploidy),
                  on the same device as selected_parents.

    Raises:
    ValueError: If the ploidy axis of selected_parents is not of size 2.
    """
    num_populations, num_individuals, num_markers, ploidy = selected_parents.shape

    # Two parents x one copy each fills exactly two slots; other ploidies cannot fill the output
    if ploidy != 2:
        raise ValueError(
            f"generate_offspring supports only diploid parents (ploidy == 2), got ploidy={ploidy}"
        )

    device = selected_parents.device

    # Index tensors are shaped to broadcast to (population, offspring, marker, parent_slot)
    population_idx = torch.arange(num_populations, device=device).view(-1, 1, 1, 1)
    marker_idx = torch.arange(num_markers, device=device).view(1, 1, -1, 1)

    # One pair of parents per offspring, shared across all markers
    parent_idx = torch.randint(
        0, num_individuals, (num_populations, total_offspring, 1, 2), device=device
    )

    # Which chromosome copy each parent passes on, chosen independently at every marker
    allele_idx = torch.randint(
        0, ploidy, (num_populations, total_offspring, num_markers, 2), device=device
    )

    # Single gather: result[p, o, m, s] = parents[p, parent_idx[p, o, s], m, allele_idx[p, o, m, s]]
    offspring = selected_parents[population_idx, parent_idx, marker_idx, allele_idx]

    # The offspring tensor has always been int8, regardless of the parents' dtype
    return offspring.to(torch.int8)


In [8]:
# End-to-end example: build a population and a genetic map, then score every individual
num_populations, num_individuals, num_markers, ploidy = 3, 50, 100, 2

test_pop = generate_population(num_populations, num_individuals, num_markers, ploidy)
test_map = generate_map(num_markers)
test_score = generate_scores(test_pop, test_map)

# Types, one per line: torch.Tensor, pandas.DataFrame, pandas.Series
print(type(test_pop), type(test_map), type(test_map['Yield']), sep="\n")

# Shapes, one per line: torch.Size([3, 50, 100, 2]), (100,), then the score shape
print(test_pop.shape, test_map['Yield'].shape, test_score.shape, sep="\n")

<class 'torch.Tensor'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>
torch.Size([3, 50, 100, 2])
(100,)
torch.Size([3, 50])


In [13]:
# Keep the 6 highest-scoring individuals of each population to serve as parents.
top_index = get_top_indices(test_score, 6)
selected_parents = select_top_individuals(test_pop, top_index)

In [17]:
# selected_parents has shape (population, individual, marker, ploidy); breed 10 offspring per population
offspring = generate_offspring(selected_parents,10)

In [18]:
# Sanity check: expect (population, total_offspring, marker, ploidy) = (3, 10, 100, 2)
offspring.shape

torch.Size([3, 10, 100, 2])