# Genetic Inheritance Patterns: From Paper to Python
## A Jupyter Notebook for Science Teachers

### Introduction
This notebook demonstrates how to use Python to simulate genetic inheritance patterns, moving beyond traditional Punnett squares to explore inheritance over multiple generations with large sample sizes.

**Learning Objectives:**
- Understand how to represent genetic crosses computationally
- Compare theoretical predictions with simulated results
- Visualize inheritance patterns over multiple generations
- Analyze the difference between small and large sample sizes

In [None]:
# Cell 1: Import necessary libraries
import random
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Confirm the imports worked and list what each library is used for.
# A single print call with a newline separator emits one line per argument.
print(
    "Libraries imported successfully!",
    "We'll use:",
    "- random: for simulating genetic crosses",
    "- matplotlib: for creating visualizations",
    "- pandas: for organizing our data",
    "- numpy: for mathematical calculations",
    sep="\n",
)

## Basic Genetics - Single Gene Cross

Let's start with a simple example: flower color inheritance
- R = Red (dominant allele)
- r = white (recessive allele)

**Traditional approach:** RR × Rr cross using a Punnett square  
**Expected ratio:** 1 RR : 1 Rr : 0 rr (all red flowers)

In [None]:
def single_cross(parent1, parent2):
    """
    Produce one offspring genotype from two parents.

    Each parent is a list of two alleles. One allele is drawn at random
    from each parent and the resulting pair is returned as a sorted list,
    so the same genotype is always written in the same allele order.
    """
    # Parent 1's allele is drawn first, then parent 2's
    inherited = [random.choice(parent) for parent in (parent1, parent2)]
    inherited.sort()
    return inherited

# Parental genotypes, each written as a two-allele list
parent_RR = list('RR')  # Homozygous dominant
parent_Rr = list('Rr')  # Heterozygous

# Run one cross and report the genotype together with its phenotype
# (one dominant R allele is enough for a red flower)
offspring = single_cross(parent_RR, parent_Rr)
print(
    f"Single cross result: {offspring}",
    f"Phenotype: {'Red' if 'R' in offspring else 'White'}",
    sep="\n",
)

## Simulate Multiple Crosses

Now let's simulate many crosses to see if our results match the expected ratios

In [None]:
def simulate_crosses(parent1, parent2, num_crosses=100):
    """
    Run `num_crosses` independent crosses between the two parents.

    Returns a list with one genotype string (e.g. 'Rr') per offspring,
    in the order the crosses were performed.
    """
    # Joining the allele list into a string makes the results easy to count
    return [
        ''.join(single_cross(parent1, parent2))
        for _ in range(num_crosses)
    ]

# Simulate 1000 crosses
results_1000 = simulate_crosses(parent_RR, parent_Rr, 1000)

# Count the genotypes
from collections import Counter
genotype_counts = Counter(results_1000)

# Take the sample size from the data itself, so that changing the number of
# crosses above keeps the header, percentages and expected counts correct.
total_crosses = len(results_1000)

print(f"Results from {total_crosses} crosses:")
for genotype, count in genotype_counts.items():
    percentage = (count / total_crosses) * 100
    print(f"{genotype}: {count} ({percentage:.1f}%)")

# RR × Rr: the RR parent always passes on R, the Rr parent passes on R or r
# with equal chance, so half the offspring are RR, half are Rr and none are rr.
print("\nExpected from theory:")
print(f"RR: 50% ({total_crosses * 0.5:.0f})")
print(f"Rr: 50% ({total_crosses * 0.5:.0f})")
print("rr: 0% (0)")

## Visualize the Results

Let's create a bar chart to compare our simulation with theoretical expectations

In [None]:
# Genotype categories with their simulated and expected counts
genotypes = ['RR', 'Rr', 'rr']
simulated = [genotype_counts.get(label, 0) for label in genotypes]
theoretical = [500, 500, 0]  # Expected counts for 1000 crosses

# Grouped bar chart: for every genotype the simulated bar sits to the left
# of the tick and the theoretical bar to the right
plt.figure(figsize=(10, 6))
x = np.arange(len(genotypes))
width = 0.35

series = (
    (x - width/2, simulated, 'Simulated', 'skyblue'),
    (x + width/2, theoretical, 'Theoretical', 'lightcoral'),
)
for positions, heights, series_label, bar_color in series:
    plt.bar(positions, heights, width, label=series_label, alpha=0.8, color=bar_color)

plt.title('RR × Rr Cross: Simulated vs Theoretical Results (1000 crosses)')
plt.xlabel('Genotype')
plt.ylabel('Number of Offspring')
plt.xticks(x, genotypes)
plt.grid(axis='y', alpha=0.3)
plt.legend()

# Write each bar's value just above it
for positions, heights, _, _ in series:
    for position, bar_height in zip(positions, heights):
        plt.text(position, bar_height + 10, str(bar_height), ha='center', va='bottom')

plt.tight_layout()
plt.show()

## Effect of Sample Size

Let's explore how sample size affects the accuracy of our results

In [None]:
def test_sample_sizes():
    """
    Measure the share of rr offspring at several sample sizes.

    For each sample size an Rr x Rr cross is simulated and the percentage
    of 'rr' genotypes is recorded. Returns the tuple
    (sample_sizes, rr_percentages), two lists of equal length.
    """
    sample_sizes = [10, 50, 100, 500, 1000, 5000]

    # One simulation per sample size, run in increasing order of size
    rr_percentages = [
        (simulate_crosses(parent_Rr, parent_Rr, size).count('rr') / size) * 100
        for size in sample_sizes
    ]

    return sample_sizes, rr_percentages

# An Rr x Rr cross is expected to give 25% rr offspring (recessive phenotype)
sizes, percentages = test_sample_sizes()

plt.figure(figsize=(10, 6))

# Observed rr share per sample size, with the theoretical value as a dashed line
plt.plot(sizes, percentages, 'bo-', linewidth=2, markersize=8)
plt.axhline(y=25, color='red', linestyle='--', linewidth=2, label='Expected (25%)')

# Sample sizes span several orders of magnitude, hence the log x-axis
plt.xscale('log')
plt.title('Effect of Sample Size on Genetic Cross Results (Rr × Rr)')
plt.xlabel('Sample Size')
plt.ylabel('Percentage of rr offspring')
plt.grid(True, alpha=0.3)
plt.legend()

# Label every data point with its percentage, slightly above the marker
for point in zip(sizes, percentages):
    plt.annotate(f'{point[1]:.1f}%', point,
                 textcoords="offset points", xytext=(0, 10), ha='center')

plt.tight_layout()
plt.show()

print("Notice how larger sample sizes give results closer to the theoretical expectation!")

## Multi-Generation Analysis

Let's simulate inheritance over multiple generations

In [None]:
def create_f2_generation(f1_population, num_offspring=1000):
    """
    Breed an F2 generation from randomly paired F1 individuals.

    For every offspring, two parents are drawn independently at random from
    `f1_population` (so the same individual can be drawn twice) and crossed.
    Returns a list of genotype strings, one per offspring.
    """
    def random_mating():
        # The first parent is drawn before the second one
        first_parent = random.choice(f1_population)
        second_parent = random.choice(f1_population)
        return ''.join(single_cross(first_parent, second_parent))

    return [random_mating() for _ in range(num_offspring)]

# Start with P generation: RR x rr
parent_RR = ['R', 'R']
parent_rr = ['r', 'r']

# F1 generation - all should be Rr, because every offspring receives
# an R from the RR parent and an r from the rr parent
f1_generation = [single_cross(parent_RR, parent_rr) for _ in range(100)]

print("F1 Generation (first 10 individuals):")
for number, individual in enumerate(f1_generation[:10], start=1):
    print(f"Individual {number}: {''.join(individual)}")

# F2 generation - should show 1:2:1 ratio
f2_results = create_f2_generation(f1_generation, 1000)
f2_counts = Counter(f2_results)

# Take the F2 size from the data itself, so that changing the number of
# offspring above keeps the header, percentages and expected counts correct.
f2_total = len(f2_results)

print(f"\nF2 Generation Results ({f2_total} individuals):")
for genotype, count in f2_counts.items():
    percentage = (count / f2_total) * 100
    print(f"{genotype}: {count} ({percentage:.1f}%)")

print("\nExpected F2 ratios:")
print(f"RR: 25% ({f2_total * 0.25:.0f})")
print(f"Rr: 50% ({f2_total * 0.50:.0f})")
print(f"rr: 25% ({f2_total * 0.25:.0f})")

## Interactive Exploration Function

Create a function that teachers and students can use to explore different crosses

In [None]:
def explore_genetic_cross(parent1_genotype, parent2_genotype, num_crosses=1000, show_plot=True):
    """
    Simulate a cross, print a genotype/phenotype table and optionally plot it.

    Parameters:
    parent1_genotype: list of alleles for parent 1 (e.g., ['R', 'r'])
    parent2_genotype: list of alleles for parent 2 (e.g., ['R', 'R'])
    num_crosses: number of crosses to simulate
    show_plot: whether to display a bar chart of the genotype counts

    Returns:
    Counter mapping each genotype string to the number of offspring with it
    """
    counts = Counter(simulate_crosses(parent1_genotype, parent2_genotype, num_crosses))

    # Both the printed header and the plot title use the same "P1 × P2" label
    cross_label = f"{''.join(parent1_genotype)} × {''.join(parent2_genotype)}"
    ordered_genotypes = sorted(counts)

    # Text summary: one row per observed genotype
    print(f"Cross: {cross_label}")
    print(f"Results from {num_crosses} crosses:")
    print("-" * 30)

    for genotype in ordered_genotypes:
        count = counts[genotype]
        percentage = (count / num_crosses) * 100
        # A single dominant R allele gives the red phenotype
        phenotype = "Red" if 'R' in genotype else "White"
        print(f"{genotype}: {count:4d} ({percentage:5.1f}%) - {phenotype}")

    if not show_plot:
        return counts

    # Bars are filled with the phenotype colour; white bars get a black
    # outline so they stay visible, red bars a dark red one
    values = [counts[g] for g in ordered_genotypes]
    colors = ['red' if 'R' in g else 'white' for g in ordered_genotypes]
    edge_colors = ['darkred' if 'R' in g else 'black' for g in ordered_genotypes]

    plt.figure(figsize=(8, 5))
    bars = plt.bar(ordered_genotypes, values, color=colors, edgecolor=edge_colors, linewidth=2)
    plt.title(f"{cross_label} Cross Results")
    plt.xlabel('Genotype')
    plt.ylabel('Number of Offspring')
    plt.grid(axis='y', alpha=0.3)

    # Write each count just above the centre of its bar
    for bar, value in zip(bars, values):
        label_x = bar.get_x() + bar.get_width()/2
        label_y = bar.get_height() + 5
        plt.text(label_x, label_y, str(value), ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

    return counts

## Try Different Crosses

Now let's try some different genetic crosses

In [None]:
# Run three classic monohybrid crosses through explore_genetic_cross.
# Each call prints a genotype table and, because show_plot is left at its
# default of True, also draws a bar chart. A line of 50 '=' characters
# separates the output of consecutive crosses.
print("Exploring different genetic crosses:\n")

# Cross 1: Homozygous dominant × Homozygous recessive (500 crosses)
print("1. RR × rr (should produce all Rr)")
explore_genetic_cross(['R', 'R'], ['r', 'r'], 500)

print("\n" + "="*50 + "\n")

# Cross 2: Heterozygous × Heterozygous (1000 crosses)
print("2. Rr × Rr (should produce 1:2:1 ratio)")
explore_genetic_cross(['R', 'r'], ['R', 'r'], 1000)

print("\n" + "="*50 + "\n")

# Cross 3: Heterozygous × Homozygous recessive (test cross, 1000 crosses)
# NOTE(review): this call is the final expression of the cell, so a notebook
# front end will presumably echo the Counter it returns - confirm that this
# is wanted.
print("3. Rr × rr (test cross - should produce 1:1 ratio)")
explore_genetic_cross(['R', 'r'], ['r', 'r'], 1000)

## Classroom Discussion Questions

**Questions for students to explore:**

1. How do the simulation results compare with your Punnett square predictions?

2. What happens to the accuracy of results as you increase the number of crosses?

3. Why might real genetic crosses not perfectly match theoretical ratios?

4. How could you modify this code to simulate:
   - Incomplete dominance?
   - Codominance?
   - Multiple alleles?
   - Two-gene crosses?

5. What are the advantages of using computational simulations over traditional Punnett squares in genetics education?

In [None]:
# Closing summary: one print call, one output line per argument
# (the leading "\n" on the heading produces the blank line before it)
print(
    "Congratulations! You've completed the genetics simulation notebook.",
    "\nKey takeaways:",
    "- Computational simulations can verify theoretical predictions",
    "- Large sample sizes give more accurate results",
    "- Code allows exploration of complex scenarios beyond simple Punnett squares",
    "- Students can easily test hypotheses and see immediate results",
    sep="\n",
)

## Extension Activities

**Extension activities for advanced students:**

1. Modify the code to include multiple traits (dihybrid crosses)
2. Add incomplete dominance (pink flowers from red × white)
3. Simulate natural selection by having certain genotypes survive better
4. Create a pedigree analyzer that traces traits through family trees
5. Add mutations to the simulation

In [None]:
def dihybrid_cross_demo(num_offspring=1600):
    """
    Bonus: Simple demonstration of a two-trait cross
    Traits: Flower color (R/r) and Plant height (T/t)

    Simulates an RrTt × RrTt cross and prints the number of offspring in each
    of the four phenotype classes next to the number expected from the
    9:3:3:1 ratio.

    Parameters:
    num_offspring: number of offspring to simulate (default 1600, i.e. an
                   average of 100 per cell of the 4 × 4 gamete grid)

    Returns:
    Counter mapping phenotype name (e.g. "Red-Tall") to its simulated count
    """
    # Expected phenotype ratio for RrTt × RrTt, in sixteenths
    expected_ratio = {"Red-Tall": 9, "Red-Short": 3, "White-Tall": 3, "White-Short": 1}

    def create_gametes(genotype):
        """Create all possible gametes from a four-allele genotype"""
        # genotype is like ['R', 'r', 'T', 't']: the two color alleles come
        # first, then the two height alleles. A gamete carries one of each.
        return [
            [color_allele, height_allele]
            for color_allele in genotype[:2]
            for height_allele in genotype[2:]
        ]

    def dihybrid_offspring(parent1_genotype, parent2_genotype):
        """Create offspring from dihybrid cross"""
        # Random fertilization: one gamete from each parent
        gamete1 = random.choice(create_gametes(parent1_genotype))
        gamete2 = random.choice(create_gametes(parent2_genotype))

        # Sort each trait's allele pair on its own so the result keeps the
        # layout [color, color, height, height]. Sorting all four alleles
        # together would put every uppercase letter first ('R' < 'T' < 'r'
        # < 't') and mix the two traits, which breaks the slicing below.
        color_pair = sorted([gamete1[0], gamete2[0]])
        height_pair = sorted([gamete1[1], gamete2[1]])
        return color_pair + height_pair

    # Example: RrTt × RrTt cross
    parent1 = ['R', 'r', 'T', 't']
    parent2 = ['R', 'r', 'T', 't']

    results = []
    for _ in range(num_offspring):
        offspring = dihybrid_offspring(parent1, parent2)
        # Determine phenotype: one dominant allele is enough for each trait
        color = "Red" if 'R' in offspring[:2] else "White"
        height = "Tall" if 'T' in offspring[2:] else "Short"
        results.append(f"{color}-{height}")

    phenotype_counts = Counter(results)
    print("Dihybrid Cross Results (RrTt × RrTt):")
    print("Expected ratio: 9 Red-Tall : 3 Red-Short : 3 White-Tall : 1 White-Short")
    print("\nSimulated results:")
    # Iterate over the expected classes so that a class with no offspring
    # is still reported (with a count of 0)
    for phenotype in sorted(expected_ratio):
        expected = (expected_ratio[phenotype] / 16) * len(results)
        print(f"{phenotype}: {phenotype_counts[phenotype]} (expected: {expected:.0f})")

    return phenotype_counts

# Run the bonus two-trait demonstration
print("\nBonus: Dihybrid Cross Demonstration")
dihybrid_cross_demo()

# Closing banner framed by two 60-character rules
print(
    "\n" + "=" * 60,
    "END OF NOTEBOOK",
    "Save your work and experiment with different parameters!",
    "=" * 60,
    sep="\n",
)