<a href="https://colab.research.google.com/github/drscghosh/Testing/blob/master/Heterosis_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from itertools import combinations

# 1. Simulate sample data
np.random.seed(42)
num_parents = 10
num_checks = 2
num_replicates = 3

# Parent, hybrid and check names
parents = [f"P{i+1}" for i in range(num_parents)]
hybrids = [f"{p1}x{p2}" for p1, p2 in combinations(parents, 2)]
checks = [f"Check{i+1}" for i in range(num_checks)]
genotypes = parents + hybrids + checks

# Total genotypes
data_rows = []

for genotype in genotypes:
    for rep in range(1, num_replicates + 1):
        # Simulated phenotypic value: base + random variation
        base_value = np.random.normal(50, 10)
        rep_effect = np.random.normal(0, 2)
        y = base_value + rep_effect
        data_rows.append({
            "Genotype": genotype,
            "Replication": rep,
            "Trait": y
        })

# Create DataFrame
df = pd.DataFrame(data_rows)

# 2. Fixed effect model (RCBD with genotype as fixed)
model = smf.ols('Trait ~ C(Genotype) + C(Replication)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)

print("\n=== ANOVA Table ===")
print(anova_table)

# 3. Get genotype means
means = df.groupby("Genotype")["Trait"].mean()

# 4. Calculate Heterosis (mid-parent and better-parent)
heterosis_data = []

for hybrid in hybrids:
    p1, p2 = hybrid.split('x')
    mid_parent = (means[p1] + means[p2]) / 2
    better_parent = max(means[p1], means[p2])
    hybrid_mean = means[hybrid]

    mid_parent_het = ((hybrid_mean - mid_parent) / mid_parent) * 100
    better_parent_het = ((hybrid_mean - better_parent) / better_parent) * 100

    heterosis_data.append({
        "Hybrid": hybrid,
        "Hybrid Mean": hybrid_mean,
        "Mid Parent Mean": mid_parent,
        "Better Parent Mean": better_parent,
        "Mid Parent Heterosis (%)": mid_parent_het,
        "Better Parent Heterosis (%)": better_parent_het
    })

heterosis_df = pd.DataFrame(heterosis_data)

print("\n=== Heterosis Estimates ===")
print(heterosis_df.round(2))



=== ANOVA Table ===
                      sum_sq     df         F    PR(>F)
C(Genotype)      4326.515041   56.0  0.765708  0.865591
C(Replication)    153.991316    2.0  0.763096  0.468629
Residual        11300.697941  112.0       NaN       NaN

=== Heterosis Estimates ===
    Hybrid  Hybrid Mean  Mid Parent Mean  Better Parent Mean  \
0    P1xP2        47.41            53.26               53.80   
1    P1xP3        53.43            47.02               53.80   
2    P1xP4        42.83            51.92               53.80   
3    P1xP5        53.07            48.14               53.80   
4    P1xP6        48.82            52.10               53.80   
5    P1xP7        46.13            50.74               53.80   
6    P1xP8        46.02            48.49               53.80   
7    P1xP9        54.46            51.37               53.80   
8   P1xP10        46.24            53.30               53.80   
9    P2xP3        52.98            46.48               52.72   
10   P2xP4        59.1