<a href="https://colab.research.google.com/github/drscghosh/Testing/blob/master/DialGrifM3Model1WithoutCheck.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np
import itertools
import statsmodels.api as sm
import statsmodels.formula.api as smf

# -------------------------------
# Simulate a diallel mating design (Griffing’s Method 3)
# -------------------------------
# NOTE(review): the cross list built further down includes selfed parents and
# leaves out reciprocals. That layout is usually described as Griffing's
# Method 2 rather than Method 3 — confirm which design is intended.

# Labels of the parental lines and of the replications used by the simulation
parents = [f'P{k}' for k in range(1, 5)]
replications = [f'R{k}' for k in range(1, 4)]

# Generate unique crosses (selfs included, reciprocals excluded)
def generate_crosses(parents):
    """Return every unordered pair of parents, including each parent with itself.

    Args:
        parents: Iterable of parent labels, in the order they should be paired.

    Returns:
        A list of ``(p1, p2)`` tuples in which ``p1`` never comes after ``p2``
        in the input order, so a cross and its reciprocal are listed only
        once. For ``n`` parents the list has ``n * (n + 1) / 2`` entries.
    """
    # combinations_with_replacement walks the pairs in the same row-major
    # order as a nested loop over parents[i:], and keeps the (p, p) selfs.
    return list(itertools.combinations_with_replacement(parents, 2))

# Crosses to simulate, derived from the configured parents
crosses = generate_crosses(parents)

# Simulate data: one normally distributed yield (mean 12, sd 2) for every
# replication/cross combination. The seed is fixed and the draws are made
# replication by replication, then cross by cross, so reruns are identical.
np.random.seed(42)
data = [
    [rep, p1, p2, np.random.normal(loc=12.0, scale=2.0)]
    for rep in replications
    for p1, p2 in crosses
]

df = pd.DataFrame(data, columns=['Replication', 'Parent1', 'Parent2', 'Yield'])

# -------------------------------
# Griffing Method 3 Model: Fixed effects
# -------------------------------

# Order-independent genotype label: the two parent names, sorted and joined
# with '-', so (A, B) and (B, A) would map to the same genotype
df['Genotype'] = [
    '-'.join(sorted(pair)) for pair in zip(df['Parent1'], df['Parent2'])
]

# Ordinary least squares with genotype and replication as categorical factors
formula = 'Yield ~ C(Genotype) + C(Replication)'
model = smf.ols(formula, data=df).fit()

# Type II ANOVA table of the fitted model
anova_table = sm.stats.anova_lm(model, typ=2)
print("ANOVA Table:")
print(anova_table)

# -------------------------------
# Estimate GCA and SCA effects
# -------------------------------
# NOTE(review): the estimates below are plain marginal-mean deviations, and
# the expected value of a self cross adds the parent's GCA only once.
# Griffing's fixed-effect formulas weight these terms differently (a self is
# commonly modelled as mu + 2*g_i) — confirm this simplification is intended.

# Mean yield of each genotype over its replications
genotype_means = df.groupby('Genotype')['Yield'].mean()

# Mean of all individual yield observations
grand_mean = df['Yield'].mean()

# Number of parental lines (not referenced by the estimates in this section)
n_parents = len(parents)

# For every parent, the genotypes it takes part in; a self cross is listed
# once for its parent rather than twice
cross_matrix = {p: [] for p in parents}
for genotype in genotype_means.index:
    first, second = genotype.split('-')
    members = (first,) if first == second else (first, second)
    for member in members:
        cross_matrix[member].append(genotype)

# GCA: mean of the genotype means involving the parent, minus the grand mean
gca_effects = {
    p: np.mean([genotype_means[g] for g in cross_matrix[p]]) - grand_mean
    for p in parents
}

# SCA: observed genotype mean minus the value predicted from the grand mean
# and the GCA of the parent(s) involved
sca_effects = {}
for genotype in genotype_means.index:
    first, second = genotype.split('-')
    predicted = grand_mean + gca_effects[first]
    if first != second:
        predicted = predicted + gca_effects[second]
    sca_effects[genotype] = genotype_means[genotype] - predicted


# -------------------------------
# Output GCA and SCA
# -------------------------------
def _print_effects(title, effects):
    """Print a title line followed by one 'label: value' line per effect."""
    print(title)
    for k, v in effects.items():
        print(f"{k}: {v:.4f}")


_print_effects("\nGCA Effects:", gca_effects)
_print_effects("\nSCA Effects:", sca_effects)


ANOVA Table:
                   sum_sq    df         F    PR(>F)
C(Genotype)     24.566293   9.0  1.271600  0.316324
C(Replication)  30.756640   2.0  7.164109  0.005143
Residual        38.638409  18.0       NaN       NaN

GCA Effects:
P1: 0.3449
P2: -0.3909
P3: 0.3348
P4: -0.5469

SCA Effects:
P1-P1: 1.0307
P1-P2: -0.1308
P1-P3: 0.3348
P1-P4: -0.6317
P2-P2: -0.9017
P2-P3: -0.0246
P2-P4: 0.9244
P3-P3: 1.0131
P3-P4: -0.7303
P4-P4: 0.1489
