<a href="https://colab.research.google.com/github/drscghosh/Testing/blob/master/DiallelGriffingMethod3Model1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
import itertools
import statsmodels.api as sm
from statsmodels.formula.api import ols

# -------------------------
# Step 1: Simulate data
# -------------------------
parents = ['P1', 'P2', 'P3', 'P4']
check_varieties = ['Check1', 'Check2']

# Generate F1 crosses (no reciprocals, no selfs)
crosses = [f"{p1}x{p2}" for i, p1 in enumerate(parents) for p2 in parents[i+1:]]

# Combine all entries
entries = parents + crosses + check_varieties

# Simulate replicated data (e.g., 3 replicates per entry)
np.random.seed(0)
data = []
for entry in entries:
    for rep in range(1, 4):  # 3 replicates
        yield_value = np.random.normal(loc=100, scale=10)  # simulate yield data
        data.append({'Genotype': entry, 'Replication': rep, 'Yield': yield_value})

df = pd.DataFrame(data)

# -------------------------
# Step 2: Create ANOVA model
# -------------------------
# Model: Yield ~ Replication + Genotype (fixed effects)
model = ols('Yield ~ C(Replication) + C(Genotype)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)

print("\nANOVA Table (Fixed Effects Model):")
print(anova_table)

# -------------------------
# Step 3: Estimate GCA & SCA
# -------------------------
# Extract only parent and cross data (exclude checks)
diallel_df = df[~df['Genotype'].isin(check_varieties)].copy()

# Label each row with Parent1 and Parent2
def get_parents(genotype):
    if 'x' in genotype:
        return genotype.split('x')
    else:
        return [genotype, genotype]

diallel_df[['Parent1', 'Parent2']] = diallel_df['Genotype'].apply(lambda g: pd.Series(get_parents(g)))

# Estimate GCA
# Mean per parent (including as parent1 or parent2)
all_parents = list(set(parents))
gca_values = {}
overall_mean = diallel_df['Yield'].mean()

for parent in all_parents:
    parent_data = diallel_df[(diallel_df['Parent1'] == parent) | (diallel_df['Parent2'] == parent)]
    gca_values[parent] = parent_data['Yield'].mean() - overall_mean

# Estimate SCA
sca_values = {}
for cross in crosses:
    p1, p2 = cross.split('x')
    cross_data = diallel_df[diallel_df['Genotype'] == cross]
    cross_mean = cross_data['Yield'].mean()
    sca = cross_mean - overall_mean - gca_values[p1] - gca_values[p2]
    sca_values[cross] = sca

# -------------------------
# Step 4: Display results
# -------------------------
print("\nGCA Effects:")
for parent, gca in gca_values.items():
    print(f"{parent}: {gca:.3f}")

print("\nSCA Effects:")
for cross, sca in sca_values.items():
    print(f"{cross}: {sca:.3f}")

# -------------------------
# Step 5: Compare checks
# -------------------------
print("\nCheck Variety Means:")
for check in check_varieties:
    mean_yield = df[df['Genotype'] == check]['Yield'].mean()
    print(f"{check}: {mean_yield:.2f}")




ANOVA Table (Fixed Effects Model):
                     sum_sq    df         F    PR(>F)
C(Replication)   310.544318   2.0  1.340299  0.282317
C(Genotype)     1421.139044  11.0  1.115198  0.395420
Residual        2548.675686  22.0       NaN       NaN

GCA Effects:
P3: 0.496
P2: 0.650
P4: -2.269
P1: -1.930

SCA Effects:
P1xP2: 1.273
P1xP3: 2.415
P1xP4: -10.542
P2xP3: -2.988
P2xP4: 0.061
P3xP4: 6.728

Check Variety Means:
Check1: 98.82
Check2: 92.76
