# PREVIOUS WORK

In [None]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Work on the Russell Ranch data as a pandas DataFrame. If the source object
# still exposes .to_df() (i.e. it is a datascience Table), convert it;
# otherwise it is used unchanged.
RR_normalized_df = (
    RR_normalized_plant_growth_df.to_df()
    if hasattr(RR_normalized_plant_growth_df, "to_df")
    else RR_normalized_plant_growth_df
)

# One-way ANOVA model: 'shoot_length_cm' is the dependent variable and
# 'treatment' is the only factor. The model is fit on RR_normalized_df, the
# same data the Tukey HSD follow-up uses, so the two analyses stay consistent
# with each other and with the treatment-only interpretation written up below.
model_shoot_length = ols('shoot_length_cm ~ C(treatment)', data=RR_normalized_df).fit()

# Perform ANOVA (typ=2 selects the Type II table)
anova_table_shoot_length = sm.stats.anova_lm(model_shoot_length, typ=2)

# Display results
print("ANOVA Table:")
print(anova_table_shoot_length)

The ANOVA table indicates the following:

Sum of Squares (sum_sq):

C(treatment): The variation in shoot_length_cm explained by differences in the treatment groups (133.81).
Residual: The unexplained variation or error (1103.76).
Degrees of Freedom (df):

C(treatment): 2 degrees of freedom (3 treatment groups - 1).
Residual: 68 degrees of freedom (total observations - number of groups).
F-statistic (F):

The F-value (4.12) tests whether the variation among treatment group means is significantly greater than what would be expected by chance.
p-value (PR(>F)):

The p-value (0.020433) is the probability of observing an F-statistic at least as large as the one obtained, assuming the null hypothesis (no difference among treatment means) is true.
Interpretation:
Significance: The p-value is 0.020433, which is below the common threshold of 0.05. Therefore, you can reject the null hypothesis and conclude that there are statistically significant differences in shoot length among the treatment groups.
Next Steps:
Post-hoc Tests:

Perform pairwise comparisons (e.g., Tukey's HSD) to identify which treatments differ significantly.

In [None]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Tukey HSD: pairwise comparison of shoot_length_cm between the treatment
# groups of the Russell Ranch data, at a significance level of 0.05.
tukey_shoot_length = pairwise_tukeyhsd(
    RR_normalized_df['shoot_length_cm'],
    RR_normalized_df['treatment'],
    alpha=0.05,
)
print(tukey_shoot_length)

Interpretation of Tukey's HSD Results
Compost vs. Compost-Biochar:

Mean Difference: 0.8075
p-value (p-adj): 0.768
Conclusion: There is no statistically significant difference between the Compost and Compost-Biochar treatments, as the p-value is greater than 0.05.
Compost vs. Control:

Mean Difference: 3.2336
p-value (p-adj): 0.0191
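Conclusion: There is a statistically significant difference between the Compost and Control treatments, as the p-value is less than 0.05. Note that statsmodels reports the mean difference as mean(group2) - mean(group1), so the positive value (3.2336) indicates that the Control mean shoot length is higher than the Compost mean, not lower; confirm the direction against the group means.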
Compost-Biochar vs. Control:

Mean Difference: 2.4261
p-value (p-adj): 0.11
Conclusion: There is no statistically significant difference between the Compost-Biochar and Control treatments, as the p-value is greater than 0.05.
Summary:
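Shoot length differs significantly between the Compost and Control treatments; because the mean difference is reported as group2 - group1 and is positive, the Control mean is the higher of the two (confirm against the group means).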
No significant differences were observed between Compost and Compost-Biochar or between Compost-Biochar and Control.


In [None]:
# Seed survival (Russell Ranch): one-way ANOVA of seed_survival_rate with
# treatment as the single categorical factor.
model_seed_survival = ols(
    formula='seed_survival_rate ~ C(treatment)',
    data=RR_normalized_df,
).fit()

# Type II ANOVA table for the fitted model.
anova_seed_survival = sm.stats.anova_lm(model_seed_survival, typ=2)

# Header and table, each on its own line.
print("ANOVA Table:", anova_seed_survival, sep="\n")

In [None]:
# Tukey HSD: pairwise comparison of seed_survival_rate between the treatment
# groups of the Russell Ranch data, at a significance level of 0.05.
tukey_seed_survival = pairwise_tukeyhsd(
    RR_normalized_df['seed_survival_rate'],
    RR_normalized_df['treatment'],
    alpha=0.05,
)
print(tukey_seed_survival)

In [None]:
# Spikelet count (Russell Ranch): one-way ANOVA of num_spikelets with
# treatment as the single categorical factor.
model_spikelets = ols(
    formula='num_spikelets ~ C(treatment)',
    data=RR_normalized_df,
).fit()

# Type II ANOVA table for the fitted model.
anova_spikelets = sm.stats.anova_lm(model_spikelets, typ=2)

# Header and table, each on its own line.
print("ANOVA Table:", anova_spikelets, sep="\n")

In [None]:
# Tukey HSD: pairwise comparison of num_spikelets between the treatment
# groups of the Russell Ranch data, at a significance level of 0.05.
tukey_spikelets = pairwise_tukeyhsd(
    RR_normalized_df['num_spikelets'],
    RR_normalized_df['treatment'],
    alpha=0.05,
)
print(tukey_spikelets)

In [None]:
# Root length (Russell Ranch): one-way ANOVA of root_length_cm with
# treatment as the single categorical factor.
model_root_length = ols(
    formula='root_length_cm ~ C(treatment)',
    data=RR_normalized_df,
).fit()

# Type II ANOVA table for the fitted model.
anova_root_length = sm.stats.anova_lm(model_root_length, typ=2)

# Header and table, each on its own line.
print("ANOVA Table:", anova_root_length, sep="\n")

In [None]:
# Tukey HSD: pairwise comparison of root_length_cm between the treatment
# groups of the Russell Ranch data, at a significance level of 0.05.
tukey_root_length = pairwise_tukeyhsd(
    RR_normalized_df['root_length_cm'],
    RR_normalized_df['treatment'],
    alpha=0.05,
)
print(tukey_root_length)

In [None]:
# Shoot + root biomass (Russell Ranch): one-way ANOVA of shoot_root_biomass_g
# with treatment as the single categorical factor.
model_shoot_root_biomass = ols(
    formula='shoot_root_biomass_g ~ C(treatment)',
    data=RR_normalized_df,
).fit()

# Type II ANOVA table for the fitted model.
anova_shoot_root_biomass = sm.stats.anova_lm(model_shoot_root_biomass, typ=2)

# Header and table, each on its own line.
print("ANOVA Table:", anova_shoot_root_biomass, sep="\n")

In [None]:
# Tukey HSD: pairwise comparison of shoot_root_biomass_g between the treatment
# groups of the Russell Ranch data, at a significance level of 0.05.
tukey_shoot_root_biomass = pairwise_tukeyhsd(
    RR_normalized_df['shoot_root_biomass_g'],
    RR_normalized_df['treatment'],
    alpha=0.05,
)
print(tukey_shoot_root_biomass)

---------------------

# Manteca - Plant Growth Metrics

In [None]:
# Load the Manteca plant growth metrics into a datascience Table.
Man_plant_growth_df = Table.read_table('Man_Plant_Growth_Metrics.csv')

# Keep only the rows whose shoot_length_cm is strictly greater than 0.
Man_normalized_plant_growth_df = Man_plant_growth_df.where(
    'shoot_length_cm',
    are.above(0),
)

In [None]:
# Switch from the datascience Table to a pandas DataFrame, which is what the
# statsmodels calls below are given.
Man_normalized_df = Man_normalized_plant_growth_df.to_df()

# Shoot length (Manteca): one-way ANOVA of shoot_length_cm with treatment as
# the single categorical factor.
model_shoot_length = ols(
    formula='shoot_length_cm ~ C(treatment)',
    data=Man_normalized_df,
).fit()

# Type II ANOVA table for the fitted model.
anova_table_shoot_length = sm.stats.anova_lm(model_shoot_length, typ=2)

# Header and table, each on its own line.
print("ANOVA Table:", anova_table_shoot_length, sep="\n")

In [None]:
# Tukey HSD: pairwise comparison of shoot_length_cm between the treatment
# groups of the Manteca data, at a significance level of 0.05.
tukey_shoot_length = pairwise_tukeyhsd(
    Man_normalized_df['shoot_length_cm'],
    Man_normalized_df['treatment'],
    alpha=0.05,
)
print(tukey_shoot_length)

In [None]:
# Seed survival (Manteca): one-way ANOVA of seed_survival_rate with
# treatment as the single categorical factor.
model_seed_survival = ols(
    formula='seed_survival_rate ~ C(treatment)',
    data=Man_normalized_df,
).fit()

# Type II ANOVA table for the fitted model.
anova_seed_survival = sm.stats.anova_lm(model_seed_survival, typ=2)

# Header and table, each on its own line.
print("ANOVA Table:", anova_seed_survival, sep="\n")

In [None]:
# Tukey HSD: pairwise comparison of seed_survival_rate between the treatment
# groups of the Manteca data, at a significance level of 0.05.
tukey_seed_survival = pairwise_tukeyhsd(
    Man_normalized_df['seed_survival_rate'],
    Man_normalized_df['treatment'],
    alpha=0.05,
)
print(tukey_seed_survival)

In [None]:
# Spikelet count (Manteca): one-way ANOVA of num_spikelets with treatment as
# the single categorical factor.
model_spikelets = ols(
    formula='num_spikelets ~ C(treatment)',
    data=Man_normalized_df,
).fit()

# Type II ANOVA table for the fitted model.
anova_spikelets = sm.stats.anova_lm(model_spikelets, typ=2)

# Header and table, each on its own line.
print("ANOVA Table:", anova_spikelets, sep="\n")

In [None]:
## NO SPIKELETS!!!

In [None]:
# Root length (Manteca): one-way ANOVA of root_length_cm with treatment as
# the single categorical factor.
model_root_length = ols(
    formula='root_length_cm ~ C(treatment)',
    data=Man_normalized_df,
).fit()

# Type II ANOVA table for the fitted model.
anova_root_length = sm.stats.anova_lm(model_root_length, typ=2)

# Header and table, each on its own line.
print("ANOVA Table:", anova_root_length, sep="\n")

In [None]:
# Tukey HSD: pairwise comparison of root_length_cm between the treatment
# groups of the Manteca data, at a significance level of 0.05.
# Both the measurements and the group labels must come from the same
# DataFrame (Man_normalized_df); taking the labels from the Russell Ranch
# frame would pair Manteca values with another site's treatment column.
tukey_root_length = pairwise_tukeyhsd(endog=Man_normalized_df['root_length_cm'],
                          groups=Man_normalized_df['treatment'],
                          alpha=0.05)
print(tukey_root_length)

In [None]:
# Shoot + root biomass (Manteca): one-way ANOVA of shoot_root_biomass_g with
# treatment as the single categorical factor.
model_shoot_root_biomass = ols(
    formula='shoot_root_biomass_g ~ C(treatment)',
    data=Man_normalized_df,
).fit()

# Type II ANOVA table for the fitted model.
anova_shoot_root_biomass = sm.stats.anova_lm(model_shoot_root_biomass, typ=2)

# Header and table, each on its own line.
print("ANOVA Table:", anova_shoot_root_biomass, sep="\n")

In [None]:
# Tukey HSD: pairwise comparison of shoot_root_biomass_g between the treatment
# groups of the Manteca data, at a significance level of 0.05.
# This is the follow-up to the Manteca biomass ANOVA, so it reads from
# Man_normalized_df; using RR_normalized_df here would simply repeat the
# Russell Ranch comparison.
tukey_shoot_root_biomass = pairwise_tukeyhsd(endog=Man_normalized_df['shoot_root_biomass_g'],
                          groups=Man_normalized_df['treatment'],
                          alpha=0.05)
print(tukey_shoot_root_biomass)

--------

# Russell Ranch - pH & EC

In [None]:
# Load the Russell Ranch chemical properties and convert to pandas.
RR_chem = Table.read_table('RR_Chemical_Properties.csv')
RR_chem_df = RR_chem.to_df()

# Split the rows by time point using the "Sample" label: rows whose label
# contains "T0" versus rows whose label contains "end" (case-sensitive).
sample_labels = RR_chem_df["Sample"]
RR_T0 = RR_chem_df[sample_labels.str.contains("T0")]
RR_end = RR_chem_df[sample_labels.str.contains("end")]

In [None]:
from scipy.stats import shapiro, levene

# Shapiro-Wilk normality check: keep only the p-value (index 1 of the result)
# for each time point, first for pH and then for EC.
shapiro_T0_pH, shapiro_end_pH = (
    shapiro(frame["pH"])[1] for frame in (RR_T0, RR_end)
)
shapiro_T0_EC, shapiro_end_EC = (
    shapiro(frame["EC (uS/cm)"])[1] for frame in (RR_T0, RR_end)
)

print(f"Shapiro-Wilk p-values: T0 pH={shapiro_T0_pH}, End pH={shapiro_end_pH}")
print(f"Shapiro-Wilk p-values: T0 EC={shapiro_T0_EC}, End EC={shapiro_end_EC}")

# Levene's test for equal variances between T0 and End; again only the
# p-value (index 1) is kept.
levene_pH = levene(RR_T0["pH"], RR_end["pH"])[1]
levene_EC = levene(RR_T0["EC (uS/cm)"], RR_end["EC (uS/cm)"])[1]

print(f"Levene's test p-values: pH={levene_pH}, EC={levene_EC}")

### non-normal when Shapiro-Wilk p<0.05 (the null hypothesis is normality) -> use non-parametric test

In [None]:
from scipy.stats import mannwhitneyu

# Mann-Whitney U test comparing T0 against End, run separately for pH and EC.
# The alternative is stated explicitly so the test is two-sided regardless of
# the installed SciPy version's default.
# NOTE(review): if the T0 and End rows are the same samples measured twice, a
# paired test (e.g. scipy.stats.wilcoxon) may fit better - confirm the
# sampling design.
u_pH, p_mwu_pH = mannwhitneyu(RR_T0["pH"], RR_end["pH"], alternative="two-sided")
u_EC, p_mwu_EC = mannwhitneyu(
    RR_T0["EC (uS/cm)"],
    RR_end["EC (uS/cm)"],
    alternative="two-sided",
)

print(f"Mann-Whitney U test for pH: U={u_pH}, p={p_mwu_pH}")
print(f"Mann-Whitney U test for EC: U={u_EC}, p={p_mwu_EC}")

In [None]:
# One box plot per measurement, comparing the T0 rows with the End rows.
# The x labels repeat "T0" once per T0 row and "End" once per End row, in the
# same order as the concatenated y values.
timepoint_labels = ["T0"] * len(RR_T0) + ["End"] * len(RR_end)

for column, title in (
    ("pH", "pH Distribution at T0 and End"),
    ("EC (uS/cm)", "EC Distribution at T0 and End"),
):
    plt.figure(figsize=(10, 5))
    sns.boxplot(
        x=timepoint_labels,
        y=RR_T0[column].tolist() + RR_end[column].tolist(),
    )
    plt.title(title)
    plt.show()

In [None]:
# One figure per measurement: overlaid histograms (with KDE curves) of the T0
# rows in blue and the End rows in red, 10 bins each.
for column, title in (
    ("pH", "pH Distribution: T0 vs End"),
    ("EC (uS/cm)", "EC Distribution: T0 vs End"),
):
    plt.figure(figsize=(10, 5))
    for frame, colour, label in ((RR_T0, "blue", "T0"), (RR_end, "red", "End")):
        sns.histplot(frame[column], kde=True, color=colour, label=label, bins=10)
    plt.legend()
    plt.title(title)
    plt.show()

In [None]:
# Scatter plot of EC against pH, with the T0 rows in blue and the End rows in
# red on the same axes.
plt.figure(figsize=(10, 5))
for frame, colour, label in ((RR_T0, "blue", "T0"), (RR_end, "red", "End")):
    sns.scatterplot(x=frame["pH"], y=frame["EC (uS/cm)"], color=colour, label=label)

# Explicit axis labels and title, set after both series are drawn.
plt.xlabel("pH")
plt.ylabel("EC (uS/cm)")
plt.title("pH vs. EC Scatter Plot: T0 vs End")
plt.legend()
plt.show()


Consider running the tests separately for EC and pH.

--------

In [None]:
# Manteca - pH & EC