# Step 1: Load Required Libraries and Define Functions

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from scipy.stats import t
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.stats.multicomp import pairwise_tukeyhsd




In [2]:
# Function to calculate LSD (Least Significant Difference) based on ANOVA
def calculate_lsd_anova(data, trait, alpha=0.05, n_per_mean=None):
    """Compute Fisher's Least Significant Difference (LSD) for one trait.

    An OLS model ``trait ~ C(ENV) + C(REP)`` is fitted, and the residual mean
    square (MSE) and its degrees of freedom are read from the last row of the
    ANOVA table. The value returned is::

        LSD = t(1 - alpha/2, df_error) * sqrt(2 * MSE / n)

    where ``n`` is the number of observations behind each treatment mean.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``trait``, ``ENV``, ``REP`` and ``TRT``.
    trait : str
        Name of the response column; it is interpolated into the model formula.
    alpha : float, default 0.05
        Two-sided significance level, strictly between 0 and 1.
    n_per_mean : float, optional
        Number of observations per treatment mean. When omitted it is taken as
        the harmonic mean of the number of complete rows per ``TRT`` level,
        which equals the common replicate count for balanced data. Pass it
        explicitly when means are compared within a subset (e.g. per ENV).

    Returns
    -------
    float
        The LSD, in the units of ``trait``.

    Raises
    ------
    ValueError
        If ``alpha`` is not in (0, 1) or ``n_per_mean`` is not positive.
    """
    # Validate up front so a bad argument fails before any model is fitted.
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha!r}")
    if n_per_mean is not None and not n_per_mean > 0:
        raise ValueError(f"n_per_mean must be positive, got {n_per_mean!r}")

    # Perform ANOVA assuming `ENV` and `REP` as factors (replace with your factors if different)
    # NOTE(review): TRT is not a term of this model, so treatment-to-treatment
    # variation stays in the residual mean square -- confirm this is intended.
    model = ols(f'{trait} ~ C(ENV) + C(REP)', data=data).fit()
    anova_table = anova_lm(model)

    # The residual/error line is the last row of the ANOVA table. The table is
    # indexed by term labels, so the row must be selected by position with .iloc
    # (a plain [-1] on a label-indexed Series is not a positional lookup).
    mse = anova_table['mean_sq'].iloc[-1]
    df_error = anova_table['df'].iloc[-1]

    # Calculate t-critical value for the given confidence level
    t_critical = t.ppf(1 - alpha / 2, df_error)

    if n_per_mean is None:
        # Count only rows with no missing value in the modelled columns
        # (formula-based fits drop incomplete rows by default -- TODO confirm
        # no custom `missing=` handling is expected here).
        complete = data[[trait, 'ENV', 'REP', 'TRT']].dropna()
        per_treatment = complete.groupby('TRT').size()
        per_treatment = per_treatment[per_treatment > 0]  # ignore empty categories
        # Harmonic mean of the per-treatment counts.
        n_per_mean = len(per_treatment) / (1.0 / per_treatment).sum()

    # Calculate LSD: the divisor is the number of observations per treatment
    # mean, not the number of distinct treatments.
    lsd = t_critical * np.sqrt(2 * mse / n_per_mean)
    return lsd


In [3]:
# Function to perform Tukey's HSD test
def perform_tukey_hsd(data, trait, group_col='RIL_ID', alpha=0.05):
    """Run Tukey's HSD pairwise comparison of ``trait`` between groups.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the response and the grouping column.
    trait : str
        Name of the response column.
    group_col : str, default 'RIL_ID'
        Name of the column whose levels are compared. Pass e.g. ``'TRT'`` when
        the data set has no ``RIL_ID`` column.
    alpha : float, default 0.05
        Family-wise significance level forwarded to ``pairwise_tukeyhsd``.

    Returns
    -------
    The result object returned by ``pairwise_tukeyhsd``.

    Raises
    ------
    KeyError
        If ``trait`` or ``group_col`` is not a column of ``data``.
    """
    missing = [col for col in (trait, group_col) if col not in data.columns]
    if missing:
        raise KeyError(
            f"Column(s) {missing} not found in data; "
            f"available columns: {list(data.columns)}"
        )

    # Keep only rows where both the response and the group label are present.
    # NOTE(review): pairwise_tukeyhsd is not documented to drop missing values
    # itself, so NaNs are removed here rather than passed through.
    complete = data[[trait, group_col]].dropna()

    tukey_result = pairwise_tukeyhsd(
        endog=complete[trait],
        groups=complete[group_col],
        alpha=alpha,
    )
    return tukey_result


# Step 2: Upload CSV File Using Google Colab’s Upload Option

In [4]:
# Import the files module for uploading
# (part of the `google.colab` package, so this cell is meant to run in Colab).
from google.colab import files

# Upload the CSV file
# `uploaded` is indexed by file name in the loading cell and its values are
# wrapped in io.BytesIO there, i.e. it maps uploaded file name -> raw bytes.
uploaded = files.upload()


Saving Book1.csv to Book1.csv


In [5]:
# Load the dataset
import io

# `uploaded` maps each uploaded file name to its raw bytes. Prefer the expected
# name, but fall back to the single uploaded file so the cell still works when
# the file was saved under a different name (another file, or a renamed
# duplicate upload).
if 'Book1.csv' in uploaded:
    csv_name = 'Book1.csv'
elif len(uploaded) == 1:
    csv_name = next(iter(uploaded))
else:
    raise KeyError(
        f"Expected 'Book1.csv' or exactly one uploaded file, got: {list(uploaded)}"
    )
data = pd.read_csv(io.BytesIO(uploaded[csv_name]))

# Preview the dataset
print("Dataset Preview:")
print(data.head())


Dataset Preview:
   TRT  REP   ENV   DTH   GFD   PHT   TNS        TGW         KA        KW  \
0    1    1  GH22  36.0  41.0  72.0  16.0  42.773723  16.106310  3.450812   
1    1    2  GH22  39.0  33.0  83.0  17.5  41.729323  16.157651  3.504968   
2    1    3  GH22  38.0  50.0  76.0  15.5  45.444444  17.150929  3.522329   
3    1    1  FL22  35.0  21.0   NaN   NaN        NaN        NaN       NaN   
4    1    2  FL22  35.0  20.0   NaN   NaN        NaN        NaN       NaN   

         KL        KC       KLW    KPS     GWS  
0  6.314301  1.324957  1.839293  34.25  1.4650  
1  6.276597  1.306594  1.798781  33.25  1.3875  
2  6.609778  1.336506  1.883759  22.50  1.0225  
3       NaN       NaN       NaN    NaN     NaN  
4       NaN       NaN       NaN    NaN     NaN  


In [6]:
print("\nDataset Information:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
data.info()


Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1710 entries, 0 to 1709
Data columns (total 15 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   TRT     1710 non-null   int64  
 1   REP     1710 non-null   int64  
 2   ENV     1710 non-null   object 
 3   DTH     1698 non-null   float64
 4   GFD     1685 non-null   float64
 5   PHT     1323 non-null   float64
 6   TNS     1322 non-null   float64
 7   TGW     1322 non-null   float64
 8   KA      1322 non-null   float64
 9   KW      1322 non-null   float64
 10  KL      1322 non-null   float64
 11  KC      1322 non-null   float64
 12  KLW     1322 non-null   float64
 13  KPS     1322 non-null   float64
 14  GWS     1322 non-null   float64
dtypes: float64(12), int64(2), object(1)
memory usage: 200.5+ KB
None
