# Step 1: Load Required Libraries and Define Functions

In [27]:
# Import necessary libraries
import pandas as pd
import numpy as np
from scipy.stats import kurtosis, t
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
import os



In [28]:
# Function to calculate LSD (Least Significant Difference) at 0.05 confidence level
def calculate_lsd(trait_data):
    return 1.96 * trait_data.std() / np.sqrt(len(trait_data))


In [29]:

# Function to calculate standard error
def calculate_se(trait_data):
    return trait_data.std() / np.sqrt(len(trait_data))


In [30]:
# Function to calculate LSD using ANOVA
def calculate_lsd_anova(data, trait, alpha=0.05):
    model = ols(f'{trait} ~ C(ENV) + C(REP)', data=data).fit()
    anova_table = anova_lm(model)
    mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
    df_error = anova_table['df'][-1]
    t_critical = t.ppf(1 - alpha / 2, df_error)
    lsd = t_critical * np.sqrt(2 * mse / data['TRT'].nunique())
    return lsd, mse, df_error, t_critical


# Step 2: Upload CSV File Using Google Colab’s Upload Option

In [4]:
# Import the files module for uploading
from google.colab import files

# Upload the CSV file
uploaded = files.upload()


Saving Book1.csv to Book1.csv


In [31]:
# Load the dataset
import io
import pandas as pd
data = pd.read_csv(io.BytesIO(uploaded['Book1.csv']))

# Preview the dataset
print("Dataset Preview:")
print(data.head())


Dataset Preview:
   TRT  REP   ENV   DTH   GFD   PHT   TNS        TGW         KA        KW  \
0    1    1  GH22  36.0  41.0  72.0  16.0  42.773723  16.106310  3.450812   
1    1    2  GH22  39.0  33.0  83.0  17.5  41.729323  16.157651  3.504968   
2    1    3  GH22  38.0  50.0  76.0  15.5  45.444444  17.150929  3.522329   
3    1    1  FL22  35.0  21.0   NaN   NaN        NaN        NaN       NaN   
4    1    2  FL22  35.0  20.0   NaN   NaN        NaN        NaN       NaN   

         KL        KC       KLW    KPS     GWS  
0  6.314301  1.324957  1.839293  34.25  1.4650  
1  6.276597  1.306594  1.798781  33.25  1.3875  
2  6.609778  1.336506  1.883759  22.50  1.0225  
3       NaN       NaN       NaN    NaN     NaN  
4       NaN       NaN       NaN    NaN     NaN  


In [32]:
print("\nDataset Information:")
print(data.info())


Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1710 entries, 0 to 1709
Data columns (total 15 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   TRT     1710 non-null   int64  
 1   REP     1710 non-null   int64  
 2   ENV     1710 non-null   object 
 3   DTH     1698 non-null   float64
 4   GFD     1685 non-null   float64
 5   PHT     1323 non-null   float64
 6   TNS     1322 non-null   float64
 7   TGW     1322 non-null   float64
 8   KA      1322 non-null   float64
 9   KW      1322 non-null   float64
 10  KL      1322 non-null   float64
 11  KC      1322 non-null   float64
 12  KLW     1322 non-null   float64
 13  KPS     1322 non-null   float64
 14  GWS     1322 non-null   float64
dtypes: float64(12), int64(2), object(1)
memory usage: 200.5+ KB
None


# Step-3: Define the trait columns

In [33]:
trait = 'DTH'
trait2 = 'GFD'
trait3 = 'PHT'
trait4 = 'TNS'
trait5 = 'TGW'
trait6 = 'KA'
trait7 = 'KW'
trait8 = 'KL'
trait9 = 'KC'
trait10 = 'KLW'
trait11 = 'KPS'
trait12 = 'GWS'

# Step 4: Process and Analyze Data

In [49]:
# Add LSD-ANOVA calculation for each trait based on ENV and REP
results = pd.DataFrame(columns=[
    'Trait', 'Mean', 'Std Error', 'Min', 'Max', 'CV%', 'Kurtosis', 'LSD (0.05)', 'LSD-ANOVA'
])

for column in data.columns[2:]:  # Start from column 2, excluding TRT and REP
    try:
        # Convert the column to numeric, forcing non-numeric values to NaN
        trait_data = pd.to_numeric(data[column], errors='coerce').dropna()

        if trait_data.empty:
            print(f"Skipping column '{column}' because it contains no numeric data.")
            continue

        # Basic statistics
        mean_val = trait_data.mean()
        se_val = calculate_se(trait_data)
        min_val = trait_data.min()
        max_val = trait_data.max()
        cv_val = (trait_data.std() / mean_val) * 100 if mean_val != 0 else None
        kurt_val = kurtosis(trait_data, fisher=True)
        lsd_val = calculate_lsd(trait_data)

        # LSD-ANOVA calculation
        try:
            lsd_anova, mse, df_error, t_critical = calculate_lsd_anova(data, column)
        except Exception as anova_error:
            lsd_anova = None
            print(f"ANOVA failed for column '{column}': {anova_error}")

        # Append results
        results = pd.concat([results, pd.DataFrame({
            'Trait': [column],
            'Mean': [mean_val],
            'Std Error': [se_val],
            'Min': [min_val],
            'Max': [max_val],
            'CV%': [cv_val],
            'Kurtosis': [kurt_val],
            'LSD (0.05)': [lsd_val],
            'LSD-ANOVA': [lsd_anova]
        })], ignore_index=True)

    except Exception as e:
        print(f"Error processing column '{column}': {e}")

# Display the results
results


Skipping column 'ENV' because it contains no numeric data.


  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  results = pd.concat([results, pd.DataFrame({
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova

Unnamed: 0,Trait,Mean,Std Error,Min,Max,CV%,Kurtosis,LSD (0.05),LSD-ANOVA
0,DTH,40.580683,0.114352,29.0,82.0,11.611598,5.228444,0.224129,0.821843
1,GFD,34.91276,0.170767,18.0,57.0,20.077921,-0.179044,0.334703,0.809755
2,PHT,80.46586,0.310638,49.0,116.0,14.041814,-0.255565,0.608851,2.119643
3,TNS,16.602483,0.063737,10.375,26.0,13.958349,0.129214,0.124924,0.382017
4,TGW,41.820859,0.206101,1.829653,66.691729,17.918516,0.231273,0.403957,1.047002
5,KA,16.579042,0.047357,11.904847,22.893021,10.385836,-0.068017,0.09282,0.287579
6,KW,3.482398,0.006705,2.70423,4.159718,7.00064,-0.207774,0.013142,0.034644
7,KL,6.451018,0.009268,5.374188,7.537133,5.223615,0.127042,0.018165,0.063901
8,KC,1.34186,0.001639,1.22213,1.5994,4.441633,0.596589,0.003213,0.008977
9,KLW,1.877138,0.003951,1.522176,2.387019,7.653451,-0.002796,0.007745,0.021591


In [50]:
# Perform extended analysis by environment
results_list = []

for env, env_data in data.groupby('ENV'):
    for column in ['DTH', 'GFD', 'PHT', 'TNS', 'TGW', 'KA', 'KW', 'KL', 'KC', 'KLW', 'KPS', 'GWS']:
        trait_data = pd.to_numeric(env_data[column], errors='coerce').dropna()

        if len(trait_data) == 0:
            continue

        mean_val = trait_data.mean()
        se_val = calculate_se(trait_data)
        cv_val = (trait_data.std() / mean_val) * 100 if mean_val != 0 else None
        kurt_val = kurtosis(trait_data, fisher=True)
        lsd_val, mse_val, df_error, t_critical = calculate_lsd_anova(env_data, column)

        # Append each result as a dictionary
        results_list.append({
            'Environment': env,
            'Trait': column,
            'Mean': mean_val,
            'Standard Error (SE)': se_val,
            'CV%': cv_val,
            'Kurtosis': kurt_val,
            'MSE': mse_val,
            'Degrees of Freedom (df)': df_error,
            't-Critical Value': t_critical,
            'LSD (0.05)': lsd_val
        })

# Convert to DataFrame and display
extended_results = pd.DataFrame(results_list)
extended_results


  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq'][-1]  # Mean Square Error (MSE)
  df_error = anova_table['df'][-1]
  mse = anova_table['mean_sq']

Unnamed: 0,Environment,Trait,Mean,Standard Error (SE),CV%,Kurtosis,MSE,Degrees of Freedom (df),t-Critical Value,LSD (0.05)
0,FL22,DTH,38.50266,0.184852,9.309512,-0.490748,12.879503,374.0,1.966327,0.724008
1,FL22,GFD,26.508242,0.22412,16.130613,0.445472,18.332001,362.0,1.966539,0.863865
2,FL23,DTH,37.997347,0.190144,9.716271,0.352705,13.665803,375.0,1.96631,0.745775
3,FL23,GFD,34.816976,0.141853,7.910757,1.326803,7.59033,375.0,1.96631,0.555803
4,FL23,PHT,81.488948,0.539776,12.861328,-0.31126,109.323028,375.0,1.96631,2.109337
5,FL23,TNS,14.480006,0.08734,11.696012,0.062889,2.833088,374.0,1.966327,0.339566
6,FL23,TGW,33.917718,0.235077,13.439347,6.101623,20.69152,374.0,1.966327,0.917678
7,FL23,KA,15.197772,0.05843,7.445084,0.130304,1.283663,373.0,1.966344,0.228572
8,FL23,KW,3.222993,0.0082,4.933382,0.414898,0.025333,374.0,1.966327,0.03211
9,FL23,KL,6.393247,0.01253,3.800373,0.095081,0.059183,374.0,1.966327,0.049079
