# Required packages

In [None]:
# Install any CRAN package that is not yet available, then attach all of them.
# requireNamespace() is used purely as an availability check; attaching is
# left to the explicit library() calls below, which stop with an error if a
# package is still missing after the install attempt.
# local() keeps the loop variable out of the notebook's global environment.
local({
  for (pkg in c("lme4", "lmerTest", "emmeans", "pbkrtest",
                "multcompView", "multcomp")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      install.packages(pkg)
    }
  }
})
library(lme4)
library(lmerTest)
library(emmeans)
library(pbkrtest)
library(multcompView)
library(multcomp)

# Install agrivoltaics()

### Before installing, please download Rtools (https://cran.r-project.org/bin/windows/Rtools)

In [46]:
# Make sure the 'remotes' helper is available (installed from CRAN if not),
# using the same requireNamespace() availability check as for agrivoltaics.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
library(remotes)

# Install agrivoltaics from GitHub only when it is not installed yet.
# force = TRUE is passed through to install_github() unchanged.
if (!requireNamespace("agrivoltaics", quietly = TRUE)) {
  remotes::install_github("agronomy4future/agrivoltaics", force = TRUE)
}
library(agrivoltaics)

# Data upload

In [55]:
# Make sure readr is available (availability check only), then attach it.
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
library(readr)

# Practice data set, read straight from the raw GitHub URL into a plain
# data.frame.
github <- "https://raw.githubusercontent.com/agronomy4future/raw_data_practice/refs/heads/main/agrivoltaics.csv"
df <- data.frame(read_csv(url(github), show_col_types = FALSE))

# Plot is stored as a factor and Yield as numeric for the models below.
df$Plot <- as.factor(df$Plot)
df$Yield <- as.numeric(df$Yield)

# Fixed seed so the five-row preview is reproducible.
set.seed(100)
print(df[sample(nrow(df), 5), ])

         Season Location AV_Site Genotype Plot Block  Row Yield
202 2015 season     East      AV      cv1  101     I East 195.4
112 2016 season  MidWest Control      cv1  115    IV East 625.1
206 2015 season     East      AV      cv2  102    II East 135.9
4   2015 season  MidWest Control      cv1  109     I West 384.4
311 2016 season     East      AV      cv1  107   III East 125.1


# Field layout

# 1) Single crop (only 1 cultivar) with a single row

In [56]:
# Scenario 1: AV_Site is the treatment and Block the blocking factor;
# genotype, plot, row, season and location are explicitly left as NULL.
model <- agrivoltaics(
  output = Yield, treatment = AV_Site,
  genotype = NULL, plot = NULL, block = Block, row = NULL,
  season = NULL, location = NULL,
  data = df
)


 MODEL FORMULA USED:
Yield ~ AV_Site + (1 | AV_Site:Block) 


 VARIANCE COMPONENTS:
 Groups        Name        Variance
 AV_Site:Block (Intercept)  104.22 
 Residual                  8072.88 

 VARIANCE COMPONENT BREAKDOWN (%):
Random [AV_Site:Block]: 1.27%
Residual: 98.73%

 TYPE III ANOVA:
Type III Analysis of Variance Table with Satterthwaite's method
         Sum Sq Mean Sq NumDF DenDF F value    Pr(>F)    
AV_Site 5471323 5471323     1     6  677.74 2.119e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


## Post-hoc analyses (LSD: adjust= "none"  /  Sidak: adjust= "sidak"  /  Tukey: adjust= "tukey")

In [59]:
# Compact letter display for the AV_Site estimated marginal means,
# using the Sidak adjustment; reverse = TRUE is kept as in the original call.
post_hoc <- cld(
  emmeans(model, ~ AV_Site),
  adjust = "sidak",
  Letters = letters,
  reverse = TRUE
)
print(post_hoc)

 AV_Site emmean   SE df lower.CL upper.CL .group
 Control    474 8.75  6      448      500  a    
 AV         152 8.75  6      126      178   b   

Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95 
Conf-level adjustment: sidak method for 2 estimates 
significance level used: alpha = 0.05 
NOTE: If two or more means share the same grouping symbol,
      then we cannot show them to be different.
      But we also did not show them to be the same. 


## Pairwise mean comparison

In [60]:
# Pairwise contrasts between the AV_Site estimated marginal means
# (Sidak adjustment requested).
pairwise <- contrast(
  emmeans(model, ~ AV_Site),
  method = "pairwise",
  adjust = "sidak"
)
print(summary(pairwise))

 contrast     estimate   SE df t.ratio p.value
 AV - Control     -322 12.4  6 -26.033  <.0001

Degrees-of-freedom method: kenward-roger 


# 2) Single crop (only 1 cultivar) with multiple rows

In [61]:
# Scenario 2: same as scenario 1, but Row is now supplied as the row factor.
# genotype, plot, season and location stay NULL.
model <- agrivoltaics(
  output = Yield, treatment = AV_Site,
  genotype = NULL, plot = NULL, block = Block, row = Row,
  season = NULL, location = NULL,
  data = df
)


 MODEL FORMULA USED:
Yield ~ AV_Site + Row + AV_Site:Row + (1 | AV_Site:Block) + (1 |      Block:Row) 


 VARIANCE COMPONENTS:
 Groups        Name        Variance
 Block:Row     (Intercept)  241.608
 AV_Site:Block (Intercept)   81.111
 Residual                  6535.598

 VARIANCE COMPONENT BREAKDOWN (%):
Random [Block:Row]: 3.52%
Random [AV_Site:Block]: 1.18%
Residual: 95.29%

 TYPE III ANOVA:
Type III Analysis of Variance Table with Satterthwaite's method
             Sum Sq Mean Sq NumDF   DenDF  F value    Pr(>F)    
AV_Site     4537454 4537454     1   4.794 694.2676 2.258e-06 ***
Row           83404   41702     2   8.012   6.3807     0.022 *  
AV_Site:Row  301034  150517     2 301.813  23.0303 4.915e-10 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


In [62]:
# Compact letter display for the AV_Site x Row estimated marginal means
# (Sidak adjustment).
post_hoc <- cld(
  emmeans(model, ~ AV_Site:Row),
  adjust = "sidak",
  Letters = letters,
  reverse = TRUE
)
print(post_hoc)

 AV_Site Row    emmean   SE   df lower.CL upper.CL .group
 Control East      523 13.5 14.5    481.4      564  a    
 Control West      471 13.5 14.5    429.6      512  a    
 Control Middle    385 16.9 34.4    338.2      432   b   
 AV      Middle    184 16.9 34.4    136.6      231    c  
 AV      East      157 13.5 14.5    116.3      199    c  
 AV      West      131 13.5 14.5     90.3      173    c  

Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95 
Conf-level adjustment: sidak method for 6 estimates 
P value adjustment: sidak method for 15 tests 
significance level used: alpha = 0.05 
NOTE: If two or more means share the same grouping symbol,
      then we cannot show them to be different.
      But we also did not show them to be the same. 


In [63]:
# All pairwise contrasts among the AV_Site x Row combinations
# (Sidak adjustment).
pairwise <- contrast(
  emmeans(model, ~ AV_Site:Row),
  method = "pairwise",
  adjust = "sidak"
)
print(summary(pairwise))

 contrast                      estimate   SE   df t.ratio p.value
 AV East - Control East          -365.1 15.6 14.9 -23.334  <.0001
 AV East - AV Middle              -26.2 20.7 21.9  -1.269  0.9748
 AV East - Control Middle        -227.8 21.6 23.5 -10.532  <.0001
 AV East - AV West                 26.0 18.0 12.8   1.444  0.9419
 AV East - Control West          -313.2 19.1 14.5 -16.381  <.0001
 Control East - AV Middle         338.8 21.6 23.5  15.668  <.0001
 Control East - Control Middle    137.3 20.7 21.9   6.643  <.0001
 Control East - AV West           391.1 19.1 14.5  20.455  <.0001
 Control East - Control West       51.9 18.0 12.8   2.877  0.1796
 AV Middle - Control Middle      -201.5 21.2 46.5  -9.511  <.0001
 AV Middle - AV West               52.3 20.7 21.9   2.529  0.2520
 AV Middle - Control West        -287.0 21.6 23.5 -13.270  <.0001
 Control Middle - AV West         253.8 21.6 23.5  11.736  <.0001
 Control Middle - Control West    -85.4 20.7 21.9  -4.133  0.0066
 AV West -

# 3) Single crop (only 1 cultivar) with a single row in different seasons

In [64]:
# Scenario 3: AV_Site treatment with Block and Season supplied;
# genotype, plot, row and location stay NULL.
model <- agrivoltaics(
  output = Yield, treatment = AV_Site,
  genotype = NULL, plot = NULL, block = Block, row = NULL,
  season = Season, location = NULL,
  data = df
)


 MODEL FORMULA USED:
Yield ~ AV_Site + Season + AV_Site:Season + (1 | AV_Site:Block) +      (1 | Block:Season) 


 VARIANCE COMPONENTS:
 Groups        Name        Variance
 AV_Site:Block (Intercept)   81.706
 Block:Season  (Intercept)  185.394
 Residual                  6362.830

 VARIANCE COMPONENT BREAKDOWN (%):
Random [AV_Site:Block]: 1.23%
Random [Block:Season]: 2.8%
Residual: 95.97%

 TYPE III ANOVA:
Type III Analysis of Variance Table with Satterthwaite's method
                Sum Sq Mean Sq NumDF   DenDF F value    Pr(>F)    
AV_Site        5481197 5481197     1   4.095 861.440 6.453e-06 ***
Season          181210  181210     1   4.964  28.479  0.003166 ** 
AV_Site:Season  126636  126636     1 306.753  19.902 1.145e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


In [65]:
# Compact letter display for the AV_Site x Season estimated marginal means
# (Sidak adjustment).
post_hoc <- cld(
  emmeans(model, ~ AV_Site:Season),
  adjust = "sidak",
  Letters = letters,
  reverse = TRUE
)
print(post_hoc)

 AV_Site Season      emmean   SE   df lower.CL upper.CL .group
 Control 2016 season    529 12.1 11.5      494      565  a    
 Control 2015 season    419 12.1 11.5      384      455   b   
 AV      2016 season    167 12.1 11.5      132      203    c  
 AV      2015 season    137 12.1 11.5      102      173    c  

Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95 
Conf-level adjustment: sidak method for 4 estimates 
P value adjustment: sidak method for 6 tests 
significance level used: alpha = 0.05 
NOTE: If two or more means share the same grouping symbol,
      then we cannot show them to be different.
      But we also did not show them to be the same. 


In [66]:
# All pairwise contrasts among the AV_Site x Season combinations
# (Sidak adjustment).
pairwise <- contrast(
  emmeans(model, ~ AV_Site:Season),
  method = "pairwise",
  adjust = "sidak"
)
print(summary(pairwise))

 contrast                                  estimate   SE    df t.ratio p.value
 AV 2015 season - Control 2015 season        -282.2 14.1  9.80 -19.962  <.0001
 AV 2015 season - AV 2016 season              -30.2 15.9  9.53  -1.906  0.4214
 AV 2015 season - Control 2016 season        -392.1 17.1 11.47 -22.920  <.0001
 Control 2015 season - AV 2016 season         252.0 17.1 11.47  14.732  <.0001
 Control 2015 season - Control 2016 season   -109.8 15.9  9.53  -6.921  0.0003
 AV 2016 season - Control 2016 season        -361.8 14.1  9.80 -25.590  <.0001

Degrees-of-freedom method: kenward-roger 
P value adjustment: sidak method for 6 tests 


# 4) Single crop (only 1 cultivar) with multiple rows in different seasons

In [67]:
# Scenario 4: AV_Site treatment with Block, Row and Season all supplied;
# genotype, plot and location stay NULL.
model <- agrivoltaics(
  output = Yield, treatment = AV_Site,
  genotype = NULL, plot = NULL, block = Block, row = Row,
  season = Season, location = NULL,
  data = df
)


 MODEL FORMULA USED:
Yield ~ AV_Site + Season + Row + AV_Site:Row + AV_Site:Season +      AV_Site:Season:Row + (1 | AV_Site:Block) + (1 | Block:Season) +      (1 | Block:Row) 


 VARIANCE COMPONENTS:
 Groups        Name        Variance
 Block:Row     (Intercept)  291.59 
 Block:Season  (Intercept)  204.44 
 AV_Site:Block (Intercept)  110.20 
 Residual                  4781.10 

 VARIANCE COMPONENT BREAKDOWN (%):
Random [Block:Row]: 5.41%
Random [Block:Season]: 3.79%
Random [AV_Site:Block]: 2.05%
Residual: 88.75%

 TYPE III ANOVA:
Type III Analysis of Variance Table with Satterthwaite's method
                    Sum Sq Mean Sq NumDF   DenDF  F value    Pr(>F)    
AV_Site            2750703 2750703     1   8.118 575.3283 7.951e-09 ***
Season              134561  134561     1   4.814  28.1443  0.003559 ** 
Row                  63552   31776     2   7.407   6.6462  0.022236 *  
AV_Site:Row         116562   58281     2 291.859  12.1898 8.233e-06 ***
AV_Site:Season       62212   62212     

In [71]:
# Compact letter display for the AV_Site x Row estimated marginal means
# (Sidak adjustment). Season is in the model but not in the emmeans
# specification, so the printed results are averaged over Season.
post_hoc <- cld(
  emmeans(model, ~ AV_Site:Row),
  adjust = "sidak",
  Letters = letters,
  reverse = TRUE
)
print(post_hoc)

NOTE: Results may be misleading due to involvement in interactions



 AV_Site Row    emmean   SE   df lower.CL upper.CL .group
 Control East      523 14.2 13.8      479      566  a    
 Control West      471 14.2 13.8      427      514  a    
 Control Middle    385 16.6 25.8      338      433   b   
 AV      Middle    184 16.6 25.8      136      231    c  
 AV      East      157 14.2 13.8      114      201    c  
 AV      West      131 14.2 13.8       88      175    c  

Results are averaged over the levels of: Season 
Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95 
Conf-level adjustment: sidak method for 6 estimates 
P value adjustment: sidak method for 15 tests 
significance level used: alpha = 0.05 
NOTE: If two or more means share the same grouping symbol,
      then we cannot show them to be different.
      But we also did not show them to be the same. 


# 5) Multiple cultivars with a single row

In [72]:
# Scenario 5: AV_Site treatment with Genotype and Block supplied;
# plot, row, season and location stay NULL.
model <- agrivoltaics(
  output = Yield, treatment = AV_Site,
  genotype = Genotype, plot = NULL, block = Block, row = NULL,
  season = NULL, location = NULL,
  data = df
)


 MODEL FORMULA USED:
Yield ~ AV_Site + Genotype + AV_Site:Genotype + (1 | AV_Site:Block) 


 VARIANCE COMPONENTS:
 Groups        Name        Variance
 AV_Site:Block (Intercept)  112.42 
 Residual                  7744.87 

 VARIANCE COMPONENT BREAKDOWN (%):
Random [AV_Site:Block]: 1.43%
Residual: 98.57%

 TYPE III ANOVA:
Type III Analysis of Variance Table with Satterthwaite's method
                  Sum Sq Mean Sq NumDF DenDF  F value    Pr(>F)    
AV_Site          5249020 5249020     1     6 677.7416 2.119e-07 ***
Genotype          115376  115376     1   310  14.8970 0.0001382 ***
AV_Site:Genotype    2452    2452     1   310   0.3166 0.5740664    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


In [73]:
# Compact letter display for the AV_Site estimated marginal means
# (Sidak adjustment). Genotype is in the model but not in the emmeans
# specification, so the printed results are averaged over Genotype.
post_hoc <- cld(
  emmeans(model, ~ AV_Site),
  adjust = "sidak",
  Letters = letters,
  reverse = TRUE
)
print(post_hoc)

NOTE: Results may be misleading due to involvement in interactions



 AV_Site emmean   SE df lower.CL upper.CL .group
 Control    474 8.75  6      448      500  a    
 AV         152 8.75  6      126      178   b   

Results are averaged over the levels of: Genotype 
Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95 
Conf-level adjustment: sidak method for 2 estimates 
significance level used: alpha = 0.05 
NOTE: If two or more means share the same grouping symbol,
      then we cannot show them to be different.
      But we also did not show them to be the same. 


# 6) Multiple cultivars with multiple rows

In [74]:
# Scenario 6: AV_Site treatment with Genotype, Block and Row supplied;
# plot, season and location stay NULL.
model <- agrivoltaics(
  output = Yield, treatment = AV_Site,
  genotype = Genotype, plot = NULL, block = Block, row = Row,
  season = NULL, location = NULL,
  data = df
)


 MODEL FORMULA USED:
Yield ~ AV_Site + Genotype + Row + AV_Site:Row + AV_Site:Genotype +      AV_Site:Genotype:Row + (1 | AV_Site:Block) + (1 | Block:Row) 


 VARIANCE COMPONENTS:
 Groups        Name        Variance
 Block:Row     (Intercept)  256.53 
 AV_Site:Block (Intercept)   90.41 
 Residual                  6106.80 

 VARIANCE COMPONENT BREAKDOWN (%):
Random [Block:Row]: 3.97%
Random [AV_Site:Block]: 1.4%
Residual: 94.62%

 TYPE III ANOVA:
Type III Analysis of Variance Table with Satterthwaite's method
                      Sum Sq Mean Sq NumDF   DenDF  F value    Pr(>F)    
AV_Site              3012785 3012785     1   9.456 493.3492 1.766e-09 ***
Genotype              136141  136141     1 295.731  22.2933 3.619e-06 ***
Row                    78260   39130     2   7.934   6.4076   0.02207 *  
AV_Site:Row           231828  115914     2 295.731  18.9811 1.755e-08 ***
AV_Site:Genotype        1527    1527     1 295.731   0.2500   0.61742    
AV_Site:Genotype:Row   48210   12053     

In [75]:
# Compact letter display for the AV_Site x Row estimated marginal means
# (Sidak adjustment). Genotype is in the model but not in the emmeans
# specification, so the printed results are averaged over Genotype.
post_hoc <- cld(
  emmeans(model, ~ AV_Site:Row),
  adjust = "sidak",
  Letters = letters,
  reverse = TRUE
)
print(post_hoc)

NOTE: Results may be misleading due to involvement in interactions



 AV_Site Row    emmean   SE   df lower.CL upper.CL .group
 Control East      523 13.5 14.2    481.4      564  a    
 Control West      471 13.5 14.2    429.5      512  a    
 Control Middle    385 16.7 32.3    338.6      432   b   
 AV      Middle    184 16.7 32.3    137.0      230    c  
 AV      East      157 13.5 14.2    116.3      199    c  
 AV      West      131 13.5 14.2     90.3      173    c  

Results are averaged over the levels of: Genotype 
Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95 
Conf-level adjustment: sidak method for 6 estimates 
P value adjustment: sidak method for 15 tests 
significance level used: alpha = 0.05 
NOTE: If two or more means share the same grouping symbol,
      then we cannot show them to be different.
      But we also did not show them to be the same. 


# 7) Multiple cultivars with a single row in different seasons

In [76]:
# Scenario 7: AV_Site treatment with Genotype, Plot, Block and Season
# supplied; row and location stay NULL.
model <- agrivoltaics(
  output = Yield, treatment = AV_Site,
  genotype = Genotype, plot = Plot, block = Block, row = NULL,
  season = Season, location = NULL,
  data = df
)


 MODEL FORMULA USED:
Yield ~ AV_Site + Genotype + Season + AV_Site:Genotype + AV_Site:Season +      Genotype:Season + (1 | AV_Site:Block) + (1 | Block:Season) +      (1 | Block:Plot) 



boundary (singular) fit: see help('isSingular')

“️ Model fit is singular. Some variance components may be near zero or redundant.”



 VARIANCE COMPONENTS:
 Groups        Name        Variance
 Block:Plot    (Intercept)  535.72 
 Block:Season  (Intercept)  180.95 
 AV_Site:Block (Intercept)    0.00 
 Residual                  5703.96 

 VARIANCE COMPONENT BREAKDOWN (%):
Random [Block:Plot]: 8.34%
Random [Block:Season]: 2.82%
Random [AV_Site:Block]: 0%
Residual: 88.84%

 TYPE III ANOVA:
Type III Analysis of Variance Table with Satterthwaite's method
                  Sum Sq Mean Sq NumDF   DenDF  F value    Pr(>F)    
AV_Site          2882354 2882354     1  10.967 505.3254 1.597e-10 ***
Genotype           40083   40083     1  10.967   7.0273  0.022602 *  
Season            172946  172946     1   4.512  30.3204  0.003706 ** 
AV_Site:Genotype     852     852     1  10.967   0.1493  0.706554    
AV_Site:Season    126636  126636     1 297.426  22.2014 3.775e-06 ***
Genotype:Season     6155    6155     1 297.426   1.0790  0.299757    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


In [77]:
# Compact letter display for the AV_Site x Season estimated marginal means
# (Sidak adjustment). Genotype is in the model but not in the emmeans
# specification, so the printed results are averaged over Genotype.
post_hoc <- cld(
  emmeans(model, ~ AV_Site:Season),
  adjust = "sidak",
  Letters = letters,
  reverse = TRUE
)
print(post_hoc)

 AV_Site Season      emmean   SE   df lower.CL upper.CL .group
 Control 2016 season    529 13.5 10.5    488.7      570  a    
 Control 2015 season    419 13.5 10.5    378.9      460   b   
 AV      2016 season    167 13.5 10.5    126.9      208    c  
 AV      2015 season    137 13.5 10.5     96.6      178    c  

Results are averaged over the levels of: Genotype 
Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95 
Conf-level adjustment: sidak method for 4 estimates 
P value adjustment: sidak method for 6 tests 
significance level used: alpha = 0.05 
NOTE: If two or more means share the same grouping symbol,
      then we cannot show them to be different.
      But we also did not show them to be the same. 


# 8) Multiple cultivars with multiple rows in different seasons

In [78]:
# Scenario 8: AV_Site treatment with Genotype, Plot, Block, Row and Season
# all supplied; only location stays NULL.
model <- agrivoltaics(
  output = Yield, treatment = AV_Site,
  genotype = Genotype, plot = Plot, block = Block, row = Row,
  season = Season, location = NULL,
  data = df
)


 MODEL FORMULA USED:
Yield ~ AV_Site + Genotype + Season + Row + AV_Site:Row + AV_Site:Genotype +      AV_Site:Season + Genotype:Season + AV_Site:Genotype:Row +      AV_Site:Season:Row + Genotype:Season:Row + (1 | AV_Site:Block) +      (1 | Block:Season) + (1 | Block:Plot) + (1 | Plot:Row) 



boundary (singular) fit: see help('isSingular')

“️ Model fit is singular. Some variance components may be near zero or redundant.”



 VARIANCE COMPONENTS:
 Groups        Name        Variance
 Plot:Row      (Intercept) 2204.252
 Block:Plot    (Intercept)   22.565
 Block:Season  (Intercept)  248.407
 AV_Site:Block (Intercept)    0.000
 Residual                  3054.075

 VARIANCE COMPONENT BREAKDOWN (%):
Random [Plot:Row]: 39.86%
Random [Block:Plot]: 0.41%
Random [Block:Season]: 4.49%
Random [AV_Site:Block]: 0%
Residual: 55.23%

 TYPE III ANOVA:
Type III Analysis of Variance Table with Satterthwaite's method
                     Sum Sq Mean Sq NumDF   DenDF  F value    Pr(>F)    
AV_Site              680928  680928     1  25.231 222.9570 4.930e-14 ***
Genotype              24919   24919     1  11.007   8.1591  0.015607 *  
Season                86989   86989     1   4.583  28.4829  0.004012 ** 
Row                   29242   14621     2  23.020   4.7874  0.018260 *  
AV_Site:Row           37494   18747     2  27.435   6.1383  0.006263 ** 
AV_Site:Genotype        223     223     1  30.014   0.0732  0.788626    
AV_Sit

In [79]:
# Compact letter display for the AV_Site x Season estimated marginal means
# (Sidak adjustment). Genotype and Row are in the model but not in the
# emmeans specification, so the printed results are averaged over both.
post_hoc <- cld(
  emmeans(model, ~ AV_Site:Season),
  adjust = "sidak",
  Letters = letters,
  reverse = TRUE
)
print(post_hoc)

NOTE: Results may be misleading due to involvement in interactions



 AV_Site Season      emmean   SE   df lower.CL upper.CL .group
 Control 2016 season    513 14.1 9.76    469.7      556  a    
 Control 2015 season    406 14.1 9.76    363.5      449   b   
 AV      2016 season    173 14.1 9.76    130.4      216    c  
 AV      2015 season    142 14.1 9.76     98.8      185    c  

Results are averaged over the levels of: Genotype, Row 
Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95 
Conf-level adjustment: sidak method for 4 estimates 
P value adjustment: sidak method for 6 tests 
significance level used: alpha = 0.05 
NOTE: If two or more means share the same grouping symbol,
      then we cannot show them to be different.
      But we also did not show them to be the same. 
