In [18]:
!pip install pandas
!pip install numpy
!pip install matplotlib
!pip install statsmodels
!pip install seaborn
!pip install scipy.stats
!pip install scikit_posthocs



ERROR: Could not find a version that satisfies the requirement scipy.stats (from versions: none)
ERROR: No matching distribution found for scipy.stats




In [19]:
'''PART 1: LINE PLOT GENERATION FOR ABSORBED PAR ON EACH RANK.'''

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import sem, t

# Load datasets for high and low densities
data_high = pd.read_csv('combined_files/combined_high_ranks_cleaned.csv')
data_low = pd.read_csv('combined_files/combined_low_ranks_cleaned.csv')

# Function to calculate mean, std dev, and confidence intervals
def calculate_statistics(data, confidence=0.95):
    grouped = data.groupby(['architecture', 'rank'])['absorbedPAR_umol_m2_s1']
    mean_values = grouped.mean().reset_index(name='mean')
    mean_values['std_dev'] = grouped.std().reset_index(name='std_dev')['std_dev']
    mean_values['n'] = grouped.count().reset_index(name='count')['count']
    
    # Calculate the t-value for 95% confidence interval
    mean_values['t_value'] = mean_values['n'].apply(lambda x: t.ppf((1 + confidence) / 2., x - 1))
    
    # Calculate the margin of error
    mean_values['margin_error'] = mean_values['t_value'] * (mean_values['std_dev'] / np.sqrt(mean_values['n']))
    
    # Calculate lower and upper confidence bounds
    mean_values['ci_lower'] = mean_values['mean'] - mean_values['margin_error']
    mean_values['ci_upper'] = mean_values['mean'] + mean_values['margin_error']
    
    return mean_values

# Calculate statistics for both high-density and low-density datasets
mean_values_high = calculate_statistics(data_high)
mean_values_low = calculate_statistics(data_low)

# Plotting function to avoid repetition
def plot_absorbedPAR(mean_values, density_label, output_file):
    plt.figure(figsize=(10, 6))
    
    # Plotting lineplot for each architecture type within the current density
    for architecture in mean_values['architecture'].unique():
        architecture_data = mean_values[mean_values['architecture'] == architecture]
        sns.lineplot(x='rank', y='mean', data=architecture_data, label=architecture, marker='o')
        
        # Adding the confidence interval as a shaded area
        plt.fill_between(architecture_data['rank'], architecture_data['ci_lower'], architecture_data['ci_upper'], alpha=0.3)
    
    #plt.title(f'Absorbed PAR over Leaf Ranks for {density_label} Density')
    plt.xlabel('Leaf Rank')
    plt.ylabel('Absorbed PAR (umol m-2 s-1)')
    plt.ylim(0, 300)
    plt.xticks(range(1, 11))
    plt.legend(title='Architecture Type')
    plt.savefig(output_file)
    plt.close()

# Plot for high-density data
plot_absorbedPAR(mean_values_high, 'High', 'plots/mean_absorbedPAR_for_high_density.png')

# Plot for low-density data
plot_absorbedPAR(mean_values_low, 'Low', 'plots/mean_absorbedPAR_for_low_density.png')

print("Plots generated successfully for high and low densities.")

Plots generated successfully for high and low densities.


In [20]:
'''PART 2: LINE PLOT FOR  RANKS of ABSORBED PAR OVER EACH LEAF RANK.'''

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import rankdata

# Load datasets for high and low densities
data_high = pd.read_csv('combined_files/combined_high_ranks_cleaned.csv')
data_low = pd.read_csv('combined_files/combined_low_ranks_cleaned.csv')

# Function to calculate ranks for absorbed PAR within each architecture and leaf rank
def calculate_ranks(data):
    # Group by architecture and rank, then calculate the rank for absorbed PAR
    data['rank_absorbedPAR'] = data.groupby(['architecture', 'rank'])['absorbedPAR_umol_m2_s1'].transform(lambda x: rankdata(x, method='average'))
    # Calculate the mean rank for each architecture and leaf rank
    mean_ranks = data.groupby(['architecture', 'rank'])['rank_absorbedPAR'].mean().reset_index(name='mean_rank')
    
    return mean_ranks

# Calculate ranks for both high-density and low-density datasets
mean_ranks_high = calculate_ranks(data_high)
mean_ranks_low = calculate_ranks(data_low)

# Plotting function to avoid repetition
def plot_ranked_absorbedPAR(mean_ranks, density_label, output_file):
    plt.figure(figsize=(10, 6))
    
    # Plotting lineplot for each architecture type within the current density
    for architecture in mean_ranks['architecture'].unique():
        architecture_data = mean_ranks[mean_ranks['architecture'] == architecture]
        sns.lineplot(x='rank', y='mean_rank', data=architecture_data, label=architecture, marker='o')
    
    #plt.title(f'Ranked Absorbed PAR over Leaf Ranks for {density_label} Density')
    plt.xlabel('Leaf Rank')
    plt.ylabel('Mean Ranked data of Absorbed PAR (umol m-2 s-1)')
    
    # Automatically adjust the y-axis based on data range
    plt.ylim(mean_ranks['mean_rank'].min(), mean_ranks['mean_rank'].max())
    plt.xticks(range(1, 11))
    plt.legend(title='Architecture Type')
    plt.savefig(output_file)
    plt.close()

# Plot for high-density data
plot_ranked_absorbedPAR(mean_ranks_high, 'High', 'plots/ranked_absorbedPAR_for_high_density.png')

# Plot for low-density data
plot_ranked_absorbedPAR(mean_ranks_low, 'Low', 'plots/ranked_absorbedPAR_for_low_density.png')




In [28]:
'''PART 3: BAR PLOT OF TOTAL ABS PAR FOR PLANTS AND SENSORS'''

import pandas as pd
import matplotlib.pyplot as plt

# Load datasets for high and low densities
data_high_totalPAR_plants = pd.read_csv('combined_files/combined_total_absorbedPAR_high_cleaned.csv')
data_low_totalPAR_plants = pd.read_csv('combined_files/combined_total_absorbedPAR_low_cleaned.csv')
data_high_sensors = pd.read_csv('combined_files/combined_high_sensors_cleaned.csv')
data_low_sensors = pd.read_csv('combined_files/combined_low_sensors_cleaned.csv')


# Function to calculate mean, std dev, and confidence intervals
def calculate_statistics(data):
    grouped = data.groupby(['architecture'])['absorbedPAR_umol_m2_s1']
    mean_values = grouped.mean().reset_index(name='mean')
    mean_values['std_dev'] = grouped.std().reset_index(name='std_dev')['std_dev']
    mean_values['n'] = grouped.count().reset_index(name='count')['count']
        
    return mean_values

# Calculate statistics for both high-density and low-density datasets
mean_values_high_totalPAR_plants = calculate_statistics(data_high_totalPAR_plants)
mean_values_low_totalPAR_plants = calculate_statistics(data_low_totalPAR_plants)
mean_values_high_sensors = calculate_statistics(data_high_sensors)
mean_values_low_sensors = calculate_statistics(data_low_sensors)

def barplot_absorbedPAR(mean_values, density_label, output_file, significance_letters):
    fig, ax = plt.subplots(figsize=(10, 6))
    
    # Plotting bar plot for each architecture type within the current density
    for architecture in mean_values['architecture'].unique():
        architecture_data = mean_values[mean_values['architecture'] == architecture]
        mean = architecture_data['mean'].iloc[0]
        std_dev = architecture_data['std_dev'].iloc[0]
        
        # Plotting the bar
        ax.bar(architecture, mean, yerr=std_dev, 
               width=0.5, edgecolor='black', linewidth=0.5, capsize=5, color='green')
        
        # Get the significance letter for the current architecture
        significance_letter = significance_letters.get(architecture, '')

        # Calculate the position for the significance letter
        offset = std_dev * 0.1  # 10% of the error bar length
        y_position = mean + std_dev + offset
        
        # Adding the significance letter above the error bar
        ax.text(architecture, y_position, s=significance_letter, 
                ha='center', va='bottom', fontsize=12)


    # Set the title and labels
    #ax.set_title(f'Total Absorbed PAR for {density_label} density')
    
    # Automatically determine the y-axis label based on the output file name
    if 'sensors' in output_file.lower():
        y_label = 'Absorbed PAR sensors (umol m-2 s-1)'
        y_lim = (0, 70)
    else:
        y_label = 'Absorbed PAR per plant (umol m-2 s-1)'
        y_lim = (0, 1500)

    ax.set_ylabel(y_label)
    ax.set_ylim(y_lim)
    plt.xticks(rotation=45, fontsize=10)

    # Save the plot
    plt.tight_layout()
    plt.savefig(output_file)
    plt.close(fig)

# Example dictionaries containing significance letters for architectures
significance_letters_high_plant = {
    'architecture_A': 'c',
    'architecture_B': 'b',
    'architecture_C': 'a',
    'architecture_D': 'b',
    'architecture_E': 'd',
    'architecture_F': 'e',
    'control': 'd'
}

significance_letters_low_plant = {
    'architecture_A': 'd',
    'architecture_B': 'b',
    'architecture_C': 'a',
    'architecture_D': 'ab',
    'architecture_E': 'e',
    'architecture_F': 'f',
    'control': 'c'
}
significance_letters_high_sensors = {
    'architecture_A': 'e',
    'architecture_B': 'c',
    'architecture_C': 'f',
    'architecture_D': 'e',
    'architecture_E': 'b',
    'architecture_F': 'a',
    'control': 'd'
}

significance_letters_low_sensors = {
    'architecture_A': 'd',
    'architecture_B': 'c',
    'architecture_C': 'e',
    'architecture_D': 'd',
    'architecture_E': 'b',
    'architecture_F': 'a',
    'control': 'd'
}


# Call the barplot function.
barplot_absorbedPAR(mean_values_high_totalPAR_plants, 'High', 'plots/total_absorbedPAR_high_density.png', significance_letters_high_plant)
barplot_absorbedPAR(mean_values_low_totalPAR_plants, 'Low', 'plots/total_absorbedPAR_low_density.png', significance_letters_low_plant)
barplot_absorbedPAR(mean_values_high_sensors, 'High', 'plots/sensors_high_density.png', significance_letters_high_sensors)
barplot_absorbedPAR(mean_values_low_sensors, 'Low', 'plots/sensors_low_density.png', significance_letters_low_sensors)

print("Plots generated successfully for high and low densities.")


Plots generated successfully for high and low densities.


In [47]:
"""PART 4: BOX PLOT FOR ABSORBED PAR FOR PLANTS AND SENSORS"""

import matplotlib.pyplot as plt

# Function to create boxplots with significance letters
def create_boxplot(data, y, title, output_file, significance_letters):
    plt.figure(figsize=(12, 8))
    architectures = data['architecture'].unique()
    data_to_plot = [data[data['architecture'] == arch][y] for arch in architectures]
    
    plt.boxplot(data_to_plot, labels=architectures)
    plt.xticks(rotation=45, fontsize=10)
    
    # Add significance letters
    for i, (architecture, letter) in enumerate(significance_letters.items()):
        architecture_data = data[data['architecture'] == architecture]
        mean_value = architecture_data[y].mean()
        std_dev = architecture_data[y].std()
        offset = std_dev * 2  # 10% of the error bar length
        y_position = mean_value + std_dev + offset
        plt.text(x=i + 1, y=y_position, s=letter, ha='center', va='bottom', fontsize=12, color='black')
        
    # Automatically determine the y-axis label based on the output file name
    if 'sensors' in output_file.lower():
        y_label = 'Absorbed PAR sensors (umol m-2 s-1)'
        y_lim = (0, 80)
    else:
        y_label = 'Absorbed PAR per plant (umol m-2 s-1)'
        y_lim = (600, 1500)

    plt.ylabel(y_label)
    plt.ylim(y_lim)
    plt.xlabel('')  # Remove x-axis label
    plt.tight_layout()
    plt.savefig(output_file)
    plt.close()

# Create boxplots for total absorbed PAR (high and low densities) and sensors (high and low densities)
create_boxplot(data_high_totalPAR_plants, 'absorbedPAR_umol_m2_s1', 'combined_total_absorbedPAR_high_cleaned', 'plots/boxplot_total_absorbedPAR_high_density.png', significance_letters_high_plant)
create_boxplot(data_low_totalPAR_plants, 'absorbedPAR_umol_m2_s1', 'combined_total_absorbedPAR_high_cleaned', 'plots/boxplot_total_absorbedPAR_low_density.png', significance_letters_low_plant)
create_boxplot(data_high_sensors, 'absorbedPAR_umol_m2_s1', 'combined_high_sensors_cleaned', 'plots/boxplot_absorbedPAR_sensors_high_density.png', significance_letters_high_sensors)
create_boxplot(data_low_sensors, 'absorbedPAR_umol_m2_s1', 'combined_low_sensors_cleaned', 'plots/boxplot_absorbedPAR_sensors_low_density.png', significance_letters_low_sensors)

print("Boxplots generated successfully for high and low densities.")

  plt.boxplot(data_to_plot, labels=architectures)
  plt.boxplot(data_to_plot, labels=architectures)
  plt.boxplot(data_to_plot, labels=architectures)
  plt.boxplot(data_to_plot, labels=architectures)


Boxplots generated successfully for high and low densities.


In [25]:
'''PART 5: BAR PLOT OF RANK DATA OF TOTAL ABS PAR FOR PLANTS AND SENSORS'''

import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import rankdata

# Load datasets for high and low densities
data_high_totalPAR_plants = pd.read_csv('combined_files/combined_total_absorbedPAR_high_cleaned.csv')
data_low_totalPAR_plants = pd.read_csv('combined_files/combined_total_absorbedPAR_low_cleaned.csv')
data_high_sensors = pd.read_csv('combined_files/combined_high_sensors_cleaned.csv')
data_low_sensors = pd.read_csv('combined_files/combined_low_sensors_cleaned.csv')

# Function to calculate ranks
def calculate_ranks(data):
    # Rank the absorbed PAR values
    data['rank'] = rankdata(data['absorbedPAR_umol_m2_s1'])
    return data

# Apply rank calculation for both high-density and low-density datasets
data_high_totalPAR_plants = calculate_ranks(data_high_totalPAR_plants)
data_low_totalPAR_plants = calculate_ranks(data_low_totalPAR_plants)
data_high_sensors = calculate_ranks(data_high_sensors)
data_low_sensors = calculate_ranks(data_low_sensors)

# Function to calculate mean rank and standard deviation for each architecture
def calculate_statistics(data):
    grouped = data.groupby(['architecture'])['rank']
    mean_values = grouped.mean().reset_index(name='mean_rank')
    mean_values['std_dev'] = grouped.std().reset_index(name='std_dev')['std_dev']
    mean_values['n'] = grouped.count().reset_index(name='count')['count']
        
    return mean_values

# Calculate statistics for both high-density and low-density datasets
mean_values_high_totalPAR_plants = calculate_statistics(data_high_totalPAR_plants)
mean_values_low_totalPAR_plants = calculate_statistics(data_low_totalPAR_plants)
mean_values_high_sensors = calculate_statistics(data_high_sensors)
mean_values_low_sensors = calculate_statistics(data_low_sensors)

def barplot_ranked_absorbedPAR(mean_values, density_label, output_file, significance_letters):
    fig, ax = plt.subplots(figsize=(10, 6))
    
    # Plotting bar plot for each architecture type within the current density
    for architecture in mean_values['architecture'].unique():
        architecture_data = mean_values[mean_values['architecture'] == architecture]
        mean_rank = architecture_data['mean_rank'].iloc[0]
        std_dev = architecture_data['std_dev'].iloc[0]
        
        # Plotting the bar
        ax.bar(architecture, mean_rank, yerr=std_dev, 
               width=0.5, edgecolor='black', linewidth=0.5, capsize=5, color='green')
        
        # Get the significance letter for the current architecture
        significance_letter = significance_letters.get(architecture, '')

        # Calculate the position for the significance letter
        offset = std_dev * 0.1  # 10% of the error bar length
        y_position = mean_rank + std_dev + offset
        
        # Adding the significance letter above the error bar
        ax.text(architecture, y_position, s=significance_letter, 
                ha='center', va='bottom', fontsize=12)


    # Set the title and labels
    #ax.set_title(f'Total Ranked Absorbed PAR for {density_label} density')

    # Automatically determine the y-axis label based on the output file name
    if 'sensors' in output_file.lower():
        y_label = 'Ranked absorbed PAR sensors (umol m-2 s-1)'
    else:
        y_label = 'Ranked absorbed PAR per plant (umol m-2 s-1)'

    ax.set_ylabel(y_label)
    plt.xticks(rotation=45, fontsize=10)

    # Save the plot
    plt.tight_layout()
    plt.savefig(output_file)
    plt.close(fig)

# Example dictionaries containing significance letters for architectures
significance_letters_high_plant_ranked = {
    'architecture_A': 'a',
    'architecture_B': 'b',
    'architecture_C': 'b',
    'architecture_D': 'b',
    'architecture_E': 'a',
    'architecture_F': 'c',
    'control': 'a'
}

significance_letters_low_plant_ranked = {
    'architecture_A': 'ab',
    'architecture_B': 'c',
    'architecture_C': 'c',
    'architecture_D': 'c',
    'architecture_E': 'a',
    'architecture_F': 'd',
    'control': 'b'
}
significance_letters_high_sensors_ranked= {
    'architecture_A': 'a',
    'architecture_B': 'b',
    'architecture_C': 'c',
    'architecture_D': 'ad',
    'architecture_E': 'e',
    'architecture_F': 'f',
    'control': 'bd'
}

significance_letters_low_sensors_ranked = {
    'architecture_A': 'a',
    'architecture_B': 'a',
    'architecture_C': 'b',
    'architecture_D': 'a',
    'architecture_E': 'c',
    'architecture_F': 'd',
    'control': 'a'
}

# Call the barplot function with significance letters
barplot_ranked_absorbedPAR(mean_values_high_totalPAR_plants, 'High', 'plots/ranked_total_absorbedPAR_high.png', significance_letters_high_plant_ranked)
barplot_ranked_absorbedPAR(mean_values_low_totalPAR_plants, 'Low', 'plots/ranked_total_absorbedPAR_low.png', significance_letters_low_plant_ranked)
barplot_ranked_absorbedPAR(mean_values_high_sensors, 'High', 'plots/ranked_sensors_high.png', significance_letters_high_sensors_ranked)
barplot_ranked_absorbedPAR(mean_values_low_sensors, 'Low', 'plots/ranked_sensors_low.png', significance_letters_low_sensors_ranked)

print("Ranked plots with significance letters generated successfully for high and low densities.")



Ranked plots with significance letters generated successfully for high and low densities.
