This notebook will assess the correlations between volume estimates and plant structure parameters. The following procedure will be implemented:
1. Create a unified df for volume and structure data.
2. Subset unified df into tomato, watermelon, and total dfs.
3. Create regression equations for linear, exponential, logarithmic, power, and polynomial regression.
4. Create a df for each of the regression coefficients, R2 values, and RMSE values.
5. Fit each structure and volume pair to each regression for each subset and calc stats.
6. Determine the highest R2 and lowest RMSE scoring equations for each subset's structures.
7. Create figures for each volume x structure comparing both volume calcs and regression methods.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import mean_squared_error

In [2]:
# Import dfs
bdf = pd.read_csv('biomass_scrape_c3.csv')
vdf = pd.read_csv('volume_directory.csv')

In [3]:
print(bdf.columns)
print(vdf.columns)

Index(['Sample ID', 'Plant ID', 'Cultivar', 'Species', 'Sampling Age',
       'Sampling Date', 'Leaf No', 'Stem No', 'Leaves Per Stem', 'Flower No',
       'Total Fruit No', 'SLA', 'Leaf Biomass', 'Shoot Biomass',
       'Flower Biomass', 'Ripe Fruit No', 'Ripe Fruit Biomass',
       'Total Fruit Biomass', 'Aboveground Biomass', 'true_ground_area',
       'L#PA', 'Unripe Fruit No', 'Unripe Fruit Biomass', 'Leaflet No',
       'Stem and Rachi No'],
      dtype='object')
Index(['sample_id', 'plot_id', 'trial_dir', 'date_dir', 'species', 'width',
       'depth', 'height', 'voxel_vol_02', 'voxel_vol_01', 'voxel_vol_005',
       'voxel_vol_002', 'voxel_vol_001', 'convh_vol', 'ground_area'],
      dtype='object')


In [None]:
# Convert bdf to useful format
bdf.columns = bdf.columns.str.lower()
bdf.columns = bdf.columns.str.replace(' ', '_')
bdf.rename(columns={'aboveground_biomass':'aboveground_veg_biomass',
                    'stem_no':'shoot_no'}, inplace=True)
bdf['aboveground_total_biomass'] = bdf['aboveground_veg_biomass'] + bdf['flower_biomass'] + bdf['total_fruit_biomass']
print(bdf.columns)
bdf.drop(columns=['sampling_date', 'leaves_per_stem', 'sla', 
                  'ripe_fruit_no', 'ripe_fruit_biomass', 'species'], inplace=True)
print(bdf.columns)

Index(['sample_id', 'plant_id', 'cultivar', 'species', 'sampling_age',
       'sampling_date', 'leaf_no', 'shoot_no', 'leaves_per_stem', 'flower_no',
       'total_fruit_no', 'sla', 'leaf_biomass', 'shoot_biomass',
       'flower_biomass', 'ripe_fruit_no', 'ripe_fruit_biomass',
       'total_fruit_biomass', 'aboveground_veg_biomass', 'true_ground_area',
       'l#pa', 'unripe_fruit_no', 'unripe_fruit_biomass', 'leaflet_no',
       'stem_and_rachi_no', 'aboveground_total_biomass'],
      dtype='object')
Index(['sample_id', 'plant_id', 'cultivar', 'sampling_age', 'leaf_no',
       'shoot_no', 'flower_no', 'total_fruit_no', 'leaf_biomass',
       'shoot_biomass', 'flower_biomass', 'total_fruit_biomass',
       'aboveground_veg_biomass', 'true_ground_area', 'l#pa',
       'unripe_fruit_no', 'unripe_fruit_biomass', 'aboveground_total_biomass'],
      dtype='object')


In [5]:
# combine bdf and vdf by sample_id
df = pd.merge(bdf, vdf, on='sample_id', how='inner')
print(df.columns)
print(len(df))
print(df.head())

Index(['sample_id', 'plant_id', 'cultivar', 'sampling_age', 'leaf_no',
       'shoot_no', 'flower_no', 'total_fruit_no', 'leaf_biomass',
       'shoot_biomass', 'flower_biomass', 'total_fruit_biomass',
       'aboveground_veg_biomass', 'true_ground_area', 'l#pa',
       'unripe_fruit_no', 'unripe_fruit_biomass', 'aboveground_total_biomass',
       'plot_id', 'trial_dir', 'date_dir', 'species', 'width', 'depth',
       'height', 'voxel_vol_02', 'voxel_vol_01', 'voxel_vol_005',
       'voxel_vol_002', 'voxel_vol_001', 'convh_vol', 'ground_area'],
      dtype='object')
24
              sample_id                         plant_id  cultivar  \
0  ta_Big_Beef_Day_Zero  Transplant-Aged Big Beef Tomato  Big Beef   
1      gp_21_07_07_2023                               21  Big Beef   
2      gp_09_06_24_2023                               09  Big Beef   
3      gp_07_08_02_2023                               07  Big Beef   
4      gp_05_09_01_2023                               05  Big Beef   

   

In [6]:
# Drop problem rows
problem_samples = ['ta_Big_Beef_Day_Zero', 'ta_German_Johnson_Day_Zero',
                   'ta_Red_Deuce_Day_Zero', 'ta_Mambo_Day_Zero', 'gp_12_06_24_2023']
bdf = bdf[~bdf['sample_id'].isin(problem_samples)]

In [7]:
# Subset df into species dfs
tdf = df[df['species'] == 'tomato']
wdf = df[df['species'] == 'watermelon']

In [8]:
# Create regression equations
def linear(x, a):
    return a*x

def exponential(x, a, b):
    return a*np.exp(-b*x)

def power(x, a, b):
    return a*x**b

def log(x, a, b):
    return a*np.log(b*x + 1)

def poly2deg(x, a, b):
    return a*x**2 + b*x

In [9]:
# Create a df for regression evaluation
r_cols = ['subset', 'parameter', 'volume_method',
          'linear_a', 'linear_r2','linear_rmse', 'linear_rrmse',
          'exponential_a', 'exponential_b', 'exponential_r2','exponential_rmse', 'exponential_rrmse',
          'power_a', 'power_b', 'power_r2','power_rmse', 'power_rrmse',
          'log_a', 'log_b', 'log_r2','log_rmse', 'log_rrmse',
          'poly2deg_a', 'poly2deg_b', 'poly2deg_r2','poly2deg_rmse', 'poly2deg_rrmse']
rdf = pd.DataFrame(columns=r_cols)

In [10]:
# Set combinations
b_params = ['leaf_biomass', 'shoot_biomass', 'aboveground_veg_biomass']
n_params = ['leaf_no', 'shoot_no']
v_methods = ['voxel_vol_02', 'voxel_vol_01', 'voxel_vol_005', 'voxel_vol_002', 'voxel_vol_001', 'convh_vol']
subset_list = [[df, 'both_species'],
               [tdf, 'tomato'],
               [wdf, 'watermelon']]

In [11]:
# Define curve fit and leave one out cross validation function
def loo_opt(x, y, func):
    x = x.reset_index(drop=True)
    y = y.reset_index(drop=True)
    
    loo = LeaveOneOut()
    y_true = []
    y_pred = []
    popts = []

    for train_index, test_index in loo.split(x):
        x_train, x_test = x.iloc[train_index], x.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        
        # Fit model
        popt, _ = curve_fit(func, x_train.values.ravel(), y_train.values.ravel(), maxfev=80000)
        
        # Predict
        y_pred_i = func(x_test.values.ravel(), *popt)[0]
        
        # Store results
        y_true.append(y_test.values[0])
        y_pred.append(y_pred_i)
        popts.append(popt)

    # Compute RRMSE
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    return y_true, y_pred, popts

In [12]:
# Define row function
def regressions (param, v_method, subset):
    global rdf

    sdf = subset[0]
    subset_name = subset[1]

    # set up x and y
    xdata = sdf[v_method]
    ydata = sdf[param]

    # set up row
    row = pd.DataFrame(columns=r_cols)
    row['subset'] = [subset_name]
    row['parameter'] = [param]
    row['volume_method'] = [v_method]

    # linear
    print('Starting linear regression fit for ' + param + ' and ' + v_method + ' in ' + subset_name)
    y_true, y_pred, popts = loo_opt(xdata, ydata, linear)
    row['linear_a'] = np.mean([popt[0] for popt in popts])
    row['linear_r2'] = r2_score(y_true, y_pred)
    row['linear_rmse'] = np.sqrt(mean_squared_error(y_true, y_pred))
    row['linear_rrmse'] = row['linear_rmse'] / np.mean(y_true)
    print('Conducted linear regression fit for ' + param + ' and ' + v_method + ' in ' + subset_name)

    # exponential
    y_true, y_pred, popts = loo_opt(xdata, ydata, exponential)
    row['exponential_a'] = np.mean([popt[0] for popt in popts])
    row['exponential_b'] = np.mean([popt[1] for popt in popts])
    row['exponential_r2'] = r2_score(y_true, y_pred)
    row['exponential_rmse'] = np.sqrt(mean_squared_error(y_true, y_pred))
    row['exponential_rrmse'] = row['exponential_rmse'] / np.mean(y_true)
    print('Conducted exponential regression fit for ' + param + ' and ' + v_method + ' in ' + subset_name)

    # power
    y_true, y_pred, popts = loo_opt(xdata, ydata, power)
    row['power_a'] = np.mean([popt[0] for popt in popts])
    row['power_b'] = np.mean([popt[1] for popt in popts])
    row['power_r2'] = r2_score(y_true, y_pred)
    row['power_rmse'] = np.sqrt(mean_squared_error(y_true, y_pred))
    row['power_rrmse'] = row['power_rmse'] / np.mean(y_true)
    print('Conducted power regression fit for ' + param + ' and ' + v_method + ' in ' + subset_name)

    # log
    y_true, y_pred, popts = loo_opt(xdata, ydata, log)
    row['log_a'] = np.mean([popt[0] for popt in popts])
    row['log_b'] = np.mean([popt[1] for popt in popts])
    row['log_r2'] = r2_score(y_true, y_pred)
    row['log_rmse'] = np.sqrt(mean_squared_error(y_true, y_pred))
    row['log_rrmse'] = row['log_rmse'] / np.mean(y_true)
    print('Conducted log regression fit for ' + param + ' and ' + v_method + ' in ' + subset_name)

    # poly2deg
    y_true, y_pred, popts = loo_opt(xdata, ydata, poly2deg)
    row['poly2deg_a'] = np.mean([popt[0] for popt in popts])
    row['poly2deg_b'] = np.mean([popt[1] for popt in popts])
    row['poly2deg_r2'] = r2_score(y_true, y_pred)
    row['poly2deg_rmse'] = np.sqrt(mean_squared_error(y_true, y_pred))
    row['poly2deg_rrmse'] = row['poly2deg_rmse'] / np.mean(y_true)
    print('Conducted poly2deg regression fit for ' + param + ' and ' + v_method + ' in ' + subset_name)

    # append row to rdf
    rdf = pd.concat([row, rdf], ignore_index=True)

    return

In [13]:
# Run regressions
for subset in subset_list:
    for param in b_params:
        for v_method in v_methods:
            regressions(param, v_method, subset)
    print('Finished biomass regressions for ' + subset[1])
    if subset[1] != 'both_species':
        for param in n_params:
            for v_method in v_methods:
                regressions(param, v_method, subset)

Starting linear regression fit for leaf_biomass and voxel_vol_02 in both_species
Conducted linear regression fit for leaf_biomass and voxel_vol_02 in both_species
Conducted exponential regression fit for leaf_biomass and voxel_vol_02 in both_species
Conducted power regression fit for leaf_biomass and voxel_vol_02 in both_species


  return a*np.log(b*x + 1)
  rdf = pd.concat([row, rdf], ignore_index=True)


Conducted log regression fit for leaf_biomass and voxel_vol_02 in both_species
Conducted poly2deg regression fit for leaf_biomass and voxel_vol_02 in both_species
Starting linear regression fit for leaf_biomass and voxel_vol_01 in both_species
Conducted linear regression fit for leaf_biomass and voxel_vol_01 in both_species
Conducted exponential regression fit for leaf_biomass and voxel_vol_01 in both_species
Conducted power regression fit for leaf_biomass and voxel_vol_01 in both_species
Conducted log regression fit for leaf_biomass and voxel_vol_01 in both_species
Conducted poly2deg regression fit for leaf_biomass and voxel_vol_01 in both_species
Starting linear regression fit for leaf_biomass and voxel_vol_005 in both_species
Conducted linear regression fit for leaf_biomass and voxel_vol_005 in both_species
Conducted exponential regression fit for leaf_biomass and voxel_vol_005 in both_species
Conducted power regression fit for leaf_biomass and voxel_vol_005 in both_species
Conducte

  return a*x**b
  return a*np.exp(-b*x)
  return a*np.log(b*x + 1)


Conducted log regression fit for leaf_biomass and voxel_vol_001 in both_species
Conducted poly2deg regression fit for leaf_biomass and voxel_vol_001 in both_species
Starting linear regression fit for leaf_biomass and convh_vol in both_species
Conducted linear regression fit for leaf_biomass and convh_vol in both_species
Conducted exponential regression fit for leaf_biomass and convh_vol in both_species
Conducted power regression fit for leaf_biomass and convh_vol in both_species
Conducted log regression fit for leaf_biomass and convh_vol in both_species
Conducted poly2deg regression fit for leaf_biomass and convh_vol in both_species
Starting linear regression fit for shoot_biomass and voxel_vol_02 in both_species
Conducted linear regression fit for shoot_biomass and voxel_vol_02 in both_species
Conducted exponential regression fit for shoot_biomass and voxel_vol_02 in both_species
Conducted power regression fit for shoot_biomass and voxel_vol_02 in both_species
Conducted log regression

  return a*np.log(b*x + 1)


Conducted poly2deg regression fit for shoot_biomass and voxel_vol_02 in both_species
Starting linear regression fit for shoot_biomass and voxel_vol_01 in both_species
Conducted linear regression fit for shoot_biomass and voxel_vol_01 in both_species
Conducted exponential regression fit for shoot_biomass and voxel_vol_01 in both_species
Conducted power regression fit for shoot_biomass and voxel_vol_01 in both_species
Conducted log regression fit for shoot_biomass and voxel_vol_01 in both_species
Conducted poly2deg regression fit for shoot_biomass and voxel_vol_01 in both_species
Starting linear regression fit for shoot_biomass and voxel_vol_005 in both_species
Conducted linear regression fit for shoot_biomass and voxel_vol_005 in both_species
Conducted exponential regression fit for shoot_biomass and voxel_vol_005 in both_species
Conducted power regression fit for shoot_biomass and voxel_vol_005 in both_species
Conducted log regression fit for shoot_biomass and voxel_vol_005 in both_spe

  return a*x**b
  return a*x**b
  return a*np.exp(-b*x)
  return a*np.log(b*x + 1)


Conducted log regression fit for shoot_biomass and voxel_vol_001 in both_species
Conducted poly2deg regression fit for shoot_biomass and voxel_vol_001 in both_species
Starting linear regression fit for shoot_biomass and convh_vol in both_species
Conducted linear regression fit for shoot_biomass and convh_vol in both_species
Conducted exponential regression fit for shoot_biomass and convh_vol in both_species
Conducted power regression fit for shoot_biomass and convh_vol in both_species
Conducted log regression fit for shoot_biomass and convh_vol in both_species
Conducted poly2deg regression fit for shoot_biomass and convh_vol in both_species
Starting linear regression fit for aboveground_veg_biomass and voxel_vol_02 in both_species
Conducted linear regression fit for aboveground_veg_biomass and voxel_vol_02 in both_species
Conducted exponential regression fit for aboveground_veg_biomass and voxel_vol_02 in both_species
Conducted power regression fit for aboveground_veg_biomass and voxel

  return a*x**b
  return a*x**b
  return a*np.exp(-b*x)
  return a*np.log(b*x + 1)


Conducted power regression fit for aboveground_veg_biomass and voxel_vol_001 in both_species
Conducted log regression fit for aboveground_veg_biomass and voxel_vol_001 in both_species
Conducted poly2deg regression fit for aboveground_veg_biomass and voxel_vol_001 in both_species
Starting linear regression fit for aboveground_veg_biomass and convh_vol in both_species
Conducted linear regression fit for aboveground_veg_biomass and convh_vol in both_species
Conducted exponential regression fit for aboveground_veg_biomass and convh_vol in both_species
Conducted power regression fit for aboveground_veg_biomass and convh_vol in both_species
Conducted log regression fit for aboveground_veg_biomass and convh_vol in both_species
Conducted poly2deg regression fit for aboveground_veg_biomass and convh_vol in both_species
Finished biomass regressions for both_species
Starting linear regression fit for leaf_biomass and voxel_vol_02 in tomato
Conducted linear regression fit for leaf_biomass and voxe

  return a*np.log(b*x + 1)


Conducted power regression fit for leaf_biomass and voxel_vol_01 in tomato
Conducted log regression fit for leaf_biomass and voxel_vol_01 in tomato
Conducted poly2deg regression fit for leaf_biomass and voxel_vol_01 in tomato
Starting linear regression fit for leaf_biomass and voxel_vol_005 in tomato
Conducted linear regression fit for leaf_biomass and voxel_vol_005 in tomato
Conducted exponential regression fit for leaf_biomass and voxel_vol_005 in tomato
Conducted power regression fit for leaf_biomass and voxel_vol_005 in tomato
Conducted log regression fit for leaf_biomass and voxel_vol_005 in tomato
Conducted poly2deg regression fit for leaf_biomass and voxel_vol_005 in tomato
Starting linear regression fit for leaf_biomass and voxel_vol_002 in tomato
Conducted linear regression fit for leaf_biomass and voxel_vol_002 in tomato
Conducted exponential regression fit for leaf_biomass and voxel_vol_002 in tomato
Conducted power regression fit for leaf_biomass and voxel_vol_002 in tomato

  return a*np.exp(-b*x)
  return a*np.log(b*x + 1)


Conducted log regression fit for leaf_biomass and convh_vol in tomato
Conducted poly2deg regression fit for leaf_biomass and convh_vol in tomato
Starting linear regression fit for shoot_biomass and voxel_vol_02 in tomato
Conducted linear regression fit for shoot_biomass and voxel_vol_02 in tomato
Conducted exponential regression fit for shoot_biomass and voxel_vol_02 in tomato
Conducted power regression fit for shoot_biomass and voxel_vol_02 in tomato
Conducted log regression fit for shoot_biomass and voxel_vol_02 in tomato
Conducted poly2deg regression fit for shoot_biomass and voxel_vol_02 in tomato
Starting linear regression fit for shoot_biomass and voxel_vol_01 in tomato
Conducted linear regression fit for shoot_biomass and voxel_vol_01 in tomato
Conducted exponential regression fit for shoot_biomass and voxel_vol_01 in tomato
Conducted power regression fit for shoot_biomass and voxel_vol_01 in tomato
Conducted log regression fit for shoot_biomass and voxel_vol_01 in tomato
Conduc

  return a*np.exp(-b*x)
  return a*np.log(b*x + 1)


Conducted log regression fit for shoot_biomass and convh_vol in tomato
Conducted poly2deg regression fit for shoot_biomass and convh_vol in tomato
Starting linear regression fit for aboveground_veg_biomass and voxel_vol_02 in tomato
Conducted linear regression fit for aboveground_veg_biomass and voxel_vol_02 in tomato
Conducted exponential regression fit for aboveground_veg_biomass and voxel_vol_02 in tomato
Conducted power regression fit for aboveground_veg_biomass and voxel_vol_02 in tomato
Conducted log regression fit for aboveground_veg_biomass and voxel_vol_02 in tomato
Conducted poly2deg regression fit for aboveground_veg_biomass and voxel_vol_02 in tomato
Starting linear regression fit for aboveground_veg_biomass and voxel_vol_01 in tomato
Conducted linear regression fit for aboveground_veg_biomass and voxel_vol_01 in tomato
Conducted exponential regression fit for aboveground_veg_biomass and voxel_vol_01 in tomato
Conducted power regression fit for aboveground_veg_biomass and v

  return a*np.exp(-b*x)
  return a*np.log(b*x + 1)
  return a*np.log(b*x + 1)


Conducted log regression fit for leaf_no and voxel_vol_02 in tomato
Conducted poly2deg regression fit for leaf_no and voxel_vol_02 in tomato
Starting linear regression fit for leaf_no and voxel_vol_01 in tomato
Conducted linear regression fit for leaf_no and voxel_vol_01 in tomato
Conducted exponential regression fit for leaf_no and voxel_vol_01 in tomato
Conducted power regression fit for leaf_no and voxel_vol_01 in tomato
Conducted log regression fit for leaf_no and voxel_vol_01 in tomato
Conducted poly2deg regression fit for leaf_no and voxel_vol_01 in tomato
Starting linear regression fit for leaf_no and voxel_vol_005 in tomato
Conducted linear regression fit for leaf_no and voxel_vol_005 in tomato
Conducted exponential regression fit for leaf_no and voxel_vol_005 in tomato
Conducted power regression fit for leaf_no and voxel_vol_005 in tomato
Conducted log regression fit for leaf_no and voxel_vol_005 in tomato
Conducted poly2deg regression fit for leaf_no and voxel_vol_005 in toma

  return a*np.log(b*x + 1)


Conducted log regression fit for leaf_no and convh_vol in tomato
Conducted poly2deg regression fit for leaf_no and convh_vol in tomato
Starting linear regression fit for shoot_no and voxel_vol_02 in tomato
Conducted linear regression fit for shoot_no and voxel_vol_02 in tomato
Conducted exponential regression fit for shoot_no and voxel_vol_02 in tomato
Conducted power regression fit for shoot_no and voxel_vol_02 in tomato
Conducted log regression fit for shoot_no and voxel_vol_02 in tomato
Conducted poly2deg regression fit for shoot_no and voxel_vol_02 in tomato
Starting linear regression fit for shoot_no and voxel_vol_01 in tomato
Conducted linear regression fit for shoot_no and voxel_vol_01 in tomato
Conducted exponential regression fit for shoot_no and voxel_vol_01 in tomato
Conducted power regression fit for shoot_no and voxel_vol_01 in tomato
Conducted log regression fit for shoot_no and voxel_vol_01 in tomato
Conducted poly2deg regression fit for shoot_no and voxel_vol_01 in toma

  return a*np.log(b*x + 1)


Conducted power regression fit for leaf_biomass and voxel_vol_02 in watermelon
Conducted log regression fit for leaf_biomass and voxel_vol_02 in watermelon
Conducted poly2deg regression fit for leaf_biomass and voxel_vol_02 in watermelon
Starting linear regression fit for leaf_biomass and voxel_vol_01 in watermelon
Conducted linear regression fit for leaf_biomass and voxel_vol_01 in watermelon
Conducted exponential regression fit for leaf_biomass and voxel_vol_01 in watermelon
Conducted power regression fit for leaf_biomass and voxel_vol_01 in watermelon
Conducted log regression fit for leaf_biomass and voxel_vol_01 in watermelon
Conducted poly2deg regression fit for leaf_biomass and voxel_vol_01 in watermelon
Starting linear regression fit for leaf_biomass and voxel_vol_005 in watermelon
Conducted linear regression fit for leaf_biomass and voxel_vol_005 in watermelon
Conducted exponential regression fit for leaf_biomass and voxel_vol_005 in watermelon
Conducted power regression fit fo

  return a*x**b
  return a*x**b
  popt, _ = curve_fit(func, x_train.values.ravel(), y_train.values.ravel(), maxfev=80000)
  return a*np.exp(-b*x)
  return a*np.log(b*x + 1)
  return a*np.log(b*x + 1)


Conducted log regression fit for shoot_biomass and voxel_vol_02 in watermelon
Conducted poly2deg regression fit for shoot_biomass and voxel_vol_02 in watermelon
Starting linear regression fit for shoot_biomass and voxel_vol_01 in watermelon
Conducted linear regression fit for shoot_biomass and voxel_vol_01 in watermelon
Conducted exponential regression fit for shoot_biomass and voxel_vol_01 in watermelon
Conducted power regression fit for shoot_biomass and voxel_vol_01 in watermelon
Conducted log regression fit for shoot_biomass and voxel_vol_01 in watermelon
Conducted poly2deg regression fit for shoot_biomass and voxel_vol_01 in watermelon
Starting linear regression fit for shoot_biomass and voxel_vol_005 in watermelon
Conducted linear regression fit for shoot_biomass and voxel_vol_005 in watermelon
Conducted exponential regression fit for shoot_biomass and voxel_vol_005 in watermelon
Conducted power regression fit for shoot_biomass and voxel_vol_005 in watermelon
Conducted log regres

  return a*x**b
  return a*x**b
  return a*np.exp(-b*x)
  return a*np.log(b*x + 1)
  return a*np.log(b*x + 1)


Conducted linear regression fit for aboveground_veg_biomass and voxel_vol_02 in watermelon
Conducted exponential regression fit for aboveground_veg_biomass and voxel_vol_02 in watermelon
Conducted power regression fit for aboveground_veg_biomass and voxel_vol_02 in watermelon
Conducted log regression fit for aboveground_veg_biomass and voxel_vol_02 in watermelon
Conducted poly2deg regression fit for aboveground_veg_biomass and voxel_vol_02 in watermelon
Starting linear regression fit for aboveground_veg_biomass and voxel_vol_01 in watermelon
Conducted linear regression fit for aboveground_veg_biomass and voxel_vol_01 in watermelon
Conducted exponential regression fit for aboveground_veg_biomass and voxel_vol_01 in watermelon
Conducted power regression fit for aboveground_veg_biomass and voxel_vol_01 in watermelon
Conducted log regression fit for aboveground_veg_biomass and voxel_vol_01 in watermelon
Conducted poly2deg regression fit for aboveground_veg_biomass and voxel_vol_01 in water

  return a*x**b
  return a*x**b
  return a*np.exp(-b*x)
  return a*np.log(b*x + 1)
  return a*np.log(b*x + 1)


Conducted poly2deg regression fit for aboveground_veg_biomass and voxel_vol_001 in watermelon
Starting linear regression fit for aboveground_veg_biomass and convh_vol in watermelon
Conducted linear regression fit for aboveground_veg_biomass and convh_vol in watermelon
Conducted exponential regression fit for aboveground_veg_biomass and convh_vol in watermelon
Conducted power regression fit for aboveground_veg_biomass and convh_vol in watermelon
Conducted log regression fit for aboveground_veg_biomass and convh_vol in watermelon
Conducted poly2deg regression fit for aboveground_veg_biomass and convh_vol in watermelon
Finished biomass regressions for watermelon
Starting linear regression fit for leaf_no and voxel_vol_02 in watermelon
Conducted linear regression fit for leaf_no and voxel_vol_02 in watermelon
Conducted exponential regression fit for leaf_no and voxel_vol_02 in watermelon
Conducted power regression fit for leaf_no and voxel_vol_02 in watermelon
Conducted log regression fit 

  return a*np.log(b*x + 1)
  return a*np.log(b*x + 1)
  return a*x**b
  return a*x**b


Conducted log regression fit for leaf_no and voxel_vol_01 in watermelon
Conducted poly2deg regression fit for leaf_no and voxel_vol_01 in watermelon
Starting linear regression fit for leaf_no and voxel_vol_005 in watermelon
Conducted linear regression fit for leaf_no and voxel_vol_005 in watermelon
Conducted exponential regression fit for leaf_no and voxel_vol_005 in watermelon
Conducted power regression fit for leaf_no and voxel_vol_005 in watermelon
Conducted log regression fit for leaf_no and voxel_vol_005 in watermelon
Conducted poly2deg regression fit for leaf_no and voxel_vol_005 in watermelon
Starting linear regression fit for leaf_no and voxel_vol_002 in watermelon
Conducted linear regression fit for leaf_no and voxel_vol_002 in watermelon
Conducted exponential regression fit for leaf_no and voxel_vol_002 in watermelon
Conducted power regression fit for leaf_no and voxel_vol_002 in watermelon
Conducted log regression fit for leaf_no and voxel_vol_002 in watermelon
Conducted pol

  return a*np.exp(-b*x)
  return a*np.exp(-b*x)
  return a*np.log(b*x + 1)
  return a*x**b


Conducted log regression fit for shoot_no and voxel_vol_01 in watermelon
Conducted poly2deg regression fit for shoot_no and voxel_vol_01 in watermelon
Starting linear regression fit for shoot_no and voxel_vol_005 in watermelon
Conducted linear regression fit for shoot_no and voxel_vol_005 in watermelon
Conducted exponential regression fit for shoot_no and voxel_vol_005 in watermelon
Conducted power regression fit for shoot_no and voxel_vol_005 in watermelon
Conducted log regression fit for shoot_no and voxel_vol_005 in watermelon
Conducted poly2deg regression fit for shoot_no and voxel_vol_005 in watermelon
Starting linear regression fit for shoot_no and voxel_vol_002 in watermelon
Conducted linear regression fit for shoot_no and voxel_vol_002 in watermelon
Conducted exponential regression fit for shoot_no and voxel_vol_002 in watermelon
Conducted power regression fit for shoot_no and voxel_vol_002 in watermelon
Conducted log regression fit for shoot_no and voxel_vol_002 in watermelon


  return a*x**b
  return a*x**b
  return a*np.log(b*x + 1)


In [14]:
print(df)

                     sample_id                               plant_id  \
0         ta_Big_Beef_Day_Zero        Transplant-Aged Big Beef Tomato   
1             gp_21_07_07_2023                                     21   
2             gp_09_06_24_2023                                     09   
3             gp_07_08_02_2023                                     07   
4             gp_05_09_01_2023                                     05   
5   ta_German_Johnson_Day_Zero  Transplant-Aged German Johnson Tomato   
6             gp_18_07_07_2023                                     18   
7             gp_17_08_02_2023                                     17   
8             gp_19_09_01_2023                                     19   
9             gp_20_10_02_2023                                     20   
10           ta_Mambo_Day_Zero       Transplant-Aged Mambo Watermelon   
11            gp_24_07_07_2023                                     24   
12            gp_15_06_24_2023                     

In [18]:
# Get best regression equation for each parameter and volume method combination
rdf['best_r2'] = rdf[['linear_r2', 'exponential_r2', 'power_r2', 'log_r2', 'poly2deg_r2']].max(axis=1)
rdf['best_rmse'] = rdf[['linear_rmse', 'exponential_rmse', 'power_rmse', 'log_rmse', 'poly2deg_rmse']].min(axis=1)
rdf['best_rrmse'] = rdf[['linear_rrmse', 'exponential_rrmse', 'power_rrmse', 'log_rrmse', 'poly2deg_rrmse']].min(axis=1)
rdf['best_equation'] = rdf[['linear_rrmse', 'exponential_rrmse', 'power_rrmse', 'log_rrmse', 'poly2deg_rrmse']].idxmin(axis=1)
rdf['best_equation'] = rdf['best_equation'].str.replace('_rrmse', '')

In [19]:
# Sort rdf by subset, parameter, and best rrmse
rdf.sort_values(by=['subset', 'parameter', 'best_rrmse'], ascending=[True, True, True], inplace=True)

In [20]:
# Save rdf
rdf.to_csv('Biomass_and_Volume_Regressions/bv_regressions_cx3_subset_2.csv', index=False)