In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the scraped biomass measurements from the CSV in the working directory
biomass_csv_path = 'biomass_scrape_corrected.csv'
df = pd.read_csv(biomass_csv_path)

In [3]:
# Show the column labels of the loaded data
available_columns = df.columns
print(available_columns)

Index(['Sample ID', 'Plant ID', 'Cultivar', 'Species', 'Sampling Age',
       'Sampling Date', 'Leaflet No', 'Leaf No', 'Stem and Rachi No',
       'Stem No', 'Leaves Per Stem', 'Flower No', 'Total Fruit No', 'SLA',
       'Leaf Biomass', 'Shoot Biomass', 'Flower Biomass', 'Ripe Fruit No',
       'Ripe Fruit Biomass', 'Total Fruit Biomass', 'Aboveground Biomass'],
      dtype='object')


In [4]:
# Per-row total biomass: leaf + shoot (stem) + flower + total fruit.
# Series.add with no fill_value is the same element-wise addition as `+`,
# so a missing value in any component still yields a missing total.
df['aboveground_total_biomass'] = (
    df['Leaf Biomass']
    .add(df['Shoot Biomass'])
    .add(df['Flower Biomass'])
    .add(df['Total Fruit Biomass'])
)

In [5]:
# Set up the (initially empty) summary dataframe.
# Each metric contributes two adjacent columns: its value and its "<name>_r" range.
metric_column_pairs = [
    ('aboveground_veg_biomass', 'avb_r'),
    ('leaf_biomass', 'lb_r'),
    ('stem_biomass', 'sb_r'),
    ('flower_biomass', 'fb_r'),
    ('fruit_biomass', 'frb_r'),
    ('leaf_number', 'ln_r'),
    ('stem_number', 'sn_r'),
    ('aboveground_total_biomass', 'atb_r'),
]
s_cols = ['species', 'sampling_age'] + [
    column for pair in metric_column_pairs for column in pair
]
summary_df = pd.DataFrame(columns=s_cols)

In [6]:
# Build the summary table: one row per (species, sampling age) combination that
# has at least one sample. For every metric the row holds the group mean and a
# "min-max" range string.
#
# Rows are collected in a list and the frame is constructed once at the end.
# This keeps the cell idempotent (re-running it rebuilds summary_df instead of
# appending duplicate rows to the existing one) and avoids copying the whole
# frame on every iteration, which pd.concat inside the loop would do.

# (mean column, range column, source column in df, is_count)
# Biomass metrics are rounded to 2 decimals; count metrics are converted with int().
SUMMARY_METRICS = [
    ('aboveground_veg_biomass', 'avb_r', 'Aboveground Biomass', False),
    ('leaf_biomass', 'lb_r', 'Leaf Biomass', False),
    ('stem_biomass', 'sb_r', 'Shoot Biomass', False),
    ('flower_biomass', 'fb_r', 'Flower Biomass', False),
    ('fruit_biomass', 'frb_r', 'Total Fruit Biomass', False),
    ('leaf_number', 'ln_r', 'Leaf No', True),
    ('stem_number', 'sn_r', 'Stem No', True),
    ('aboveground_total_biomass', 'atb_r', 'aboveground_total_biomass', False),
]


def _round2(value):
    """Round a scalar to two decimal places."""
    return round(value, 2)


summary_records = []
# Loop through each species and sampling age
for species in df['Species'].unique():
    for sampling_age in df['Sampling Age'].unique():
        # Subset the dataframe
        sub_df = df[(df['Species'] == species) & (df['Sampling Age'] == sampling_age)]
        # Skip combinations that have no samples
        if sub_df.empty:
            continue

        record = {'species': species, 'sampling_age': sampling_age}
        for mean_key, range_key, source_column, is_count in SUMMARY_METRICS:
            values = sub_df[source_column]
            # int() truncates toward zero rather than rounding, so a mean count
            # of 12.9 is reported as 12.
            # NOTE(review): confirm truncation (not rounding) is intended.
            to_reported = int if is_count else _round2
            record[mean_key] = to_reported(np.mean(values))
            record[range_key] = '{0}-{1}'.format(to_reported(np.min(values)),
                                                 to_reported(np.max(values)))
        summary_records.append(record)

# Passing columns=s_cols fixes the column order and still yields an empty frame
# with the expected columns when no group produced a record.
summary_df = pd.DataFrame(summary_records, columns=s_cols)

In [7]:
# Order the summary by species, then by sampling age, and show it
sort_keys = ['species', 'sampling_age']
summary_df = summary_df.sort_values(by=sort_keys)
print(summary_df)

      species sampling_age aboveground_veg_biomass          avb_r  \
0      Tomato            0                    4.62       3.0-7.58   
1      Tomato            2                   15.68    13.41-17.94   
2      Tomato            5                    57.0    40.44-66.55   
3      Tomato           10                  260.81  142.89-419.69   
4      Tomato           15                  426.35  199.18-724.07   
5  Watermelon            0                    2.84      2.46-3.23   
6  Watermelon            2                    14.4    12.54-16.27   
8  Watermelon            4                     3.9      1.88-5.91   
9  Watermelon            9                  479.34  433.62-525.05   
7  Watermelon           15                  616.69  458.63-774.76   

  leaf_biomass           lb_r stem_biomass           sb_r flower_biomass  \
0          2.1      1.26-3.64         2.52       1.6-3.94           0.06   
1          7.9      7.01-8.79         7.78       6.4-9.15            0.9   
2        25.

In [8]:
# Save the summary table as CSV, leaving out the row index
summary_csv_path = 'biomass_summary.csv'
summary_df.to_csv(summary_csv_path, index=False)