In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the merged yield / phosphorus-fertilizer table.
# (An earlier revision of this notebook read 'merged_phosfert_021125.csv'.)
source_csv = 'data/yield_phosfert_merged.csv'
df = pd.read_csv(source_csv)

# Print the column/dtype summary, then show the first rows as the cell output.
df.info()
df.head()


In [None]:
# Mean yield of every root-grade column for each (Year, P_appln) combination.
# Only Year and P_appln are grouping keys here, so all other factors in `df`
# are averaged over.
grade_columns = ['No.1_wt', 'Can_wt', 'Jumbo_wt', 'Mkt_wt']

yield_summary = (
    df.groupby(['Year', 'P_appln'])
      .agg(dict.fromkeys(grade_columns, 'mean'))
      .reset_index()  # turn the group keys back into ordinary columns
)

yield_summary.info()
yield_summary.head()

In [None]:
# Persist the per-(Year, P_appln) means before reshaping them.
yield_summary.to_csv('yield_avgs_021825.csv', index=False)

# Raw column name -> label used for that root grade in the plot.
name_mapping = {
    'No.1_wt': 'US_No.1',
    'Can_wt': 'Canner',
    'Jumbo_wt': 'Jumbo',
    'Mkt_wt': 'Total marketable roots',
}

# Reshape wide -> long: one row per (Year, P_appln, Root_quality), with the
# mean in 'Average Yield', then swap the raw column names for plot labels.
melted_df = (
    yield_summary
    .melt(
        id_vars=['Year', 'P_appln'],
        value_vars=list(name_mapping),
        var_name='Root_quality',
        value_name='Average Yield',
    )
    .assign(Root_quality=lambda long_df: long_df['Root_quality'].replace(name_mapping))
)

melted_df.info()
melted_df.head()

In [None]:
# CONVERSIONS
# Adds two derived columns to melted_df; both are computed from existing
# columns, so re-running this cell gives the same result.

# --- Yield: kg/plot -> Mg/ha ---
# Plot area in hectares
plot_area_ha = 0.0009282

# kg -> Mg (divide by 1000), then scale from one plot to one hectare.
melted_df['Yield'] = melted_df['Average Yield'] / 1000 / plot_area_ha

# --- P_appln: pounds per acre (lb/A) -> kilograms per hectare (kg/ha) ---
LB_PER_ACRE_TO_KG_PER_HA = 1.12085

# Round to the nearest whole kg/ha *before* casting: a bare astype(int)
# truncates toward zero, so e.g. 30 lb/A (33.63 kg/ha) would be labelled 33
# instead of 34 and 90 lb/A (100.88 kg/ha) would be labelled 100 instead of 101.
melted_df['Phosphorus Fertilizer'] = (
    (melted_df['P_appln'] * LB_PER_ACRE_TO_KG_PER_HA).round().astype(int)
)

melted_df.info()
melted_df.head()

In [None]:
# Faceted line plot of mean yield vs. phosphorus rate: one panel per Year,
# one line per root-quality grade. The figure is written to a PNG and shown.

# Font sizes (points) shared by the titles, axis labels, ticks and legend
LABEL_FONTSIZE = 24
LEGEND_FONTSIZE = 22

# Set the style for seaborn
sns.set(style="ticks")

# Define marker styles for each Root_quality category
marker_styles = {
    'Jumbo': '^',                # Triangle up
    'US_No.1': 'o',              # Circle
    'Canner': 's',               # Square
    'Total marketable roots': 'D' # Diamond
}

# Create a facet grid for the yields comparison
g = sns.relplot(
    data=melted_df,
    x='Phosphorus Fertilizer', y='Yield',
    hue='Root_quality', style='Root_quality',
    col='Year',
    kind='line', height=5.2, aspect=1.06,
    palette='colorblind', markers=marker_styles,
    legend=True
)

# Remove the default legend on the right
if g._legend is not None:
    g._legend.remove()

# Add a custom legend above the plots. No handles are passed, so matplotlib
# collects the labelled artists from the figure's axes.
g.fig.legend(
    title='', loc='upper center', bbox_to_anchor=(0.5, 1.07),
    ncol=4, fontsize=LEGEND_FONTSIZE, frameon=False
)

# Set subplot titles
g.set_titles(col_template="{col_name}", size=LABEL_FONTSIZE)

# Customize axis labels
panels = list(g.axes.flat)
# The shared x-axis label goes on the second panel; clamp the index so a
# single-panel figure (only one Year in the data) still gets its x label.
x_label_index = min(1, len(panels) - 1)

for i, ax in enumerate(panels):
    if i == x_label_index:
        ax.set_xlabel("Phosphorus Fertilizer (kg/ha)", fontsize=LABEL_FONTSIZE)
    else:
        ax.set_xlabel("")

    if i == 0:  # Only the first panel gets the y-axis label
        ax.set_ylabel("Average Root Weight (Mg/ha)", fontsize=LABEL_FONTSIZE)
    else:
        ax.set_ylabel("")

    ax.tick_params(axis='both', labelsize=LABEL_FONTSIZE)

# Adjust layout to make room for title and legend
g.fig.subplots_adjust(top=0.85, bottom=0.15, left=0.15)

# Save the figure
g.savefig("yield_avgs_0218_plot.png", dpi=300)
plt.show()
