In [9]:
import pandas as pd

# Read the raw evaluation export into a DataFrame
df = pd.read_csv('gd-evals.csv')

# Columns whose name mentions "Comments" are removed from the working frame
comment_cols = [name for name in df.columns if 'Comments' in name]

# Drop the comment columns, then also cut the 8 trailing columns
df_no_comments = df.drop(columns=comment_cols).iloc[:, :-8]
df_no_comments.columns

Index(['Timestamp', 'Evaluator', 'Achillea millefolium (Yarrow)',
       'Foliage Rating', 'Flowering Rating',
       'Pest Tolerance/Disease Resistance Rating', 'Vigor Rating',
       'Overall Appearance Rating', 'Asclepias speciosa (Showy Milkweed)',
       'Foliage Rating.1', 'Flowering Rating.1',
       'Pest Tolerance/Disease Resistance Rating.1', 'Vigor Rating.1',
       'Overall Appearance Rating.1', 'Bouteloua', 'Foliage Rating.2',
       'Flowering Rating.2', 'Pest Tolerance/Disease Resistance Rating.2',
       'Vigor Rating.2', 'Overall Appearance Rating.2', 'Carex pansa',
       'Foliage Rating.3', 'Flowering Rating.3',
       'Pest Tolerance/Disease Resistance Rating.3', 'Vigor Rating.3',
       'Overall Appearance Rating.3', 'Eriogonum umbellatum',
       'Foliage Rating.4', 'Flowering Rating.4',
       'Pest Tolerance/Disease Resistance Rating.4', 'Vigor Rating.4',
       'Overall Appearance Rating.4', 'Foliage Rating.5', 'Flowering Rating.5',
       'Pest Tolerance/Disea

In [8]:
# Identifier columns carried through unchanged into the long-format output
id_cols = ['Timestamp', 'Evaluator']

# Extract the plant species names: every column that is neither a rating nor an identifier
plant_species_cols = [col for col in df_no_comments.columns if 'Rating' not in col and col not in id_cols]

# List of rating categories, in the order their columns follow each species header
rating_categories = ['Foliage', 'Flowering', 'Pest Tolerance/Disease Resistance', 'Vigor', 'Overall Appearance']


def _rating_columns_for(all_columns, species_col, categories):
    """Return the rating columns that immediately follow ``species_col``.

    Parameters
    ----------
    all_columns : list of str
        Column names of the wide frame, in their original order.
    species_col : str
        Name of the species header column.
    categories : list of str
        Rating category names, in the order their columns are expected.

    Returns
    -------
    list of str
        The ``len(categories)`` column names directly after ``species_col``.

    Raises
    ------
    ValueError
        If those columns are missing or do not start with the expected
        ``'<category> Rating'`` prefix (duplicate headers carry a ``.N``
        suffix, so a prefix match is used rather than equality).
    """
    start = all_columns.index(species_col) + 1
    candidate_cols = all_columns[start:start + len(categories)]
    expected_prefixes = [f'{category} Rating' for category in categories]
    aligned = len(candidate_cols) == len(expected_prefixes) and all(
        col.startswith(prefix) for col, prefix in zip(candidate_cols, expected_prefixes)
    )
    if not aligned:
        raise ValueError(
            f"Rating columns after {species_col!r} do not match the expected layout: "
            f"found {candidate_cols}, expected columns starting with {expected_prefixes}"
        )
    return candidate_cols


all_columns = df_no_comments.columns.tolist()

# Container to store reshaped dataframes for each species
dfs = []

# Iterate through each plant species column and reshape.
# NOTE(review): the last non-rating column is skipped on the assumption that it
# is not a species name -- confirm against the actual CSV.
for species_col in plant_species_cols[:-1]:
    # Locate the ratings relative to this species' own header column. Each
    # species occupies a header column plus its rating columns, so a fixed
    # stride of 5 from column 3 drifts by one column per species and mislabels
    # every species after the first.
    subset_cols = _rating_columns_for(all_columns, species_col, rating_categories)

    # Rename the (possibly '.N'-suffixed) rating columns to bare category names for melting
    subset_df = df_no_comments[id_cols + subset_cols].rename(
        columns=dict(zip(subset_cols, rating_categories))
    )

    # Melt dataframe: one row per (evaluation, rating category)
    melted_df = pd.melt(subset_df, id_vars=id_cols, value_vars=rating_categories, var_name='Rating Category', value_name='Rating')

    # Add species name
    melted_df['Species'] = species_col

    # Append to dfs
    dfs.append(melted_df)

# Combine all reshaped dataframes
adjusted_final_df = pd.concat(dfs, ignore_index=True)

# Reorder columns
adjusted_final_df = adjusted_final_df[['Timestamp', 'Evaluator', 'Species', 'Rating Category', 'Rating']]

adjusted_final_df.to_csv('gd-evals-clean.csv')

['Achillea millefolium (Yarrow)',
 'Asclepias speciosa (Showy Milkweed)',
 'Bouteloua',
 'Carex pansa',
 'Eriogonum umbellatum',
 'Eschscholzia californica',
 'Festuca californica',
 'Festuca idahoensis',
 'Mimulus bifidus',
 'Monardella villosa',
 "Penstemon 'Margarita BOP'"]