In [10]:
import pandas as pd

# Load the three Excel workbooks (MGP species list, outside-MGP species list,
# ratings inventory) in that order, then the PAF scoring CSV.
df_mgp, df_outside_mgp, df_ratings = (
    pd.read_excel(workbook)
    for workbook in (
        'species_present_at_mgp.xlsx',
        'species_outside_mgp.xlsx',
        'cal-ipc-inventory-data-good.html.xlsx',
    )
)
pafs = pd.read_csv('pafs_cal_ipc_invasiveness_scoring_data.csv')


def process_species_data(species_df, pafs_df):
    """Attach "Worksheet C" rating labels from the PAF table to each species.

    Every column of ``pafs_df`` after the first whose label contains
    "Worksheet C" is scanned. When a row's cell in such a column is one of
    the ratings "D", "C", "B", "A" or "U", the second comma-separated field
    of the column label (or the whole label when it has no comma), stripped
    of surrounding whitespace, is recorded for that row's species under that
    rating.

    ``species_df`` is modified in place: the columns "D", "C", "B", "A" and
    "U" are (re)created holding the ";"-joined labels for each species (an
    empty string when there are none), and a "Concatenated" column joins the
    non-empty rating cells in the order D, C, B, A, U.

    Args:
        species_df: DataFrame with a "Species" column; mutated in place.
        pafs_df: DataFrame with a "Species" column plus the scoring columns.
            Its first column is never treated as a scoring column.

    Returns:
        A DataFrame restricted to the columns "Concatenated", "A", "B", "C",
        "D", "U" (in that order), with one row per row of ``species_df``.
    """
    ratings = ['D', 'C', 'B', 'A', 'U']

    # Parse the "Worksheet C" column labels once instead of once per row.
    worksheet_columns = []
    for col_label in pafs_df.columns[1:]:
        if "Worksheet C" not in col_label:
            continue
        parts = col_label.split(',')
        short_label = parts[1].strip() if len(parts) > 1 else col_label.strip()
        worksheet_columns.append((col_label, short_label))

    # Collect labels per (species, rating), in row order then column order,
    # so each species row can be filled with a single lookup afterwards.
    labels_by_key = {}
    for _, row in pafs_df.iterrows():
        species_name = row['Species']
        if pd.isna(species_name):
            # A missing name can never equal a species entry; skip the row.
            continue
        for col_label, short_label in worksheet_columns:
            value = row[col_label]
            if value in ratings:
                labels_by_key.setdefault((species_name, value), []).append(short_label)

    # Fill the rating columns positionally; species without any label for a
    # rating get an empty string.
    species_names = species_df['Species'].tolist()
    for rating in ratings:
        species_df[rating] = [
            ';'.join(labels_by_key.get((name, rating), ()))
            for name in species_names
        ]

    # Join only the non-empty rating cells. The cells are empty strings, not
    # NaN, so they have to be filtered explicitly to avoid stray semicolons.
    species_df['Concatenated'] = [
        ';'.join(cell for cell in cells if cell)
        for cells in zip(*(species_df[rating].tolist() for rating in ratings))
    ]

    # Return only the summary columns, with 'Concatenated' left-most.
    return species_df[['Concatenated', 'A', 'B', 'C', 'D', 'U']]

# Usage example:

# For each site list, flag the species that also appear in the other list
mgp_seen_outside = df_mgp['Species'].isin(df_outside_mgp['Species'])
outside_seen_at_mgp = df_outside_mgp['Species'].isin(df_mgp['Species'])

# Species present at MGP but not outside MGP, joined to their ratings
species_only_at_mgp = df_mgp[~mgp_seen_outside].merge(
    df_ratings, left_on='Species', right_on='Scientific name'
)

# Species present outside MGP but not at MGP, joined to their ratings
species_only_outside_mgp = df_outside_mgp[~outside_seen_at_mgp].merge(
    df_ratings, left_on='Species', right_on='Scientific name'
)

# Species present at both MGP and outside MGP, joined to their ratings
species_both = df_mgp[mgp_seen_outside].merge(
    df_ratings, left_on='Species', right_on='Scientific name'
)

# Add the rating-label columns to each table; the call is made for its
# in-place effect and the returned frame is not used.
for frame in (species_only_at_mgp, species_only_outside_mgp, species_both):
    process_species_data(frame, pafs)

# Print the results
for heading, frame in (
    ("Species present at MGP but not outside MGP:", species_only_at_mgp),
    ("\nSpecies present outside MGP but not at MGP:", species_only_outside_mgp),
    ("\nSpecies present at both MGP and outside MGP:", species_both),
):
    print(heading)
    print(frame)

# Export the results to an Excel file, one sheet per table
with pd.ExcelWriter('species_comparison_results7.xlsx') as writer:
    for sheet_name, frame in (
        ('Species_only_at_MGP', species_only_at_mgp),
        ('Species_only_outside_MGP', species_only_outside_mgp),
        ('Species_present_both', species_both),
    ):
        frame.to_excel(writer, sheet_name=sheet_name, index=False)



Species present at MGP but not outside MGP:
                     Species Important notes             Scientific name  \
0            Bromus diandrus       NaN    NaN            Bromus diandrus   
1          Fallopia japonica       NaN    NaN          Fallopia japonica   
2  Heliotropium amplexicaule       NaN    NaN  Heliotropium amplexicaule   
3            Hordeum murinum       NaN    NaN            Hordeum murinum   
4       Hypericum canariense       NaN    NaN       Hypericum canariense   
5           Kniphofia uvaria       NaN    NaN           Kniphofia uvaria   
6          Verbascum thapsus       NaN    NaN          Verbascum thapsus   

                  Common names    Rating Alert CDFA Hort  CWM  ID card  \
0                 ripgut brome  Moderate   NaN  NaN  NaN  NaN      NaN   
1            Japanese knotweed  Moderate     ◆   A*  NaN  NaN      NaN   
2          clasping heliotrope     Watch   NaN  NaN  NaN  NaN      NaN   
3                  hare barley  Moderate   NaN  NaN