In [16]:
import pandas as pd

# Load DataFrames from Excel and CSV files
df = pd.read_excel(r'python generated excel files\priority_lists_by_visibility_months_and_presence_at_mgp.xlsx')
df_habitat = pd.read_excel(r'python generated excel files\habitat_types_at_mgp.xlsx')
df_paf = pd.read_csv(r'data inputs\pafs_cal_ipc_invasiveness_scoring_data.csv')

def process_species_data(species_df, pafs_df):
    # Create five columns in the species DataFrame labeled "D", "C", "B", "A", "U"
    new_columns = ['D', 'C', 'B', 'A', 'U']
    for col_label in new_columns:
        species_df[col_label] = ""

    # Iterate through each row in the pafs DataFrame
    for index, row in pafs_df.iterrows():
        # Iterate through each column in pafs DataFrame (except the first column which contains species names)
        for col_label in pafs_df.columns[1:]:
            # Check if the column label contains "Worksheet C"
            if "Worksheet C" in col_label:
                # Get the value of the current column
                value = row[col_label]
                # If the value is not empty and is one of "D", "C", "B", "A", or "U"
                if value in new_columns:
                    # Split the column label by comma and get the second part, if exists
                    parts = col_label.split(',')
                    if len(parts) > 1:
                        second_part = parts[1].strip()
                    else:
                        second_part = col_label.strip()
                    # Append the second part of the column name to the corresponding cell in the species DataFrame
                    species_df.loc[species_df['Species'] == row['Species'], value] += second_part + ";"

    # Remove trailing semicolons from the concatenated values
    for col_label in new_columns:
        species_df[col_label] = species_df[col_label].str.rstrip(';')

    # Create the concatenated column
    species_df['Concatenated'] = species_df[new_columns].apply(lambda x: ';'.join(x.dropna()), axis=1)
    
    # Reorder columns to ensure original columns are preserved
    original_columns = list(species_df.columns)
    species_df = species_df[['Species'] + original_columns[original_columns.index('Concatenated')+1:] + ['Concatenated'] + original_columns[1:original_columns.index('Concatenated')]]
    
    return species_df



df = process_species_data(df, df_paf)

# Merge df with df_paf
df = pd.merge(df, df_paf, how="left", on="Species")
import pandas as pd

# Create a set of all species
all_species = set(df['Species'])

# Create a dictionary to store habitats and associated species
habitats_dict = {}

# Iterate over DataFrame rows
for index, row in df.iterrows():
    species = row['Species']
    concatenated = row['Concatenated']
    
    # Split concatenated habitats
    habitats = concatenated.split(';')
    
    # Iterate over habitats
    for habitat in habitats:
        habitat = habitat.strip()
        if habitat and habitat != ';':
            if habitat not in habitats_dict:
                habitats_dict[habitat] = []
            habitats_dict[habitat].append(species)

# Create sets to remove duplicates
unique_habitats = set(habitat.strip() for habitat_list in habitats_dict.keys() for habitat in habitat_list)
unique_species_with_habitats = set(species for species_list in habitats_dict.values() for species in species_list)

# Identify species with no associated habitats
species_no_habitats = all_species - unique_species_with_habitats

# Convert dictionaries to tabular format
habitats_table = [[habitat, ', '.join(habitats_dict.get(habitat, []))] for habitat in unique_habitats]
species_no_habitats_table = [[species] for species in species_no_habitats]

# Print habitats and associated species table
print("Habitats and associated species:")
print(tabulate(habitats_table, headers=["Habitat", "Associated Species"], tablefmt="grid"))

# Print species with no associated habitats table
print("\nSpecies with no associated habitats:")
print(tabulate(species_no_habitats_table, headers=["Species"], tablefmt="grid"))




Habitats and associated species:


NameError: name 'tabulate' is not defined