In [1]:
import ast

import numpy as np
import pandas as pd
from ahpy import Compare
from sklearn.preprocessing import MinMaxScaler

# Load and preprocess data
def load_data():
    """Load the cluster/pollution CSV and decode its list-valued columns.

    Reads 'cluster_with_pollution_data.csv' from the current working
    directory. The 'coordinates_lon', 'coordinates_lat' and 'speciesId'
    columns are stored as textual Python literals (e.g. "[1.0, 2.0]") and are
    converted back into Python objects.

    Returns:
        pandas.DataFrame: the loaded data with the three columns decoded.

    Raises:
        ValueError, SyntaxError: if a cell is not a valid Python literal.
    """
    merged_data = pd.read_csv('cluster_with_pollution_data.csv')

    # ast.literal_eval only accepts literals (lists, numbers, strings, ...),
    # so a crafted CSV cell cannot execute arbitrary code the way eval() would.
    for column in ('coordinates_lon', 'coordinates_lat', 'speciesId'):
        merged_data[column] = merged_data[column].apply(ast.literal_eval)

    return merged_data

# Calculate criteria: AQI Impact, Species Importance, Tree Density
def calculate_criteria(df):
    """Add the three decision-criteria columns to ``df`` and return it.

    The frame is modified in place; the same object is returned.

    Columns written:
        AQI_Impact: 0.4 * AQI + 0.2 * CO + 0.2 * NO2 + 0.2 * PM2.5.
        Species_Importance: number of distinct entries in each row's
            'speciesId' list (more distinct species is treated as more
            important).
        Tree_Density: length of each row's 'coordinates_lon' list (more
            coordinates is treated as a denser cluster).
    """
    def distinct_count(species_ids):
        # Duplicates within a cluster count once.
        return len(set(species_ids))

    # Accumulate left to right: AQI term first, then each pollutant term.
    pollutant_share = 0.2
    impact = 0.4 * df['AQI']
    for pollutant in ('CO', 'NO2', 'PM2.5'):
        impact = impact + pollutant_share * df[pollutant]
    df['AQI_Impact'] = impact

    df['Species_Importance'] = df['speciesId'].apply(distinct_count)

    longitudes = df['coordinates_lon']
    df['Tree_Density'] = longitudes.apply(len)

    return df

# Step 2: AHP - Assign Weights
def calculate_ahp_weights():
    """Derive criteria weights from fixed pairwise AHP judgements.

    Builds an ``ahpy.Compare`` object named 'Criteria' from three pairwise
    comparisons between AQI_Impact, Species_Importance and Tree_Density,
    prints the resulting weights and the consistency ratio, and returns the
    weights.

    Returns:
        The ``target_weights`` attribute of the ``Compare`` object (printed
        by this function as a mapping from criterion name to weight).
    """
    # Each key is a (criterion_a, criterion_b) pair and the value is the
    # judgement passed to ahpy for that pair.
    pairs = (
        ('AQI_Impact', 'Species_Importance'),
        ('AQI_Impact', 'Tree_Density'),
        ('Species_Importance', 'Tree_Density'),
    )
    judgements = dict(zip(pairs, (3, 5, 4)))

    ahp = Compare('Criteria', judgements, precision=3)
    criteria_weights = ahp.target_weights

    print("AHP Criteria Weights:", criteria_weights)
    print("Consistency Ratio:", ahp.consistency_ratio)
    return criteria_weights

# Step 3: TOPSIS - Rank Trees/Clusters for Removal
def _topsis_scores(criteria_matrix, weight_vector):
    """Return (min-max normalised matrix, TOPSIS closeness score per row)."""
    matrix = np.asarray(criteria_matrix, dtype=float)

    # Column-wise min-max scaling to [0, 1]. A constant column has zero range;
    # dividing by 1 instead maps it to 0 rather than producing NaN.
    col_min = np.nanmin(matrix, axis=0)
    col_range = np.nanmax(matrix, axis=0) - col_min
    col_range[col_range == 0] = 1.0
    normalized = (matrix - col_min) / col_range

    weighted = normalized * weight_vector
    ideal = weighted.max(axis=0)
    negative_ideal = weighted.min(axis=0)

    dist_to_ideal = np.sqrt(((weighted - ideal) ** 2).sum(axis=1))
    dist_to_negative_ideal = np.sqrt(((weighted - negative_ideal) ** 2).sum(axis=1))

    # When every candidate is identical both distances are 0; score those rows
    # 0 instead of letting 0/0 produce NaN.
    total = dist_to_ideal + dist_to_negative_ideal
    scores = np.divide(
        dist_to_negative_ideal,
        total,
        out=np.zeros_like(total),
        where=total != 0,
    )
    return normalized, scores


def topsis_ranking(df, weights, num_trees_to_remove):
    """Rank non-preserved clusters with TOPSIS and report the first N.

    Clusters whose AQI_Impact or Species_Importance is at or above the 70th
    percentile are marked for preservation and excluded. The remaining
    clusters are min-max normalised on AQI_Impact, Species_Importance and
    Tree_Density, weighted, and scored by relative closeness to the ideal
    (column-wise maximum) solution. Clusters are sorted by ascending score,
    so the clusters closest to the negative-ideal (lowest weighted criteria
    values) are selected for removal first.

    Side effects: adds a boolean 'Preserve' column to ``df`` and prints a
    per-cluster explanation to stdout.

    Args:
        df: DataFrame with 'AQI_Impact', 'Species_Importance',
            'Tree_Density', 'speciesId', 'average_lon' and 'average_lat'.
        weights: mapping from criterion name to weight. Weights are looked up
            by name, so the mapping's iteration order does not matter.
        num_trees_to_remove: number of top-ranked clusters to return.

    Returns:
        DataFrame of the selected clusters with an added 'TOPSIS_Score'
        column (empty if no cluster is eligible for removal).

    Raises:
        KeyError: if ``weights`` lacks one of the three criteria.
    """
    criteria_columns = ['AQI_Impact', 'Species_Importance', 'Tree_Density']

    # Match each weight to its column by name; relying on dict order would
    # silently apply the wrong weight if the mapping is ordered differently.
    try:
        weight_vector = np.array(
            [weights[name] for name in criteria_columns], dtype=float
        )
    except KeyError as err:
        raise KeyError(f"weights is missing criterion {err}") from err

    # Set thresholds for preservation
    aqi_preservation_threshold = df['AQI_Impact'].quantile(0.7)
    species_importance_threshold = df['Species_Importance'].quantile(0.7)

    # Mark clusters with high AQI impact or high species importance for preservation
    df['Preserve'] = (df['AQI_Impact'] >= aqi_preservation_threshold) | \
                     (df['Species_Importance'] >= species_importance_threshold)

    # Only clusters not marked for preservation are eligible for removal
    removal_candidates = df[~df['Preserve']].copy()

    print(f"\nTop {num_trees_to_remove} Clusters/Trees for Removal with Reasoning:\n")

    if removal_candidates.empty:
        # Nothing to normalise or rank; return an empty, correctly shaped frame.
        removal_candidates['TOPSIS_Score'] = np.array([], dtype=float)
        print("No clusters are eligible for removal.")
        return removal_candidates

    normalized, scores = _topsis_scores(
        removal_candidates[criteria_columns].to_numpy(dtype=float),
        weight_vector,
    )
    removal_candidates['TOPSIS_Score'] = scores

    # Ascending order: the lowest score (closest to the negative-ideal) ranks first.
    # NOTE(review): every criterion is treated as "higher is better to keep",
    # so low-density clusters rank first for removal, while the reasoning text
    # below describes high density as suitable for removal. Confirm which
    # direction is intended for Tree_Density.
    order = np.argsort(scores, kind='stable')
    removal_candidates = removal_candidates.iloc[order]
    ranked_density = normalized[order, criteria_columns.index('Tree_Density')]

    selected_clusters = removal_candidates.head(num_trees_to_remove)

    for (_, row), density in zip(selected_clusters.iterrows(), ranked_density):
        print(f"Species ID: {row['speciesId']}, Location: ({row['average_lon']}, {row['average_lat']})")
        print(f"TOPSIS Score: {row['TOPSIS_Score']:.4f}")

        reasoning = []
        # Strict '<' mirrors the '>=' used for the preservation rule above.
        if row['AQI_Impact'] < aqi_preservation_threshold:
            reasoning.append("Low AQI Impact, minimal effect on air quality if removed.")
        else:
            reasoning.append("High AQI Impact, essential for maintaining air quality. Consider preserving.")

        if row['Species_Importance'] < species_importance_threshold:
            reasoning.append("Low Species Importance, less critical species.")
        else:
            reasoning.append("High Species Importance, species with ecological significance. Consider preserving.")

        # Compare the normalised (0-1) density: the raw count is always >= 0.5
        # for any cluster that has at least one coordinate.
        if density >= 0.5:
            reasoning.append("High Tree Density, part of a dense area suitable for selective removal.")
        else:
            reasoning.append("Low Tree Density, sparse area where trees should be preserved.")

        print("Reasoning:")
        for reason in reasoning:
            print(f"- {reason}")
        print("\n---\n")

    return selected_clusters

# Main function to load data and execute AHP and TOPSIS
def main():
    """Run the interactive workflow: load data, weight criteria, rank clusters.

    Loads the dataset, derives the criteria columns, computes the AHP
    weights, prompts on stdin for how many clusters to select, and prints the
    TOPSIS selection. Returns early (after printing a message) when the input
    is not a non-negative integer or a criteria column is missing.
    """
    # Load data and derive the criteria used by the ranking
    merged_data = load_data()
    merged_data = calculate_criteria(merged_data)

    # Calculate AHP weights
    weights = calculate_ahp_weights()

    # Ask user for the number of trees to remove
    try:
        num_trees_to_remove = int(input("Enter the number of trees to remove: "))
    except ValueError:
        print("Invalid input. Please enter a numerical value.")
        return

    # A negative count would reach DataFrame.head(-n), which returns all but
    # the last n rows rather than an empty or "top n" selection.
    if num_trees_to_remove < 0:
        print("Invalid input. Please enter a non-negative number.")
        return

    # Ensure that columns for criteria (like AQI Impact, Species Importance, etc.) are present
    required_columns = ['AQI_Impact', 'Species_Importance', 'Tree_Density']
    if not all(col in merged_data.columns for col in required_columns):
        print("Error: Ensure that all necessary criteria columns are present in 'cluster_with_pollution_data.csv'.")
        return

    # Apply TOPSIS ranking and show a compact summary of the selection
    selected_clusters = topsis_ranking(merged_data, weights, num_trees_to_remove)
    print(selected_clusters[['speciesId', 'average_lon', 'average_lat', 'TOPSIS_Score']])

# Entry point: run the interactive workflow only when this file is executed
# as the main module, not when it is imported.
if __name__ == "__main__":
    main()

AHP Criteria Weights: {'AQI_Impact': 0.627, 'Species_Importance': 0.28, 'Tree_Density': 0.094}
Consistency Ratio: 0.087

Top 100 Clusters/Trees for Removal with Reasoning:

Species ID: [963.0, 2678.0, 5745.0], Location: (-1.24129, 43.0853)
TOPSIS Score: 0.0664
Reasoning:
- Low AQI Impact, minimal effect on air quality if removed.
- Low Species Importance, less critical species.
- High Tree Density, part of a dense area suitable for selective removal.

---

Species ID: [10600.0, 11176.0, 963.0], Location: (-1.11561, 43.03547)
TOPSIS Score: 0.1039
Reasoning:
- Low AQI Impact, minimal effect on air quality if removed.
- Low Species Importance, less critical species.
- High Tree Density, part of a dense area suitable for selective removal.

---

Species ID: [7760.0, 8428.0, 1495.0, 5542.0, 2025.0, 8705.0, 976.0, 10247.0, 963.0], Location: (-0.64592, 42.95968)
TOPSIS Score: 0.1042
Reasoning:
- Low AQI Impact, minimal effect on air quality if removed.
- Low Species Importance, less critical 