In [1]:
import pandas as pd
import numpy as np
from ahpy import Compare
from sklearn.preprocessing import MinMaxScaler
from geopy.distance import geodesic
import matplotlib.pyplot as plt

# Load and preprocess data
def load_data(path='cluster_with_pollution_data.csv'):
    """Read the cluster/pollution CSV and add the derived distance columns.

    Args:
        path: Location of the CSV file. Defaults to the file name the notebook
            originally hard-coded, so ``load_data()`` keeps working unchanged.

    Returns:
        A DataFrame with ``coordinates_lon``, ``coordinates_lat`` and
        ``speciesId`` converted from their string form into Python objects,
        plus two new columns, ``distance_to_urban`` and ``distance_to_water``.
    """
    merged_data = pd.read_csv(path)

    # NOTE(security): eval() executes whatever expression is stored in the CSV
    # cell. That is only acceptable while the file is trusted. If the cells are
    # plain list/number literals, ast.literal_eval is the safe drop-in; it is
    # not swapped in here because the stored format is not visible from this
    # notebook (e.g. "np.float64(...)" reprs would stop parsing) - TODO confirm.
    for column in ('coordinates_lon', 'coordinates_lat', 'speciesId'):
        merged_data[column] = merged_data[column].apply(eval)

    # Placeholder distance features: each row is measured against a fixed
    # reference coordinate defined in the helper functions.
    merged_data['distance_to_urban'] = merged_data.apply(calculate_distance_to_urban, axis=1)
    merged_data['distance_to_water'] = merged_data.apply(calculate_distance_to_water, axis=1)

    return merged_data

# Distance calculation functions
def calculate_distance_to_urban(row):
    """Return the geodesic distance, in km, from a row to the urban reference point.

    Args:
        row: Any mapping-like object exposing ``average_lat`` and
            ``average_lon`` (e.g. a DataFrame row).

    Returns:
        The ``.km`` value of the geodesic between the row's position and the
        hard-coded reference coordinate.
    """
    # Fixed reference coordinate; appears to be a placeholder - TODO confirm.
    reference_point = (42.0, -0.5)
    tree_point = (row['average_lat'], row['average_lon'])
    separation = geodesic(tree_point, reference_point)
    return separation.km

def calculate_distance_to_water(row):
    """Return the geodesic distance, in km, from a row to the water reference point.

    Args:
        row: Any mapping-like object exposing ``average_lat`` and
            ``average_lon`` (e.g. a DataFrame row).

    Returns:
        The ``.km`` value of the geodesic between the row's position and the
        hard-coded reference coordinate.
    """
    # Fixed reference coordinate; appears to be a placeholder - TODO confirm.
    reference_point = (42.2, -0.7)
    tree_point = (row['average_lat'], row['average_lon'])
    separation = geodesic(tree_point, reference_point)
    return separation.km


In [2]:
# AHP - Assign Weights with Location criteria included
def calculate_ahp_weights():
    """Build the AHP comparison for the ranking criteria and return its weights.

    Five pairwise judgements over five criteria are passed to ``Compare``; the
    resulting ``target_weights`` and ``consistency_ratio`` are printed.

    Returns:
        The ``target_weights`` attribute of the ``Compare`` object.
    """
    # (preferred criterion, other criterion, strength of preference)
    # Only five of the ten possible pairs are listed; how the remaining pairs
    # are filled in is left to ahpy - TODO confirm this is intended.
    judgements = (
        ('AQI_Impact', 'Species_Importance', 3),
        ('AQI_Impact', 'Tree_Density', 7),
        ('Species_Importance', 'Tree_Density', 4),
        ('Species_Importance', 'distance_to_urban', 5),
        ('Tree_Density', 'distance_to_water', 6),
    )
    pairwise = {(left, right): strength for left, right, strength in judgements}

    ahp_model = Compare('Criteria', pairwise, precision=3)
    criteria_weights = ahp_model.target_weights
    print("AHP Criteria Weights:", criteria_weights)
    print("Consistency Ratio:", ahp_model.consistency_ratio)
    return criteria_weights


In [3]:
# TOPSIS - Rank Trees/Clusters and provide reasoning with Location
def topsis_ranking(df, weights, cost_criteria=()):
    """Score and rank rows with TOPSIS, printing a short rationale for each.

    The five criteria columns are min-max scaled, multiplied by their weights,
    and every row is scored by its relative closeness to the ideal solution,
    ``d_neg / (d_pos + d_neg)``. Rows are returned best-first.

    Args:
        df: DataFrame containing the criteria columns ``AQI_Impact``,
            ``Species_Importance``, ``Tree_Density``, ``distance_to_urban`` and
            ``distance_to_water``, plus ``speciesId``, ``coordinates_lon`` and
            ``coordinates_lat`` for the printed report. It is not modified.
        weights: Mapping from criterion name to weight. Weights are looked up
            by name, so the mapping's iteration order does not matter.
        cost_criteria: Names of criteria where a smaller value is better. The
            default (none) treats every criterion as "larger is better", which
            is what this function has always done.

    Returns:
        A copy of ``df`` with a ``TOPSIS_Score`` column, sorted by that column
        in descending order.

    Raises:
        KeyError: If ``weights`` lacks an entry for one of the criteria.
        ValueError: If ``cost_criteria`` names an unknown criterion.
    """
    criteria_columns = ['AQI_Impact', 'Species_Importance', 'Tree_Density',
                        'distance_to_urban', 'distance_to_water']

    missing_weights = [name for name in criteria_columns if name not in weights]
    if missing_weights:
        raise KeyError(f"Missing weights for criteria: {missing_weights}")

    cost_names = set(cost_criteria)
    unknown_costs = sorted(cost_names - set(criteria_columns))
    if unknown_costs:
        raise ValueError(f"Unknown cost criteria: {unknown_costs}")

    # Work on a copy so the caller's frame does not silently gain a column.
    ranked = df.copy()

    if len(ranked) == 0:
        # Nothing to scale or rank; the scaler would reject an empty frame.
        ranked['TOPSIS_Score'] = np.empty(0, dtype=float)
    else:
        normalized_data = MinMaxScaler().fit_transform(ranked[criteria_columns])

        # Pair each weight with its column by name. A positional
        # list(weights.values()) is only right when the dict happens to iterate
        # in column order; ahpy documents its weight dicts as sorted by
        # descending value, which scrambles that pairing.
        weight_vector = np.array([weights[name] for name in criteria_columns], dtype=float)
        weighted_data = normalized_data * weight_vector

        column_max = weighted_data.max(axis=0)
        column_min = weighted_data.min(axis=0)
        is_cost = np.array([name in cost_names for name in criteria_columns])
        ideal_solution = np.where(is_cost, column_min, column_max)
        negative_ideal_solution = np.where(is_cost, column_max, column_min)

        dist_to_ideal = np.sqrt(((weighted_data - ideal_solution) ** 2).sum(axis=1))
        dist_to_negative_ideal = np.sqrt(((weighted_data - negative_ideal_solution) ** 2).sum(axis=1))

        # With a single row (or identical rows) both distances are 0 and the
        # ratio is 0/0. Such rows are equidistant from both ideals, so they get
        # the neutral score 0.5 instead of NaN.
        total_distance = dist_to_ideal + dist_to_negative_ideal
        topsis_score = np.full(len(ranked), 0.5)
        np.divide(dist_to_negative_ideal, total_distance, out=topsis_score,
                  where=total_distance != 0)
        ranked['TOPSIS_Score'] = topsis_score

    ranked = ranked.sort_values(by='TOPSIS_Score', ascending=False)

    # NOTE(review): the reasoning below treats being close to urban areas and
    # water as valuable, yet both distances are scored as "larger is better"
    # unless they are passed in cost_criteria - confirm the intended direction.
    print("Ranked Trees/Clusters for Preservation/Removal with Location-Based Reasoning:\n")
    for _, row in ranked.iterrows():
        print(f"Species ID: {row['speciesId']}, Location: ({row['coordinates_lon']}, {row['coordinates_lat']})")
        print(f"TOPSIS Score: {row['TOPSIS_Score']:.4f}")

        # Thresholds are applied to the raw (unscaled) column values.
        reasoning = []
        if row['AQI_Impact'] > 0.5:
            reasoning.append("High AQI Impact, essential for maintaining air quality. Consider preserving.")
        if row['distance_to_urban'] < 5:
            reasoning.append("Close to urban area, contributing to urban air quality.")
        if row['distance_to_water'] < 2:
            reasoning.append("Close to water body, providing erosion control.")

        print("Reasoning:")
        for reason in reasoning:
            print(f"- {reason}")
        print("\n---\n")

    return ranked


In [None]:
# Visualize all trees and selected area
def plot_all_trees(df, input_location, radius_km):
    """Plot every tree, highlight those within ``radius_km`` and return them.

    Args:
        df: DataFrame with ``average_lat`` and ``average_lon`` columns.
        input_location: ``(latitude, longitude)`` of the centre of interest.
        radius_km: Search radius in kilometres.

    Returns:
        A DataFrame built from the rows whose geodesic distance to
        ``input_location`` is at most ``radius_km``. When no row qualifies the
        result is an empty DataFrame without columns.
    """
    plt.figure(figsize=(10, 10))

    # Plot all trees
    plt.scatter(df['average_lon'], df['average_lat'], color='gray', s=10, alpha=0.5, label='All Trees')

    # Filter trees within the radius
    trees_in_radius = [
        row for _, row in df.iterrows()
        if geodesic(input_location, (row['average_lat'], row['average_lon'])).km <= radius_km
    ]

    # One scatter call for the whole selection: the legend entry no longer
    # depends on a row with index label 0 happening to be inside the radius.
    if trees_in_radius:
        plt.scatter(
            [row['average_lon'] for row in trees_in_radius],
            [row['average_lat'] for row in trees_in_radius],
            color='green', s=30, label='Trees in Radius',
        )

    center_lat = input_location[0]
    center_lon = input_location[1]
    plt.scatter(center_lon, center_lat, color='red', s=100, label='Input Location')

    # The axes are in degrees, so the radius must be converted from km before
    # drawing. A degree of longitude shrinks with cos(latitude), which makes
    # the boundary an ellipse in lon/lat space. This uses the usual
    # ~111 km-per-degree approximation; the selection above stays exact.
    km_per_deg_lat = 110.574
    km_per_deg_lon = 111.320 * max(np.cos(np.radians(center_lat)), 1e-6)  # guard the poles
    theta = np.linspace(0.0, 2.0 * np.pi, 361)
    boundary_lon = center_lon + (radius_km / km_per_deg_lon) * np.cos(theta)
    boundary_lat = center_lat + (radius_km / km_per_deg_lat) * np.sin(theta)
    plt.plot(boundary_lon, boundary_lat, color='blue', linestyle='--', label=f'{radius_km} km Radius')

    plt.xlabel("Longitude")
    plt.ylabel("Latitude")
    plt.legend(loc='upper right')
    plt.title("Tree Locations with Specified Area of Interest")
    plt.grid(True)
    plt.show()

    return pd.DataFrame(trees_in_radius)


In [5]:
def main():
    """Run the interactive pipeline: load, weight, select by radius, rank.

    Prompts for a centre latitude/longitude and a radius in km, plots the
    trees, and prints the TOPSIS ranking of the trees inside the radius.
    Returns ``None``; all results are printed.
    """
    required_columns = ['AQI_Impact', 'Species_Importance', 'Tree_Density',
                        'distance_to_urban', 'distance_to_water']

    # Load and preprocess data
    merged_data = load_data()

    # Calculate AHP weights
    weights = calculate_ahp_weights()

    # Prompt for the area of interest (latitude first, then longitude)
    center_lat = float(input("Enter latitude of the center location: "))
    center_lon = float(input("Enter longitude of the center location: "))
    input_location = (center_lat, center_lon)
    radius_km = float(input("Enter search radius in km: "))

    # Visualize and select trees within radius
    selected_trees_df = plot_all_trees(merged_data, input_location, radius_km)

    # An empty selection has no columns at all, which used to surface as a
    # misleading "missing columns" error. Report the real cause instead.
    if selected_trees_df.empty:
        print(f"No trees found within {radius_km} km of {input_location}.")
        return

    missing_columns = [col for col in required_columns if col not in selected_trees_df.columns]
    if missing_columns:
        print("Error: Ensure that all necessary criteria columns are present.")
        print(f"Missing columns: {missing_columns}")
        return

    # Perform TOPSIS ranking on selected trees
    topsis_ranked_trees = topsis_ranking(selected_trees_df, weights)
    print(topsis_ranked_trees[['speciesId', 'average_lon', 'average_lat', 'TOPSIS_Score']])
