In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import sys
sys.path.insert(0, "../src")
from ST_Cluster import *

# Load Disaster Data

In [None]:
# Column names treated as the varied parameters by the plotting cells below.
parameters = ['space_thres', 'time_thres', 'num_thres']

# Sensitivity table for the disaster data (file suffix "_di").
sensitivity_df = pd.read_csv("../../Clusters/cluster_sensitivities_di.csv")

In [None]:
# Result columns plotted against the parameters in this cell.
results = ['count_complex_cases', 'percent_clustered', 'total_clusters']

plot_scatter_lines(sensitivity_df, parameters, results)

# One plot_heatmaps call per parameter pair, in the same order as before.
for _param_a, _param_b in (
    ('space_thres', 'time_thres'),
    ('num_thres', 'space_thres'),
    ('time_thres', 'num_thres'),
):
    plot_heatmaps(sensitivity_df, _param_a, _param_b, results, figsize=(12, 8))

In [None]:
# Duration and latitude/longitude span columns (average and maximum variants).
results = [
    'avg_duration', 'max_duration',
    'avg_lat_span', 'max_lat_span',
    'avg_lon_span', 'max_lon_span',
]

plot_scatter_lines(sensitivity_df, parameters, results)

# One plot_heatmaps call per parameter pair, in the same order as before.
for _param_a, _param_b in (
    ('space_thres', 'time_thres'),
    ('num_thres', 'space_thres'),
    ('time_thres', 'num_thres'),
):
    plot_heatmaps(sensitivity_df, _param_a, _param_b, results, figsize=(12, 8))

In [None]:
# Take an explicit copy of the two objective columns. Assigning into a bare
# column selection of sensitivity_df is chained assignment and makes pandas
# emit SettingWithCopyWarning; the copy makes the intent unambiguous and
# leaves the raw metrics in sensitivity_df untouched.
objectives = sensitivity_df[["percent_clustered", "count_complex_cases"]].copy()
scaler = MinMaxScaler()

# Min-max scale each objective to [0, 1] (the scaler is refit per column).
# 'percent_clustered' is to be maximised and is used as-is;
# 'count_complex_cases' is to be minimised, so it is flipped (1 - x) to make
# "higher is better" hold for both columns.
objectives["percent_clustered"] = scaler.fit_transform(objectives[["percent_clustered"]])
objectives["count_complex_cases"] = 1 - scaler.fit_transform(objectives[["count_complex_cases"]])

# Combined score: unweighted sum of the two normalised objectives, stored on
# sensitivity_df (index-aligned with `objectives`).
sensitivity_df["combined_score"] = (
    objectives["percent_clustered"] +
    objectives["count_complex_cases"]
)

# Row with the highest combined score (idxmax returns the first one on ties).
best_combination = sensitivity_df.loc[sensitivity_df["combined_score"].idxmax()]

# Display the best parameter combination and scores
best_combination

In [None]:
# Metrics that feed the three-term combined score.
objectives = sensitivity_df[["percent_clustered", "count_complex_cases", "total_clusters"]]

# A single scaler instance, refit for each metric in turn.
scaler = MinMaxScaler()

# Per-metric weights, keyed by the normalised column each one multiplies.
weights = dict(
    percent_clustered_norm=1.0,    # percent_clustered (maximize)
    count_complex_cases_norm=1.0,  # count_complex_cases (minimize; stored inverted)
    total_clusters_norm=1.0,       # total_clusters (maximize)
)

# Write one min-max normalised column per metric onto sensitivity_df.
# The flag marks metrics to minimise, whose scaled values are flipped (1 - x).
for _source_col, _norm_col, _invert in (
    ("percent_clustered", "percent_clustered_norm", False),
    ("count_complex_cases", "count_complex_cases_norm", True),
    ("total_clusters", "total_clusters_norm", False),
):
    _scaled = scaler.fit_transform(objectives[[_source_col]])
    sensitivity_df[_norm_col] = (1 - _scaled) if _invert else _scaled

# Weighted sum of the normalised columns, accumulated in the dict's order.
sensitivity_df["combined_score"] = sum(
    sensitivity_df[_col] * _weight for _col, _weight in weights.items()
)

# Pick the row whose combined score is largest and print it.
best_combination = sensitivity_df.loc[sensitivity_df["combined_score"].idxmax()]
print(best_combination)


# Load Claims Data

In [None]:
# Column names treated as the varied parameters by the plotting cells below.
parameters = ['space_thres', 'time_thres', 'num_thres']

# Sensitivity table for the claims data (file suffix "_cl"); this rebinds
# sensitivity_df, replacing the table loaded earlier in the notebook.
sensitivity_df = pd.read_csv("../../Clusters/cluster_sensitivities_cl.csv")

In [None]:
# Result columns plotted against the parameters in this cell.
results = ['unclustered_percentage', 'total_clusters']

plot_scatter_lines(sensitivity_df, parameters, results)

# One plot_heatmaps call per parameter pair, in the same order as before.
for _param_a, _param_b in (
    ('space_thres', 'time_thres'),
    ('num_thres', 'space_thres'),
    ('time_thres', 'num_thres'),
):
    plot_heatmaps(sensitivity_df, _param_a, _param_b, results, figsize=(12, 8))

In [None]:
# Duration and latitude/longitude span columns (average and maximum variants).
results = [
    'avg_duration', 'max_duration',
    'avg_lat_span', 'max_lat_span',
    'avg_lon_span', 'max_lon_span',
]

plot_scatter_lines(sensitivity_df, parameters, results)

# One plot_heatmaps call per parameter pair, in the same order as before.
for _param_a, _param_b in (
    ('space_thres', 'time_thres'),
    ('num_thres', 'space_thres'),
    ('time_thres', 'num_thres'),
):
    plot_heatmaps(sensitivity_df, _param_a, _param_b, results, figsize=(12, 8))

In [None]:
# Variance columns for duration and latitude/longitude span.
results = ['var_duration', 'var_lat_span', 'var_lon_span']

plot_scatter_lines(sensitivity_df, parameters, results)

# One plot_heatmaps call per parameter pair, in the same order as before.
for _param_a, _param_b in (
    ('space_thres', 'time_thres'),
    ('num_thres', 'space_thres'),
    ('time_thres', 'num_thres'),
):
    plot_heatmaps(sensitivity_df, _param_a, _param_b, results, figsize=(12, 8))

In [None]:
# Objectives for the optimisation, as (column, goal) pairs.
objectives = [
    ("unclustered_percentage", "min"),  # Minimize
    ("total_clusters", "max"),  # Maximize
]

# Exclusive upper bounds applied to the max_* columns below.
# NOTE(review): units follow the CSV columns (not visible here) - confirm.
max_duration_limit = 90
max_lon_span_limit = 120
max_lat_span_limit = 25


def compute_objective_score(row, goals=tuple(objectives)):
    """Return the signed sum of the objective columns for one row.

    Columns whose goal is "max" are added and columns whose goal is "min"
    are subtracted, so a larger score is better. Values are combined on
    their raw scales (no normalisation), so the column with the larger
    magnitude dominates the result.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``DataFrame.apply``)
        Must contain every column named in ``goals``.
    goals : iterable of (column, goal) pairs, optional
        Defaults to a snapshot of ``objectives`` taken when this function is
        defined, so the function keeps working after later cells rebind the
        name ``objectives`` to something else.

    Raises
    ------
    ValueError
        If a goal is neither "max" nor "min" (previously such entries were
        silently ignored).
    """
    score = 0
    for column, goal in goals:
        if goal == "max":
            score += row[column]
        elif goal == "min":
            score -= row[column]
        else:
            raise ValueError(f"Unknown goal {goal!r} for column {column!r}; expected 'max' or 'min'")
    return score


# Keep only the rows that satisfy every constraint. The explicit .copy()
# makes the result an independent frame, so adding a column below is not
# chained assignment on a slice of sensitivity_df (SettingWithCopyWarning).
constrained_data = sensitivity_df[
    (sensitivity_df["max_duration"] < max_duration_limit) &
    (sensitivity_df["max_lon_span"] < max_lon_span_limit) &
    (sensitivity_df["max_lat_span"] < max_lat_span_limit)
].copy()

# Pick the parameter triple with the highest score among the feasible rows.
if not constrained_data.empty:
    constrained_data["objective_score"] = constrained_data.apply(compute_objective_score, axis=1)
    optimal_parameters_constrained = constrained_data.loc[
        constrained_data["objective_score"].idxmax(), ["space_thres", "time_thres", "num_thres"]
    ]
else:
    # No row passed the constraints; a message is kept in place of the result.
    optimal_parameters_constrained = "No feasible solution under given constraints"

optimal_parameters_constrained

In [None]:
# Exclusive upper bounds applied to the max_* columns below.
# NOTE(review): units follow the CSV columns (not visible here) - confirm.
max_duration_limit = 90
max_lon_span_limit = 120
max_lat_span_limit = 25

# Keep only the rows that satisfy every constraint. The explicit .copy()
# makes filtered_df an independent frame, so adding 'combined_score' below is
# not chained assignment on a slice of sensitivity_df.
filtered_df = sensitivity_df[
    (sensitivity_df["max_duration"] < max_duration_limit) &
    (sensitivity_df["max_lon_span"] < max_lon_span_limit) &
    (sensitivity_df["max_lat_span"] < max_lat_span_limit)
].copy()

if not filtered_df.empty:
    # Independent copy of the objective columns; they are overwritten with
    # their normalised values, and filtered_df keeps the raw ones.
    objectives = filtered_df[
        ["unclustered_percentage", "total_clusters"]
    ].copy()
    scaler = MinMaxScaler()

    # Min-max scale each objective to [0, 1] over the feasible rows only.
    # 'unclustered_percentage' is to be minimised, so it is flipped (1 - x);
    # 'total_clusters' is to be maximised and used as-is.
    objectives["unclustered_percentage"] = 1 - scaler.fit_transform(objectives[["unclustered_percentage"]])
    objectives["total_clusters"] = scaler.fit_transform(objectives[["total_clusters"]])

    # Calculate a combined score (weighted equally)
    filtered_df["combined_score"] = objectives.sum(axis=1)

    # Row with the highest combined score (idxmax returns the first on ties).
    best_combination = filtered_df.loc[filtered_df["combined_score"].idxmax()]

    # Display the best parameter combination and scores
    print("Best Parameter Combination and Scores:")
    print(best_combination)
else:
    print("No feasible solution under the given constraints.")

In [None]:
# Exclusive upper bounds applied to the max_* columns below.
# NOTE(review): units follow the CSV columns (not visible here) - confirm.
max_duration_limit = 90
max_lon_span_limit = 120
max_lat_span_limit = 25

# Keep only the rows that satisfy every constraint. The explicit .copy()
# makes filtered_df an independent frame, so adding 'combined_score' below is
# not chained assignment on a slice of sensitivity_df.
filtered_df = sensitivity_df[
    (sensitivity_df["max_duration"] < max_duration_limit) &
    (sensitivity_df["max_lon_span"] < max_lon_span_limit) &
    (sensitivity_df["max_lat_span"] < max_lat_span_limit)
].copy()

if not filtered_df.empty:
    # Only 'unclustered_percentage' is considered here. Work on an
    # independent copy so filtered_df keeps the raw values.
    objectives = filtered_df[["unclustered_percentage"]].copy()
    scaler = MinMaxScaler()

    # Minimise 'unclustered_percentage': after min-max scaling the value is
    # flipped (1 - x), so the row with the smallest unclustered percentage
    # receives the largest score.
    objectives["unclustered_percentage"] = 1 - scaler.fit_transform(objectives[["unclustered_percentage"]])

    # Add normalized objective score to the filtered dataframe
    filtered_df["combined_score"] = objectives.sum(axis=1)

    # Row with the highest combined score (idxmax returns the first on ties).
    best_combination = filtered_df.loc[filtered_df["combined_score"].idxmax()]

    # Display the best parameter combination and scores
    print("Best Parameter Combination and Scores:")
    print(best_combination)
else:
    print("No feasible solution under the given constraints.")