In [4]:
import pandas as pd
from sklearn.cluster import KMeans
from geopy.distance import geodesic

# Function to calculate haversine distance from cluster centroid
def calculate_average_haversine_distance(cluster_data, centroid):
    distances = cluster_data.apply(
        lambda row: geodesic((row['lat'], row['lng']), centroid).kilometers, axis=1
    )
    return distances.mean()

# Recursive function to refine clusters up to a maximum of 3 sub-clusters
def enforce_haversine_constraint(cluster_data, max_distance_km, max_sub_clusters=3):
    coords = cluster_data[['lat', 'lng']].values
    for num_clusters in range(2, max_sub_clusters + 1):
        kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(coords)

        # Generate sub-cluster labels as parent_cluster_number + .1, .2, etc.
        parent_cluster_id = cluster_data['cluster'].iloc[0]
        cluster_data['refined_cluster'] = [
            f"{parent_cluster_id}.{i+1}" for i in kmeans.labels_
        ]

        # Check all sub-clusters
        all_distances_satisfied = True
        for sub_cluster_id, sub_cluster_data in cluster_data.groupby('refined_cluster'):
            centroid = (sub_cluster_data['lat'].mean(), sub_cluster_data['lng'].mean())
            avg_haversine_distance = calculate_average_haversine_distance(sub_cluster_data, centroid)
            if avg_haversine_distance > max_distance_km:
                all_distances_satisfied = False

        # If all sub-clusters satisfy the constraint, return the refined data
        if all_distances_satisfied:
            return cluster_data, num_clusters

    # If the constraint isn't met after max_sub_clusters, return the final result
    print(f"Cluster {parent_cluster_id} could not meet the 30 km constraint after {max_sub_clusters} sub-clusters.")
    return cluster_data, max_sub_clusters

# Clustering function using K-Means with hierarchical refinement
def cluster_data_with_kmeans(ics_data, member_data, pacs_data, max_distance_km=30, initial_clusters=50, max_sub_clusters=3):
    # Assign types and combine datasets
    ics_data['type'] = 'ICS'
    member_data['type'] = 'Member'
    pacs_data['type'] = 'PACS'
    combined_data = pd.concat([ics_data, member_data, pacs_data], ignore_index=True)

    print(f"Starting with {initial_clusters} clusters and refining up to {max_sub_clusters} sub-clusters.")

    # Step 1: Initial Clustering with 50 Clusters
    coords = combined_data[['lat', 'lng']].values
    kmeans = KMeans(n_clusters=initial_clusters, random_state=0).fit(coords)
    combined_data['cluster'] = kmeans.labels_

    # Step 2: Refining Clusters to Satisfy Distance Constraint
    refined_clusters = []
    for cluster_id, cluster_data in combined_data.groupby('cluster'):
        centroid = (cluster_data['lat'].mean(), cluster_data['lng'].mean())
        avg_haversine_distance = calculate_average_haversine_distance(cluster_data, centroid)

        if avg_haversine_distance > max_distance_km:
            print(f"Cluster {cluster_id} exceeds max distance. Refining up to {max_sub_clusters} sub-clusters...")
            refined_cluster, _ = enforce_haversine_constraint(cluster_data, max_distance_km, max_sub_clusters)
            refined_clusters.append(refined_cluster)
        else:
            cluster_data['refined_cluster'] = f"{cluster_id}.1"
            refined_clusters.append(cluster_data)

    # Combine all refined clusters
    combined_refined_data = pd.concat(refined_clusters, ignore_index=True)
    return combined_refined_data

# Main function to execute the workflow
def main():
    # Load datasets
    print("Loading datasets...")
    ics_data = pd.read_csv(r'C:\Users\ajmal\OneDrive\Documents\NCOL\ICS Tracker\New folder\ICS_Tracker - ICS_Tracker_Final.csv')
    member_data = pd.read_csv(r'C:\Users\ajmal\OneDrive\Documents\NCOL\ICS Tracker\New folder\ICS_Tracker - NCOL_Member_Tracker.csv')
    pacs_data = pd.read_csv(r'C:\Users\ajmal\OneDrive\Documents\NCOL\ICS Tracker\New folder\Pacs_Pan_India_Processed_Pincode.csv')

    # Clean and preprocess data
    print("Cleaning and preparing data...")
    def clean_and_validate_coordinates(data, lat_col, lng_col, PIN_Code):
        data['lat'] = pd.to_numeric(data[lat_col], errors='coerce')
        data['lng'] = pd.to_numeric(data[lng_col], errors='coerce')
        data['PIN_Code'] = pd.to_numeric(data[PIN_Code], errors='coerce')
        return data.dropna(subset=['lat', 'lng'])

    ics_data = clean_and_validate_coordinates(ics_data, 'ICS_Lattitude', 'ICS_Longitude', 'ICS_Pincode')
    member_data = clean_and_validate_coordinates(member_data, 'Latitude', 'Longitude', 'Pincode')
    pacs_data = clean_and_validate_coordinates(pacs_data, 'Latitude', 'Longitude', 'Pincode')

    # Perform clustering
    print("Initiating clustering process...")
    combined_refined_data = cluster_data_with_kmeans(
        ics_data, member_data, pacs_data, max_distance_km=30, initial_clusters=50, max_sub_clusters=3)
    
    return combined_refined_data

if __name__ == "__main__":
     main()


Cluster 30 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 31 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 31 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 32 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 32 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 33 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 33 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 35 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 35 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 36 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 36 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 37 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 37 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 38 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 38 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 39 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 39 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 40 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 40 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 41 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 41 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 42 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 42 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 43 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 43 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 44 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 44 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 45 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 45 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 46 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 46 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 47 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 47 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 48 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 48 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 49 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 49 could not meet the 30 km constraint after 3 sub-clusters.


In [None]:

def remove_outliers_from_combined_data(dataframe, cluster_column, pincode_column):
    """
    Removes rows with outlier pin codes from each cluster in a combined DataFrame.

    Args:
        dataframe (pd.DataFrame): The input DataFrame containing clusters and pin codes.
        cluster_column (str): The column name for clusters.
        pincode_column (str): The column name for pin codes.

    Returns:
        pd.DataFrame: A cleaned DataFrame with outliers removed.
    """
    # Create an empty DataFrame to store cleaned data
    cleaned_data = pd.DataFrame(columns=dataframe.columns)
    
    # Process each cluster separately
    for cluster in dataframe[cluster_column].unique():
        cluster_data = dataframe[dataframe[cluster_column] == cluster]
        pincode_series = cluster_data[pincode_column]
        
        # Calculate Q1, Q3, and IQR
        Q1 = pincode_series.quantile(0.25)
        Q3 = pincode_series.quantile(0.75)
        IQR = Q3 - Q1
        
        # Define outlier boundaries
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        
        # Filter out rows with outlier pin codes
        non_outliers = cluster_data[(pincode_series >= lower_bound) & (pincode_series <= upper_bound)]
        cleaned_data = pd.concat([cleaned_data, non_outliers])
    
    return cleaned_data

combined_refined_data = main()

# Specify the cluster and pin code column names
cluster_column = "refined_cluster"
pincode_column = "PIN_Code"

# Remove outliers and get the cleaned data
cleaned_combined_data = remove_outliers_from_combined_data(combined_refined_data, cluster_column, pincode_column)

 # Separate data by type and remove empty columns
pacs_data = cleaned_combined_data[cleaned_combined_data['type'] == 'PACS'].dropna(axis=1, how='all')
member_data = cleaned_combined_data[cleaned_combined_data['type'] == 'Member'].dropna(axis=1, how='all')
ics_data = cleaned_combined_data[cleaned_combined_data['type'] == 'ICS'].dropna(axis=1, how='all')

    # Prepare summary data
summary_data = []
for cluster_id, cluster_data in cleaned_combined_data.groupby('refined_cluster'):
        centroid = (cluster_data['lat'].mean(), cluster_data['lng'].mean())
        summary_data.append({
            'Refined_Cluster': cluster_id,
            'Median_Latitude': cluster_data['lat'].median(),
            'Median_Longitude': cluster_data['lng'].median(),
            'Number_of_ICS': len(cluster_data[cluster_data['type'] == 'ICS']),
            'Number_of_PACS': len(cluster_data[cluster_data['type'] == 'PACS']),
            'Number_of_Members': len(cluster_data[cluster_data['type'] == 'Member']),
            'Max_PIN_Code': cluster_data['PIN_Code'].max(),  # Add maximum PIN code,
            'Min_PIN_Code': cluster_data['PIN_Code'].min(),  # Add minimum PIN code,
            'Avg_Haversine_Distance_km': calculate_average_haversine_distance(cluster_data, centroid)
        })

summary_data = pd.DataFrame(summary_data)



# Save the cleaned data to a new CSV
 # Save results
print("Saving results...")
pacs_data.to_csv(r'C:\Users\ajmal\OneDrive\Documents\NCOL\ICS Tracker\New folder\pacs_clusters_kmeans.csv', index=False)
member_data.to_csv(r'C:\Users\ajmal\OneDrive\Documents\NCOL\ICS Tracker\New folder\member_clusters_kmeans.csv', index=False)
ics_data.to_csv(r'C:\Users\ajmal\OneDrive\Documents\NCOL\ICS Tracker\New folder\ics_clusters_kmeans.csv', index=False)
summary_data.to_csv(r'C:\Users\ajmal\OneDrive\Documents\NCOL\ICS Tracker\New folder\cluster_summary_kmeans.csv', index=False)
print("Process completed. Results saved.")


Loading datasets...


  member_data = pd.read_csv(r'C:\Users\ajmal\OneDrive\Documents\NCOL\ICS Tracker\New folder\ICS_Tracker - NCOL_Member_Tracker.csv')
  pacs_data = pd.read_csv(r'C:\Users\ajmal\OneDrive\Documents\NCOL\ICS Tracker\New folder\Pacs_Pan_India_Processed_Pincode.csv')
  super()._check_params_vs_input(X, default_n_init=10)


Cleaning and preparing data...
Initiating clustering process...
Starting with 50 clusters and refining up to 3 sub-clusters.
Cluster 0 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 0 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 1 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 1 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 2 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 2 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 3 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 3 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 4 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 4 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 5 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 5 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 6 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 6 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 7 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 7 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 8 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 8 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 9 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 9 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 10 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 10 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 11 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 11 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 12 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 12 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 13 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 13 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 14 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 14 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 15 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 15 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 16 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 16 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 17 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 17 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 18 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 18 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 19 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 19 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 20 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 20 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 21 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 21 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 22 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 22 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 23 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 23 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 24 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 24 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 25 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 25 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 26 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 26 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 27 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 27 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 28 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 28 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 29 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 29 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 30 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 30 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 31 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 31 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 32 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 32 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 33 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 33 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 35 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 35 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 36 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 36 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 37 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 37 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 38 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 38 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 39 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 39 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 40 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 40 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 41 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 41 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 42 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 42 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 43 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 43 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 44 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 44 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 45 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 45 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 46 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 46 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 47 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 47 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 48 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 48 could not meet the 30 km constraint after 3 sub-clusters.
Cluster 49 exceeds max distance. Refining up to 3 sub-clusters...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Cluster 49 could not meet the 30 km constraint after 3 sub-clusters.


  cleaned_data = pd.concat([cleaned_data, non_outliers])


Saving results...
Process completed. Results saved.
