### 1. Define necessary functions
### 2. Define clusters of two-dimensional points and calculate and print statistics for each cluster
### 3. Define new points to be assigned to a cluster 
### 4. Calculate the new normalized Euclidean distance threshold based on the dimensionality of the data
### 5. Assign new points to a cluster based on normalized Euclidean distance

In [16]:
import numpy as np
import pandas as pd

# Define necessary functions
def calculate_statistics(points, decimals):
    """Calculate per-dimension summary statistics for a set of points.

    Parameters
    ----------
    points : array-like
        Points to summarise, one point per row (statistics are taken along
        axis 0). Plain nested lists are accepted as well as NumPy arrays.
    decimals : int
        Number of decimal places every returned array is rounded to.

    Returns
    -------
    tuple of numpy.ndarray
        ``(mean, variance, std_dev, sum_i, sum_sq_i)`` where ``variance`` is
        the population variance (``ddof=0``), ``std_dev`` is its square root,
        ``sum_i`` is the column-wise sum and ``sum_sq_i`` is the column-wise
        sum of squares.
    """
    # Coerce once so that `** 2` below also works for list input; a plain
    # Python list does not support the power operator.
    points = np.asarray(points)

    mean = np.mean(points, axis=0)
    variance = np.var(points, axis=0, ddof=0)  # Population variance
    std_dev = np.sqrt(variance)
    sum_i = np.sum(points, axis=0)
    sum_sq_i = np.sum(points ** 2, axis=0)

    return (
        np.round(mean, decimals),
        np.round(variance, decimals),
        np.round(std_dev, decimals),
        np.round(sum_i, decimals),
        np.round(sum_sq_i, decimals),
    )

def calculate_mahalanobis_distance(new_point, cluster_points, decimals=4):
    """Calculate the normalized distance of a new point from a cluster.

    Each coordinate difference from the cluster mean is divided by that
    dimension's population standard deviation, and the Euclidean norm of the
    scaled differences is returned. No covariance between dimensions is used,
    so this is the diagonal-covariance form of the Mahalanobis distance
    (i.e. the normalized Euclidean distance).

    Parameters
    ----------
    new_point : array-like
        Coordinates of the point to measure.
    cluster_points : array-like
        Points of the cluster, one point per row.
    decimals : int, optional
        Number of decimal places the result is rounded to (default 4).

    Returns
    -------
    float
        The rounded distance. If the cluster has zero variance in some
        dimension, a point that matches the cluster in that dimension gets a
        contribution of 0 from it, and a point that differs yields ``inf``.
    """
    new_point = np.asarray(new_point, dtype=float)
    cluster_points = np.asarray(cluster_points, dtype=float)

    cluster_center = np.mean(cluster_points, axis=0)
    std_devs = np.sqrt(np.var(cluster_points, axis=0, ddof=0))
    diff = new_point - cluster_center

    # A zero standard deviation would make the division emit warnings and
    # produce nan for 0/0. Silence the warnings and resolve both cases
    # explicitly: x/0 stays +/-inf, while 0/0 is replaced by 0.
    with np.errstate(divide="ignore", invalid="ignore"):
        normalized_diff = diff / std_devs
    normalized_diff = np.where((std_devs == 0) & (diff == 0), 0.0, normalized_diff)

    distance = np.sqrt(np.sum(normalized_diff ** 2))
    return round(distance, decimals)
#---------INPUT AREA---------------------------------------------------------------
# Number of decimal places for all outputs
decimals = 3

# Define clusters of two-dimensional points
cluster1_points = np.array([[1, 7], [1, 10], [3, 8], [4, 10], [2, 9]])  # Cluster 1
cluster2_points = np.array([[6, 2], [6, 4], [8, 2], [8, 5]])  # Cluster 2
cluster3_points = np.array([[1, 2], [2, 1], [2, 3]])  # Cluster 3

# Calculate and print statistics for each cluster
clusters = [cluster1_points, cluster2_points, cluster3_points]
dimensionality = cluster1_points.shape[1]

# The single threshold below is only meaningful if every cluster lives in the
# same space, so check that instead of silently assuming it.
for i, cluster in enumerate(clusters, 1):
    if cluster.shape[1] != dimensionality:
        raise ValueError(
            f"Cluster {i} has dimensionality {cluster.shape[1]}, expected {dimensionality}"
        )

# Threshold is 2 times the square root of the dimensionality. It depends only
# on the dimensionality, so it is computed once and shared by all clusters.
threshold = 2 * np.sqrt(dimensionality)
thresholds = [threshold] * len(clusters)

for i, cluster in enumerate(clusters, 1):
    mean, var, std, sum_i, sum_sq_i = calculate_statistics(cluster, decimals)
    # Format the threshold with `decimals` too, so the setting above really
    # applies to every printed number.
    print(f"Cluster {i} - Mean: {mean}, Variance: {var}, Standard Deviation: {std}, Sum: {sum_i}, Sum of Squares: {sum_sq_i}, Threshold: {threshold:.{decimals}f}")

# Define new points to be assigned to a cluster
new_points = np.array([[6, 8], [2, 2], [6, 5]])  # New points
#---------------------------------------------------------------------------------------------------
# Assign new points to a cluster based on the normalized distance returned by
# calculate_mahalanobis_distance
for new_point in new_points:
    distances = [calculate_mahalanobis_distance(new_point, cluster, decimals=decimals) for cluster in clusters]

    # Only clusters whose distance is below their own threshold are eligible.
    eligible = [idx for idx, distance in enumerate(distances) if distance < thresholds[idx]]

    # Among the eligible clusters pick the nearest one (not merely the first
    # one found), and report that cluster's own distance so the printed
    # cluster and distance always belong together. Ties go to the lowest index.
    assigned_cluster = None
    min_distance = None
    if eligible:
        best_idx = min(eligible, key=lambda idx: distances[idx])
        assigned_cluster = best_idx + 1
        min_distance = distances[best_idx]

    distance_str = ", ".join([f"Cluster {i+1}: {dist}" for i, dist in enumerate(distances)])
    if assigned_cluster is not None:
        print(f"Point {new_point} assigned to Cluster {assigned_cluster} (Distance: {min_distance}) - Distances: [{distance_str}]")
    else:
        print(f"Point {new_point} not assigned to any cluster (Distances: [{distance_str}])")



Cluster 1 - Mean: [2.2 8.8], Variance: [1.36 1.36], Standard Deviation: [1.166 1.166], Sum: [11 44], Sum of Squares: [ 31 394], Threshold: 2.828
Cluster 2 - Mean: [7.   3.25], Variance: [1.    1.688], Standard Deviation: [1.    1.299], Sum: [28 13], Sum of Squares: [200  49], Threshold: 2.828
Cluster 3 - Mean: [1.667 2.   ], Variance: [0.222 0.667], Standard Deviation: [0.471 0.816], Sum: [5 6], Sum of Squares: [ 9 14], Threshold: 2.828
Point [6 8] not assigned to any cluster (Distances: [Cluster 1: 3.33, Cluster 2: 3.791, Cluster 3: 11.769])
Point [2 2] assigned to Cluster 3 (Distance: 0.707) - Distances: [Cluster 1: 5.833, Cluster 2: 5.092, Cluster 3: 0.707]
Point [6 5] assigned to Cluster 2 (Distance: 1.678) - Distances: [Cluster 1: 4.608, Cluster 2: 1.678, Cluster 3: 9.899]
