### Pairwise distances as a measure of RSSI diversity

#### 1. Expected RSSI in each testing point to get the pairwise distances

In [1]:
import sys
from pathlib import Path

# Make the project root importable so that `utils` can be imported below.
# The root is taken as the parent of the current working directory, i.e. this
# assumes the kernel was started inside the notebooks folder.
project_root = Path().resolve().parent  # Go up from notebooks to thesis
if str(project_root) not in sys.path:
    # Guard keeps re-runs of this cell from appending duplicate entries.
    sys.path.append(str(project_root))

- Pairwise distances between RSSIs

In [2]:
from utils.tools import distances, LogParams, expected_rssi, df_test, df_AP
from typing import List
from scipy.spatial.distance import pdist
import numpy as np
import pandas as pd

def pairwise_rssi_distances(distances: List[dict] = distances,
                            log_params: LogParams = {'rho_0': 60, 'alpha': 3.5},
                            metric: str = 'euclidean') -> np.ndarray:
    """
    Compute pairwise distances between expected RSSI vectors.

    The expected RSSI table is produced by ``expected_rssi`` from ``distances``,
    the imported ``df_AP`` and ``log_params``. Every column whose name starts
    with ``'WAP'`` is used as one feature of the RSSI vector of a row.

    Args:
        distances (List[dict]): Distance data forwarded unchanged to
            ``expected_rssi``. Defaults to ``utils.tools.distances``.
        log_params (LogParams): Parameters of the expected RSSI model
            (``rho_0`` and ``alpha``).
            NOTE(review): the default dict is a single object shared by all
            calls; this is safe only if ``expected_rssi`` never mutates it -
            confirm.
        metric (str): Metric name passed to ``scipy.spatial.distance.pdist``.

    Returns:
        np.ndarray: The 1-D array returned by ``pdist`` (condensed form, one
        entry per unordered pair of rows).
    """

    # Calculate expected RSSI values based on distances and log parameters
    expected_rssi_df = expected_rssi(distances, df_AP, log_params)

    # Keep only the WAP columns of the expected-RSSI dataframe
    wap_columns = [col for col in expected_rssi_df.columns if col.startswith('WAP')]
    rssi_array = expected_rssi_df[wap_columns].values

    # Rows are not de-duplicated: the notebook indexes this result with a mask
    # built from pdist over the test coordinates, so the pair order must match.
    return pdist(rssi_array, metric=metric)

In [3]:
# Pairwise RSSI distances with the function defaults
# (rho_0=60, alpha=3.5, Euclidean metric); the bare name displays the array.
pw_rssi_list = pairwise_rssi_distances()
pw_rssi_list

array([0., 0., 0., ..., 0., 0., 0.], shape=(1007490,))

- Pairwise distances between coordinates

In [4]:
def pairwise_coord_distances(df_test: pd.DataFrame = df_test,
                             metric: str = 'euclidean') -> np.ndarray:
    """
    Compute pairwise distances between the coordinates of the test points.

    Args:
        df_test (pd.DataFrame): DataFrame with the coordinates in its 'X' and
            'Y' columns (upper case). Defaults to ``utils.tools.df_test``.
        metric (str): Metric name passed to ``scipy.spatial.distance.pdist``.

    Returns:
        np.ndarray: The 1-D array returned by ``pdist`` (condensed form, one
        entry per unordered pair of rows).
    """

    # Extract coordinates as an (n_rows, 2) array
    coords = df_test[['X', 'Y']].values

    # Rows are not de-duplicated, so repeated coordinates yield zero distances
    # and the pair order follows the row order of df_test.
    return pdist(coords, metric=metric)

In [5]:
# Pairwise coordinate distances with the function defaults
# (imported df_test, Euclidean metric); the bare name displays the array.
pw_coord_list = pairwise_coord_distances()
pw_coord_list

array([0., 0., 0., ..., 0., 0., 0.], shape=(1007490,))

- Select only the RSSI pairs whose label (coordinate) pairwise distances are below a threshold

In [6]:

# Cut-off for the pairwise coordinate distances, expressed in the units of the
# 'X'/'Y' columns of df_test - presumably metres; TODO confirm.
threshold = 20  # Define a threshold for filtering distances
pw_coord_list = pairwise_coord_distances()
# Boolean mask over the pdist pairs: True where two test points are closer than
# `threshold`.
# NOTE(review): later cells apply this mask to the pairwise RSSI distances, which
# is only valid if expected_rssi yields one row per df_test row, in the same
# order - confirm.
index_below_threshold = pw_coord_list < threshold


In [7]:
# Grid of (rho_0, alpha) pairs for the expected-RSSI model:
#   rho_0: 40 to 80 inclusive, step 2.5    (17 values)
#   alpha: 1.0 to 7.0 inclusive, step 0.25 (25 values)
# np.arange excludes its stop value, so each stop is one step past the last
# wanted value. rho_0 varies slowest, i.e. the list is grouped by rho_0.
alpha_values = [round(a, 2) for a in np.arange(1.0, 7.25, 0.25)]
param_combinations = [
    (rho_0, alpha)
    for rho_0 in np.arange(40, 82.5, 2.5)
    for alpha in alpha_values
]

- Average pairwise distances with Euclidean metric

In [8]:
# One record per (rho_0, alpha): mean Euclidean distance between expected RSSI
# vectors, restricted to the point pairs selected by `index_below_threshold`.
mean_pw_distances = [
    {
        'rho_0': rho_0,
        'alpha': alpha,
        'mean_pw_distance': np.mean(
            pairwise_rssi_distances(
                distances, LogParams(rho_0=rho_0, alpha=alpha), metric='euclidean'
            )[index_below_threshold]
        ),
    }
    for rho_0, alpha in param_combinations
]

# Tabulate, ordered by rho_0 (ascending) and then mean distance (descending)
mean_pw_distances_df = pd.DataFrame(mean_pw_distances).sort_values(
    by=['rho_0', 'mean_pw_distance'], ascending=[True, False]
)

In [9]:
# Write the Euclidean mean-distance table to <project_root>/output, without the row index
output_path = project_root.joinpath('output', 'mean_pairwise_distances_euclidean.csv')
mean_pw_distances_df.to_csv(output_path, index=False)

- Average pairwise distances with Chebyshev metric

In [10]:
# One record per (rho_0, alpha): mean Chebyshev distance between expected RSSI
# vectors, restricted to the point pairs selected by `index_below_threshold`.
mean_pw_distances = [
    {
        'rho_0': rho_0,
        'alpha': alpha,
        'mean_pw_distance': np.mean(
            pairwise_rssi_distances(
                distances, LogParams(rho_0=rho_0, alpha=alpha), metric='chebyshev'
            )[index_below_threshold]
        ),
    }
    for rho_0, alpha in param_combinations
]

# Tabulate, ordered by rho_0 (ascending) and then mean distance (descending)
mean_pw_distances_df = pd.DataFrame(mean_pw_distances).sort_values(
    by=['rho_0', 'mean_pw_distance'], ascending=[True, False]
)

In [11]:
# Write the Chebyshev mean-distance table to <project_root>/output, without the row index
output_path = project_root.joinpath('output', 'mean_pairwise_distances_chebyshev.csv')
mean_pw_distances_df.to_csv(output_path, index=False)

- Average pairwise distances with Standardized Euclidean metric

In [19]:
# One record per (rho_0, alpha): mean standardized-Euclidean distance between
# expected RSSI vectors, restricted to the pairs in `index_below_threshold`.
mean_pw_distances = [
    {
        'rho_0': rho_0,
        'alpha': alpha,
        'mean_pw_distance': np.mean(
            pairwise_rssi_distances(
                distances, LogParams(rho_0=rho_0, alpha=alpha), metric='seuclidean'
            )[index_below_threshold]
        ),
    }
    for rho_0, alpha in param_combinations
]

# Tabulate, ordered by rho_0 (ascending) and then mean distance (descending)
mean_pw_distances_df = pd.DataFrame(mean_pw_distances).sort_values(
    by=['rho_0', 'mean_pw_distance'], ascending=[True, False]
)

In [20]:
# Write the standardized-Euclidean mean-distance table to <project_root>/output, without the row index
output_path = project_root.joinpath('output', 'mean_pairwise_distances_seuclidean.csv')
mean_pw_distances_df.to_csv(output_path, index=False)

- Coefficient of Variation (CV) of pairwise distances with Euclidean metric

In [25]:
# Coefficient of variation (std / mean, in percent) of the Euclidean pairwise
# RSSI distances over the pairs in `index_below_threshold`, per (rho_0, alpha).
cv_pw_distances = []

for rho_0, alpha in param_combinations:
    log_params = LogParams(rho_0=rho_0, alpha=alpha)
    # Compute the pairwise distances once per combination and reuse them for
    # both the standard deviation and the mean (previously computed twice).
    close_pair_distances = pairwise_rssi_distances(
        distances, log_params, metric='euclidean'
    )[index_below_threshold]
    cv_pw_dist = (np.std(close_pair_distances) / np.mean(close_pair_distances)) * 100
    cv_pw_distances.append({
        'rho_0': rho_0,
        'alpha': alpha,
        'cv_pw_distance': cv_pw_dist
    })

# Tabulate, ordered by rho_0 and then CV, both ascending
cv_pw_distances_df = pd.DataFrame(cv_pw_distances).sort_values(
    by=['rho_0', 'cv_pw_distance'], ascending=[True, True]
)

In [26]:
# Write the Euclidean CV table to <project_root>/output, without the row index
output_path = project_root.joinpath('output', 'cv_pairwise_distances_euclidean.csv')
cv_pw_distances_df.to_csv(output_path, index=False)

- Coefficient of Variation (CV) of pairwise distances with Chebyshev metric

In [23]:
# Coefficient of variation (std / mean, in percent) of the Chebyshev pairwise
# RSSI distances over the pairs in `index_below_threshold`, per (rho_0, alpha).
cv_pw_distances = []

for rho_0, alpha in param_combinations:
    log_params = LogParams(rho_0=rho_0, alpha=alpha)
    # Compute the pairwise distances once per combination and reuse them for
    # both the standard deviation and the mean (previously computed twice).
    close_pair_distances = pairwise_rssi_distances(
        distances, log_params, metric='chebyshev'
    )[index_below_threshold]
    cv_pw_dist = (np.std(close_pair_distances) / np.mean(close_pair_distances)) * 100
    cv_pw_distances.append({
        'rho_0': rho_0,
        'alpha': alpha,
        'cv_pw_distance': cv_pw_dist
    })

# Tabulate, ordered by rho_0 and then CV, both ascending
cv_pw_distances_df = pd.DataFrame(cv_pw_distances).sort_values(
    by=['rho_0', 'cv_pw_distance'], ascending=[True, True]
)

In [24]:
# Write the Chebyshev CV table to <project_root>/output, without the row index
output_path = project_root.joinpath('output', 'cv_pairwise_distances_chebyshev.csv')
cv_pw_distances_df.to_csv(output_path, index=False)

### 2. Sorting the combinations according to the average error

In [15]:
# Positioning results of the NN run, read from <project_root>/output
nn_results_file = project_root.joinpath('output', 'nn_results_by_params.csv')
nn_results = pd.read_csv(nn_results_file)

# Positioning results of the SLSQP run, read from the same folder
slsqp_results_file = project_root.joinpath('output', 'optimized_results_by_params.csv')
slsqp_results = pd.read_csv(slsqp_results_file)

# Quick sanity report: shapes first, then column names
print("Loaded NN results shape:", nn_results.shape)
print("Loaded SLSQP results shape:", slsqp_results.shape)
print("\nNN results columns:", nn_results.columns.tolist())
print("SLSQP results columns:", slsqp_results.columns.tolist())

# Function to get top 3 alphas for each rho_0
def get_top3_alphas_per_rho0(df, algorithm_name):
    """
    For each rho_0 value, find the 3 alpha values with the smallest mean_error.

    Args:
        df: DataFrame with columns rho_0, alpha, mean_error
        algorithm_name: String identifier stored in the 'algorithm' column

    Returns:
        DataFrame with columns algorithm, rho_0, alpha, mean_error holding up
        to 3 rows per rho_0. Rows are grouped by ascending rho_0 and, within
        each rho_0, ordered by ascending alpha. An empty input yields an empty
        DataFrame that still has these four columns.
    """
    columns = ['algorithm', 'rho_0', 'alpha', 'mean_error']
    top3_results = []

    # Walk the rho_0 values in ascending order
    for rho_0 in sorted(df['rho_0'].unique()):
        # Three lowest-error rows for this rho_0, then ordered by alpha
        top3_for_rho = (
            df[df['rho_0'] == rho_0]
            .nsmallest(3, 'mean_error')
            .sort_values('alpha')
        )

        for alpha, mean_error in zip(top3_for_rho['alpha'], top3_for_rho['mean_error']):
            top3_results.append({
                'algorithm': algorithm_name,
                'rho_0': rho_0,
                'alpha': alpha,
                'mean_error': mean_error
            })

    # Explicit columns keep the schema stable even when no rows were collected
    return pd.DataFrame(top3_results, columns=columns)

# Top-3 tables for the NN and SLSQP results
top3_nn = get_top3_alphas_per_rho0(nn_results, 'NN')
top3_slsqp = get_top3_alphas_per_rho0(slsqp_results, 'SLSQP')

# Stack both tables and order by algorithm, rho_0 and then mean_error
# (lowest error first within each rho_0)
top3_combined = (
    pd.concat([top3_nn, top3_slsqp], ignore_index=True)
    .sort_values(['algorithm', 'rho_0', 'mean_error'])
)

print("\n=== TOP 3 ALPHAS PER RHO_0 ANALYSIS ===")
print(f"Total combinations analyzed: {len(top3_combined)}")
print(f"Unique rho_0 values: {len(top3_combined['rho_0'].unique())}")
print(f"Algorithms: {top3_combined['algorithm'].unique().tolist()}")

# Preview of the combined table
print("\nSample results (first 10 rows):")
print(top3_combined.head(10).to_string(index=False))

# Persist the combined table under <project_root>/output
output_file = project_root.joinpath('output', 'top3_alphas_per_rho0.csv')
top3_combined.to_csv(output_file, index=False)
print(f"\nResults saved to: {output_file}")

# Per-algorithm summary of the top-3 errors and the alphas that recur most
print("\n=== SUMMARY BY ALGORITHM ===")
for algorithm in ['NN', 'SLSQP']:
    algo_data = top3_combined[top3_combined['algorithm'] == algorithm]
    print(f"\n{algorithm} Algorithm:")
    print(f"  Best overall performance: {algo_data['mean_error'].min():.3f}m")
    print(f"  Worst in top-3: {algo_data['mean_error'].max():.3f}m")
    print(f"  Average of top-3 performances: {algo_data['mean_error'].mean():.3f}m")

    # Five alpha values that occur most often among the top-3 rows
    alpha_counts = algo_data['alpha'].value_counts().head(5)
    print("  Most frequent alpha values in top-3:")
    for alpha, count in alpha_counts.items():
        print(f"    α = {alpha}: appears {count} times")

# Best NN error against best SLSQP error; the sort above does not change the
# rows, so the per-algorithm tables give the same minima as the combined one
print("\n=== CROSS-ALGORITHM COMPARISON ===")
nn_best = top3_nn['mean_error'].min()
slsqp_best = top3_slsqp['mean_error'].min()
improvement = nn_best - slsqp_best
improvement_pct = (improvement / nn_best) * 100

print(f"NN best performance: {nn_best:.3f}m")
print(f"SLSQP best performance: {slsqp_best:.3f}m")
print(f"Improvement: {improvement:.3f}m ({improvement_pct:.1f}%)")

Loaded NN results shape: (425, 3)
Loaded SLSQP results shape: (425, 3)

NN results columns: ['rho_0', 'alpha', 'mean_error']
SLSQP results columns: ['rho_0', 'alpha', 'mean_error']

=== TOP 3 ALPHAS PER RHO_0 ANALYSIS ===
Total combinations analyzed: 102
Unique rho_0 values: 17
Algorithms: ['NN', 'SLSQP']

Sample results (first 10 rows):
algorithm  rho_0  alpha  mean_error
       NN   40.0   6.25        3.09
       NN   40.0   6.00        3.12
       NN   40.0   5.75        3.19
       NN   42.5   5.75        3.03
       NN   42.5   6.00        3.09
       NN   42.5   5.50        3.14
       NN   45.0   5.50        3.02
       NN   45.0   5.25        3.05
       NN   45.0   5.75        3.08
       NN   47.5   5.00        2.99

Results saved to: /home/braulio/thesis/output/top3_alphas_per_rho0.csv

=== SUMMARY BY ALGORITHM ===

NN Algorithm:
  Best overall performance: 2.730m
  Worst in top-3: 3.190m
  Average of top-3 performances: 2.954m
  Most frequent alpha values in top-3:
    α = 

### 3. Best parameter combinations (NN, SLSQP(+), Mean-Euclidean, Mean-Chebyshev)

In [None]:
# Input tables for this section, all read from <project_root>/output:
# the NN and SLSQP positioning results and the mean pairwise RSSI distances
# for the Euclidean and Chebyshev metrics.
nn_results_file = project_root.joinpath('output', 'nn_results_by_params.csv')
nn_results = pd.read_csv(nn_results_file)

slsqp_results_file = project_root.joinpath('output', 'optimized_results_by_params.csv')
slsqp_results = pd.read_csv(slsqp_results_file)

euclidean_file = project_root.joinpath('output', 'mean_pairwise_distances_euclidean.csv')
euclidean_data = pd.read_csv(euclidean_file)

chebyshev_file = project_root.joinpath('output', 'mean_pairwise_distances_chebyshev.csv')
chebyshev_data = pd.read_csv(chebyshev_file)

# Shape report for the four loaded tables
print("Data loaded successfully:")
print(f"NN results: {nn_results.shape}")
print(f"SLSQP results: {slsqp_results.shape}")
print(f"Euclidean distances: {euclidean_data.shape}")
print(f"Chebyshev distances: {chebyshev_data.shape}")

def get_optimal_params_for_algorithm(df, metric_column, algorithm_name, minimize=True, slsqp_data=None):
    """
    Get the optimal alpha for each rho_0 based on the specified metric.

    Args:
        df: DataFrame with rho_0, alpha, and metric columns
        metric_column: Column name to optimize
        algorithm_name: Name of the algorithm, stored in every result
        minimize: If True, pick the row with the minimum metric; if False, the
            maximum
        slsqp_data: DataFrame with rho_0, alpha and mean_error columns. Only
            used when algorithm_name is 'Mean-Euclidean' or 'Mean-Chebyshev':
            the reported mean_error is then read from this table instead of
            from `df`.

    Returns:
        List of dictionaries (keys: algorithm, rho_0, alpha, mean_error), one
        per rho_0 value in ascending rho_0 order.

    Raises:
        ValueError: If a cross-reference is requested and `slsqp_data` has no
            rows for one of the rho_0 values in `df`.
    """
    # The pairwise-distance criteria optimise a distance, so their error has to
    # be borrowed from the SLSQP results.
    cross_reference = (
        algorithm_name in ['Mean-Euclidean', 'Mean-Chebyshev'] and slsqp_data is not None
    )
    optimal_results = []

    for rho_0 in sorted(df['rho_0'].unique()):
        # Rows for the current rho_0 (read-only, so no copy is needed)
        rho_data = df[df['rho_0'] == rho_0]

        # Row with the best metric value (minimum or maximum)
        metric_values = rho_data[metric_column]
        best_label = metric_values.idxmin() if minimize else metric_values.idxmax()
        optimal_row = rho_data.loc[best_label]

        if cross_reference:
            slsqp_rho_data = slsqp_data[slsqp_data['rho_0'] == rho_0]
            if slsqp_rho_data.empty:
                # Fail with a clear message instead of an opaque idxmin error
                raise ValueError(
                    f"No SLSQP results for rho_0={rho_0}; cannot look up "
                    f"mean_error for {algorithm_name}"
                )

            slsqp_match = slsqp_rho_data[slsqp_rho_data['alpha'] == optimal_row['alpha']]
            if not slsqp_match.empty:
                mean_error_value = slsqp_match['mean_error'].iloc[0]
            else:
                # No exact alpha match: fall back to the closest alpha
                closest_alpha_idx = (slsqp_rho_data['alpha'] - optimal_row['alpha']).abs().idxmin()
                mean_error_value = slsqp_rho_data.loc[closest_alpha_idx, 'mean_error']
        else:
            # Otherwise the optimised metric itself is reported as mean_error
            mean_error_value = optimal_row[metric_column]

        optimal_results.append({
            'algorithm': algorithm_name,
            'rho_0': rho_0,
            'alpha': optimal_row['alpha'],
            'mean_error': mean_error_value
        })

    return optimal_results

# Best alpha per rho_0 under each of the four selection criteria
print("\n=== FINDING OPTIMAL PARAMETERS ===")

# NN and SLSQP(+): the alpha with the smallest mean_error
nn_optimal = get_optimal_params_for_algorithm(nn_results, 'mean_error', 'NN', minimize=True)
print(f"NN optimal parameters found: {len(nn_optimal)} combinations")

slsqp_optimal = get_optimal_params_for_algorithm(slsqp_results, 'mean_error', 'SLSQP(+)', minimize=True)
print(f"SLSQP(+) optimal parameters found: {len(slsqp_optimal)} combinations")

# Mean-Euclidean and Mean-Chebyshev: the alpha with the largest mean pairwise
# distance; the reported mean_error is looked up in the SLSQP results
euclidean_optimal = get_optimal_params_for_algorithm(
    df=euclidean_data,
    metric_column='mean_pw_distance',
    algorithm_name='Mean-Euclidean',
    minimize=False,
    slsqp_data=slsqp_results,
)
print(f"Mean-Euclidean optimal parameters found: {len(euclidean_optimal)} combinations")

chebyshev_optimal = get_optimal_params_for_algorithm(
    df=chebyshev_data,
    metric_column='mean_pw_distance',
    algorithm_name='Mean-Chebyshev',
    minimize=False,
    slsqp_data=slsqp_results,
)
print(f"Mean-Chebyshev optimal parameters found: {len(chebyshev_optimal)} combinations")

# Combine all results into one table, sorted by algorithm and rho_0
all_optimal_results = nn_optimal + slsqp_optimal + euclidean_optimal + chebyshev_optimal
optimal_params_df = pd.DataFrame(all_optimal_results).sort_values(['algorithm', 'rho_0'])

print("\n=== COMBINED RESULTS ===")
print(f"Total optimal combinations: {len(optimal_params_df)}")
print(f"Algorithms: {optimal_params_df['algorithm'].unique().tolist()}")
print(f"rho_0 range: {optimal_params_df['rho_0'].min()} - {optimal_params_df['rho_0'].max()}")

# Display sample results
print("\nSample results (first 20 rows):")
print(optimal_params_df.head(20).to_string(index=False))

# Save to CSV file
output_file = project_root / 'output' / 'optimal_parameters_by_algorithm.csv'
optimal_params_df.to_csv(output_file, index=False)
print(f"\nResults saved to: {output_file}")

# Display summary statistics by algorithm
print("\n=== SUMMARY BY ALGORITHM ===")
for algorithm in optimal_params_df['algorithm'].unique():
    algo_data = optimal_params_df[optimal_params_df['algorithm'] == algorithm]
    print(f"\n{algorithm}:")
    print(f"  Number of rho_0 values: {len(algo_data)}")
    print(f"  Alpha range: {algo_data['alpha'].min():.2f} - {algo_data['alpha'].max():.2f}")
    print(f"  Mean_error range: {algo_data['mean_error'].min():.3f} - {algo_data['mean_error'].max():.3f}")
    print(f"  Most frequent alpha: {algo_data['alpha'].mode().iloc[0]:.2f}")

    # `mean_error` is a positioning error for every algorithm here (the
    # Mean-Euclidean / Mean-Chebyshev rows take it from the SLSQP results), so
    # the best row is always the one with the smallest value.
    best_idx = algo_data['mean_error'].idxmin()
    print(f"  Best performance: {algo_data.loc[best_idx, 'mean_error']:.3f} at (ρ₀={algo_data.loc[best_idx, 'rho_0']}, α={algo_data.loc[best_idx, 'alpha']})")

# Create pivot tables (rho_0 rows x algorithm columns) for easy comparison
print("\n=== PIVOT TABLE: OPTIMAL ALPHA VALUES BY ALGORITHM AND RHO_0 ===")
pivot_alpha = optimal_params_df.pivot(index='rho_0', columns='algorithm', values='alpha')
print(pivot_alpha.round(2))

print("\n=== PIVOT TABLE: OPTIMAL MEAN_ERROR VALUES BY ALGORITHM AND RHO_0 ===")
pivot_error = optimal_params_df.pivot(index='rho_0', columns='algorithm', values='mean_error')
print(pivot_error.round(3))

print(f"\nAnalysis complete! Results saved to: {output_file}")

Data loaded successfully:
NN results: (425, 3)
SLSQP results: (425, 3)
Euclidean distances: (425, 3)
Chebyshev distances: (425, 3)

=== FINDING OPTIMAL PARAMETERS ===
NN optimal parameters found: 17 combinations
SLSQP(+) optimal parameters found: 17 combinations
Mean-Euclidean optimal parameters found: 17 combinations
Mean-Chebyshev optimal parameters found: 17 combinations

=== COMBINED RESULTS ===
Total optimal combinations: 68
Algorithms: ['Mean-Chebyshev', 'Mean-Euclidean', 'NN', 'SLSQP(+)']
rho_0 range: 40.0 - 80.0

Sample results (first 20 rows):
     algorithm  rho_0  alpha  mean_error
Mean-Chebyshev   40.0   5.50        2.85
Mean-Chebyshev   42.5   5.25        2.84
Mean-Chebyshev   45.0   5.25        2.71
Mean-Chebyshev   47.5   5.00        2.71
Mean-Chebyshev   50.0   4.75        2.69
Mean-Chebyshev   52.5   4.50        2.67
Mean-Chebyshev   55.0   4.25        2.65
Mean-Chebyshev   57.5   4.00        2.65
Mean-Chebyshev   60.0   3.75        2.66
Mean-Chebyshev   62.5   3.50   