# Interclass and Intraclass similarities with 3 different metrics

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Read dataset
X = pd.read_excel("../coffeeDataSynthesized.xlsx", "dataset")

# Extracting labels
y = np.where(X["type"] == "robusta", 0, 1)  # Convert type to numeric: 0 for robusta, 1 for arabica
y = pd.Series(y)

# Extracting the features for clustering
X_features = X[['width', 'height', 'depth', 'weight']]

# Normalizing the features
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X_features)

# Splitting data into classes
X_robusta = X_normalized[y == 0]
X_arabica = X_normalized[y == 1]

# **Euclidean Distance (L1 Norm)**
# Intra-class similarity (robusta)
intra_robusta = pairwise_distances(X_robusta, metric='euclidean')
mean_intra_robusta = np.mean(intra_robusta)

# Intra-class similarity (arabica)
intra_arabica = pairwise_distances(X_arabica, metric='euclidean')
mean_intra_arabica = np.mean(intra_arabica)

# Inter-class similarity (robusta vs arabica)
inter_class = pairwise_distances(X_robusta, X_arabica, metric='euclidean')
mean_inter_class = np.mean(inter_class)


# **Manhattan Distance (L1 Norm)**
# Intra-class similarity (robusta)
intra_robusta_manhattan = pairwise_distances(X_robusta, metric='manhattan')
mean_intra_robusta_manhattan = np.mean(intra_robusta_manhattan)

# Intra-class similarity (arabica)
intra_arabica_manhattan = pairwise_distances(X_arabica, metric='manhattan')
mean_intra_arabica_manhattan = np.mean(intra_arabica_manhattan)

# Inter-class similarity (robusta vs arabica)
inter_class_manhattan = pairwise_distances(X_robusta, X_arabica, metric='manhattan')
mean_inter_class_manhattan = np.mean(inter_class_manhattan)

# **Cosine Similarity**
# Intra-class similarity (robusta)
cosine_robusta = cosine_similarity(X_robusta)
mean_cosine_robusta = np.mean(cosine_robusta)

# Intra-class similarity (arabica)
cosine_arabica = cosine_similarity(X_arabica)
mean_cosine_arabica = np.mean(cosine_arabica)

# Inter-class similarity (robusta vs arabica)
cosine_inter_class = cosine_similarity(X_robusta, X_arabica)
mean_cosine_inter_class = np.mean(cosine_inter_class)

# Display results
print(f'Mean Intra-Class Similarity (Robusta - Euclidean): {mean_intra_robusta}')
print(f'Mean Intra-Class Similarity (Arabica - Euclidean): {mean_intra_arabica}')
print(f'Mean Inter-Class Similarity (Robusta vs Arabica - Euclidean): {mean_inter_class}')

print()

print(f'Mean Intra-Class Similarity (Robusta - Manhattan): {mean_intra_robusta_manhattan}')
print(f'Mean Intra-Class Similarity (Arabica - Manhattan): {mean_intra_arabica_manhattan}')
print(f'Mean Inter-Class Similarity (Robusta vs Arabica - Manhattan): {mean_inter_class_manhattan}')

print()

print(f'Mean Intra-Class Similarity (Robusta - Cosine): {mean_cosine_robusta}')
print(f'Mean Intra-Class Similarity (Arabica - Cosine): {mean_cosine_arabica}')
print(f'Mean Inter-Class Similarity (Robusta vs Arabica - Cosine): {mean_cosine_inter_class}')


Mean Intra-Class Similarity (Robusta - Euclidean): 2.5899776083485926
Mean Intra-Class Similarity (Arabica - Euclidean): 2.247596220366747
Mean Inter-Class Similarity (Robusta vs Arabica - Euclidean): 2.8411348242605876

Mean Intra-Class Similarity (Robusta - Manhattan): 4.172070757759082
Mean Intra-Class Similarity (Arabica - Manhattan): 3.6861742640973105
Mean Inter-Class Similarity (Robusta vs Arabica - Manhattan): 4.76755682989604

Mean Intra-Class Similarity (Robusta - Cosine): 0.10574167576080255
Mean Intra-Class Similarity (Arabica - Cosine): 0.16930787444266063
Mean Inter-Class Similarity (Robusta vs Arabica - Cosine): -0.12573756099922404
