In [2]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="joblib")

from sklearn.cluster import SpectralCoclustering

# Load and prepare data
whisky = pd.read_csv("data/whiskies.txt")
regions = pd.read_csv("data/regions.txt", header=None)
whisky["Region"] = regions
flavors = whisky.iloc[:, 2:14]

# Apply Spectral Co-Clustering
model = SpectralCoclustering(n_clusters=6, random_state=0)
model.fit(flavors)

# Assign groups and reorder
whisky["Group"] = model.row_labels_
whisky_sorted = whisky.iloc[np.argsort(model.row_labels_)].reset_index(drop=True)

# Group summaries
flavor_summary = whisky.groupby("Group")[flavors.columns].mean().round(2)
region_summary = whisky.groupby(["Group", "Region"]).size().unstack(fill_value=0)

# Output
print("Flavor Summary per Cluster:\n", flavor_summary)
print("\nRegion Distribution per Cluster:\n", region_summary)

# Save to CSV
flavor_summary.to_csv("outputs/cluster_flavor_summary.csv")
region_summary.to_csv("outputs/cluster_region_summary.csv")
whisky_sorted.to_csv("outputs/whisky_sorted_by_cluster.csv", index=False)


Flavor Summary per Cluster:
        Body  Sweetness  Smoky  Medicinal  Tobacco  Honey  Spicy  Winey  Nutty  \
Group                                                                           
0      2.30       1.80   2.50       2.00      0.0   1.10   1.70   0.50   1.60   
1      1.50       2.44   1.19       0.25      0.0   0.97   1.22   0.31   1.19   
3      3.50       1.50   3.75       3.25      1.0   0.00   1.25   0.75   1.25   
4      2.41       2.38   1.32       0.06      0.0   1.74   1.41   1.74   1.76   
5      1.83       2.33   1.50       0.67      1.0   1.00   1.67   1.17   1.17   

       Malty  Fruity  Floral  
Group                         
0       1.60    1.30    0.90  
1       1.78    1.91    2.25  
3       1.00    1.00    0.25  
4       1.91    2.00    1.56  
5       2.17    1.50    1.83  

Region Distribution per Cluster:
 Region  #####Regions#####  Campbelltown  Highlands  Islands  Islay  Lowlands  \
Group                                                                  