# 🍷 Wine Classification using FTIR Spectra and K-Means Clustering
This notebook applies unsupervised learning to group 37 wine samples using FTIR spectral data. After peak-based feature extraction and PCA dimensionality reduction, we explore the clustering patterns found by K-Means.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from scipy.signal import find_peaks
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the FTIR measurements from CSV into a DataFrame and preview them.
ftir_csv = 'FTIR_data.csv'  # Replace with correct file name if needed
data = pd.read_csv(ftir_csv)
print(f"Data shape: {data.shape}")  # (rows, columns) as loaded
data.head()

In [None]:
# Split the table into the x-axis (wavenumbers) and the absorbance matrix.
# Column 0 is skipped -- presumably a sample identifier (TODO confirm); the
# remaining column headers are parsed as floats and used as wavenumbers.
wavenumbers = data.columns[1:].astype(float)
spectra = data.iloc[:, 1:].to_numpy()

# Overlay every row of `spectra` as its own line on a single figure.
fig, ax = plt.subplots(figsize=(10, 6))
for spectrum in spectra:
    ax.plot(wavenumbers, spectrum)
ax.invert_xaxis()  # draw the x-axis from high to low wavenumber
ax.set_xlabel('Wavenumber (cm⁻¹)')
ax.set_ylabel('Absorbance')
ax.set_title('FTIR Spectra of Wine Samples')
plt.show()

In [None]:
# Peak-based feature selection.
# Peaks are located on one reference spectrum; the same column positions are
# then read from every sample so that all samples share one feature set.
sample_index = 0  # Use the first sample to find common peaks
PEAK_MIN_HEIGHT = 0.1  # a local maximum must reach this value to count as a peak
peaks_idx, _ = find_peaks(spectra[sample_index], height=PEAK_MIN_HEIGHT)

# Fail early with a clear message: with zero peaks the feature matrix would
# have no columns and the scaling/PCA step could not run.
if peaks_idx.size == 0:
    raise ValueError(
        f"No peaks with height >= {PEAK_MIN_HEIGHT} found in sample {sample_index}; "
        "lower the threshold or choose a different reference sample."
    )

selected_peaks = spectra[:, peaks_idx]
print(f"Selected {selected_peaks.shape[1]} peak features")

In [None]:
# Standardize the peak features, then project them onto two principal components.
# The scaler is fitted first and applied in a second step so the fitted object
# stays available under `scaler`.
scaler = StandardScaler().fit(selected_peaks)
X_scaled = scaler.transform(selected_peaks)

# Keep only the first two components; their share of the variance is printed.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
print(f"Explained variance: {pca.explained_variance_ratio_}")

In [None]:
# Apply K-Means clustering to the two PCA components.
# n_init is pinned explicitly: its default changed between scikit-learn
# releases (10 in older versions, 'auto' from 1.4), so leaving it implicit can
# yield different labels depending on the installed version.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
labels = kmeans.fit_predict(X_pca)

# Report how many samples fall in each cluster so the split can be checked
# against any known class distribution.
print(f"Cluster sizes: {np.bincount(labels, minlength=2)}")

# Plot clusters in the PC1/PC2 plane, colored by assigned cluster label.
fig, ax = plt.subplots()
sns.scatterplot(x=X_pca[:, 0], y=X_pca[:, 1], hue=labels, palette='Set2', ax=ax)
ax.set_title('Wine Clusters via K-Means + PCA')
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
plt.show()

In [None]:
# Optional: average the full spectra within each K-Means cluster and overlay
# the two averages to see where the clusters differ.
in_cluster_0 = labels == 0
in_cluster_1 = labels == 1
cluster_0_mean = spectra[in_cluster_0].mean(axis=0)
cluster_1_mean = spectra[in_cluster_1].mean(axis=0)

fig, ax = plt.subplots()
ax.plot(wavenumbers, cluster_0_mean, label='Cluster 0 Avg')
ax.plot(wavenumbers, cluster_1_mean, label='Cluster 1 Avg')
ax.invert_xaxis()  # draw the x-axis from high to low wavenumber
ax.set_title('Mean FTIR Spectra by Cluster')
ax.set_xlabel('Wavenumber (cm⁻¹)')
ax.set_ylabel('Absorbance')
ax.legend()
plt.show()

## ✅ Summary
- FTIR spectral data of 37 wine samples were analyzed.
- Peaks were extracted and used for unsupervised clustering.
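- K-Means grouped the wines into two clusters, reported to match the known 18 vs 19 class distribution; this notebook does not load the reference class labels, so the match should be verified against them.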
- PCA reduced the standardized peak features to two components, which were used both as the input to K-Means and to visualize the clusters.