# Fuzzy Clustering with Neural Networks

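This notebook is a template for customer categorization using neural-network-based fuzzy (soft) clustering. Apart from the setup cell, the code cells are commented-out `TODO` scaffolding: uncomment each cell and adapt it to your data as you work through the notebook from top to bottom.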

## Objectives
1. Load processed customer data
2. Build a neural network-based fuzzy clustering model
3. Train and evaluate the model
4. Compare with traditional ML approach

## Setup

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from pathlib import Path

# Plot defaults: seaborn "whitegrid" theme and a wide default figure size
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

# Fix the TensorFlow and NumPy global seeds so repeated runs start from the same state
tf.random.set_seed(42)
np.random.seed(42)

# Report the TensorFlow version in use and confirm the setup cell completed
print(f"TensorFlow version: {tf.__version__}")
print("Libraries imported successfully!")

## 1. Load Processed Data

In [None]:
# Directory holding the processed data files, relative to this notebook
DATA_DIR = Path("../data/processed")

# TODO: Load your processed customer data
# (uncomment once the cleaned CSV is available under DATA_DIR)
# df = pd.read_csv(DATA_DIR / 'customers_cleaned.csv')
# print(f"Loaded {len(df)} customer records")

## 2. Data Preprocessing

In [None]:
# TODO: Select features and normalize
# Replace the placeholder names below with real columns of `df`; the selected
# columns are converted to a plain NumPy array `X`.
# features = ['feature1', 'feature2', 'feature3']  # Select relevant features
# X = df[features].values

# Standardize features
# NOTE(review): the scaler is fitted on all rows *before* the train/validation
# split below, so validation rows contribute to the scaling statistics.
# Fit on X_train only if a strictly held-out validation set is required.
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X)

# Split data for validation
# 80/20 split with a fixed random_state so the partition is repeatable.
# X_train, X_val = train_test_split(X_scaled, test_size=0.2, random_state=42)
# print(f"Training samples: {len(X_train)}, Validation samples: {len(X_val)}")

## 3. Build Fuzzy Neural Network Model

Create an autoencoder-based architecture for fuzzy clustering.

In [None]:
# TODO: Build neural network model
# Hyperparameters: input width is taken from the scaled feature matrix;
# n_clusters sets the width of the softmax head, encoding_dim the bottleneck.
# n_features = X_scaled.shape[1]
# n_clusters = 5
# encoding_dim = 10

# # Encoder
# Three Dense/ReLU layers narrowing 64 -> 32 -> encoding_dim.
# encoder_input = layers.Input(shape=(n_features,))
# encoded = layers.Dense(64, activation='relu')(encoder_input)
# encoded = layers.Dense(32, activation='relu')(encoded)
# encoded = layers.Dense(encoding_dim, activation='relu')(encoded)

# # Clustering layer (soft assignment)
# Softmax over n_clusters units on top of the bottleneck: each row of the
# output is a set of non-negative membership weights that sum to 1.
# cluster_layer = layers.Dense(n_clusters, activation='softmax', name='cluster_output')(encoded)

# # Decoder
# Mirrors the encoder (32 -> 64 -> n_features) with a linear output so the
# reconstruction can take any real value, matching standardized inputs.
# decoded = layers.Dense(32, activation='relu')(encoded)
# decoded = layers.Dense(64, activation='relu')(decoded)
# decoder_output = layers.Dense(n_features, activation='linear', name='reconstruction_output')(decoded)

# # Create model
# Two-output model; output order is [cluster memberships, reconstruction].
# Later cells that unpack predictions or pass target lists rely on this order.
# model = models.Model(inputs=encoder_input, outputs=[cluster_layer, decoder_output])

# model.summary()

## 4. Compile and Train Model

In [None]:
# TODO: Compile model
# Losses and weights are keyed by the output layer names given when the model
# was built ('cluster_output', 'reconstruction_output'); the two losses are
# weighted equally (0.5 each).
#
# NOTE(review): 'categorical_crossentropy' on 'cluster_output' needs a target
# of shape (n_samples, n_clusters) holding class probabilities. The training
# cell passes the feature matrix (X_train) as that target, which has
# n_features columns and is not a probability distribution, so this loss is
# not usable as written. See the note at the end of this cell for options.
# NOTE(review): 'accuracy' has no meaning for the reconstruction output and
# there are no true labels for the cluster output - consider dropping it.
# model.compile(
#     optimizer='adam',
#     loss={
#         'cluster_output': 'categorical_crossentropy',
#         'reconstruction_output': 'mse'
#     },
#     loss_weights={
#         'cluster_output': 0.5,
#         'reconstruction_output': 0.5
#     },
#     metrics=['accuracy']
# )

# # Note: For unsupervised clustering, you may need to use a custom training loop
# # or pretrain with autoencoder reconstruction only

In [None]:
# TODO: Train model
# # Early stopping callback
# Stops when the validation loss has not improved for 10 epochs and rolls the
# model back to the weights of the best epoch.
# early_stopping = keras.callbacks.EarlyStopping(
#     monitor='val_loss',
#     patience=10,
#     restore_best_weights=True
# )

# # Train model
# The target list is positional and must follow the model's output order
# (cluster output first, reconstruction output second).
# NOTE(review): the first target below is X_train / X_val, i.e. the feature
# matrix is used as the target of the softmax cluster head. Its width is
# n_features, not n_clusters, so this only makes sense for the reconstruction
# output - confirm the intended clustering target before uncommenting.
# history = model.fit(
#     X_train,
#     [X_train, X_train],  # For unsupervised learning
#     epochs=100,
#     batch_size=32,
#     validation_data=(X_val, [X_val, X_val]),
#     callbacks=[early_stopping],
#     verbose=1
# )

## 5. Visualize Training History

In [None]:
# TODO: Plot training history
# Two side-by-side panels built from the `history` object returned by
# model.fit in the training cell.
# plt.figure(figsize=(12, 4))

# Left panel: total (weighted) loss on the training and validation sets.
# plt.subplot(1, 2, 1)
# plt.plot(history.history['loss'], label='Training Loss')
# plt.plot(history.history['val_loss'], label='Validation Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.legend()
# plt.title('Model Loss')

# Right panel: training-set loss of the reconstruction output only.
# The history key is presumably derived from the output layer name
# 'reconstruction_output' - verify against history.history.keys().
# plt.subplot(1, 2, 2)
# plt.plot(history.history['reconstruction_output_loss'], label='Reconstruction Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.legend()
# plt.title('Reconstruction Loss')

# plt.tight_layout()
# plt.show()

## 6. Cluster Assignment

In [None]:
# TODO: Get cluster assignments
# The model has two outputs, so predict returns two arrays; only the first
# (cluster memberships) is kept, the reconstruction is discarded.
# The hard label of each row is the cluster with the largest membership.
# cluster_probs, _ = model.predict(X_scaled)
# cluster_labels = np.argmax(cluster_probs, axis=1)

# Adds the hard label as a new column on `df` (modifies `df` in place).
# df['cluster_nn'] = cluster_labels

# # Add membership probabilities
# One column per cluster holding that cluster's soft membership weight.
# for i in range(n_clusters):
#     df[f'nn_membership_cluster_{i}'] = cluster_probs[:, i]

# Number of customers assigned to each cluster.
# print(df['cluster_nn'].value_counts())

## 7. Cluster Visualization

In [None]:
# TODO: Visualize clusters
# NOTE(review): when this cell is enabled, consider moving the PCA import to
# the setup cell so all dependencies are declared in one place.
# from sklearn.decomposition import PCA

# Project the standardized features onto their first two principal components
# purely for plotting; the clustering itself is not changed by this step.
# pca = PCA(n_components=2)
# X_pca = pca.fit_transform(X_scaled)

# Scatter plot of the 2-D projection, coloured by the hard cluster label.
# plt.figure(figsize=(12, 8))
# scatter = plt.scatter(X_pca[:, 0], X_pca[:, 1], c=cluster_labels, 
#                       cmap='viridis', alpha=0.6, s=50)
# plt.colorbar(scatter)
# plt.xlabel('First Principal Component')
# plt.ylabel('Second Principal Component')
# plt.title('Customer Segments (Neural Network)')
# plt.tight_layout()
# plt.show()

## 8. Compare with Traditional ML Approach

In [None]:
# TODO: Load traditional ML results and compare
# Reads the traditional-ML output from DATA_DIR; the code below expects that
# file to contain a 'cluster' column.
# df_fcm = pd.read_csv(DATA_DIR / 'customers_with_clusters_fcm.csv')

# # Compare cluster distributions
# NOTE(review): cluster indices are presumably arbitrary in both models, so
# cluster k here need not correspond to cluster k there - compare the size
# distributions, not the label values, unless the labels have been aligned.
# print("Traditional ML cluster distribution:")
# print(df_fcm['cluster'].value_counts())
# print("\nNeural Network cluster distribution:")
# print(df['cluster_nn'].value_counts())

## 9. Save Results

In [None]:
# TODO: Save results
# Writes `df` (including the cluster columns added earlier) as CSV without the
# index. OUTPUT_DIR is the same path as DATA_DIR defined in the load cell.
# OUTPUT_DIR = Path('../data/processed')
# df.to_csv(OUTPUT_DIR / 'customers_with_clusters_nn.csv', index=False)

# # Save model
# NOTE(review): '.h5' is Keras' legacy HDF5 format; recent Keras versions
# recommend the native '.keras' format - confirm against the installed version.
# NOTE(review): assumes the '../models' directory already exists - TODO confirm
# or create it before saving.
# model.save('../models/fuzzy_clustering_nn.h5')
# print("Results and model saved!")

## Summary and Next Steps

1. Evaluate which approach (traditional ML vs NN) works better for your data
2. Fine-tune hyperparameters
3. Validate clusters with business stakeholders
4. Deploy the model for production use