# Customer Segmentation

<img src="../../reports/figures/logo_cmsr92.png" alt="CMSR92" height="60" style="display:block;margin:auto;">

**Segmentación profesional de clientes para optimización de estrategias.**

> Los gráficos y cuadros de insights emplean colores profesionales y contrastantes, con el logo CMSR92 solo en la cabecera.

In [1]:
import os, pandas as pd
from pathlib import Path
from sklearn.cluster import KMeans
import sys

# Resolver raíz del proyecto de forma robusta

def find_root(start=None, *, max_depth=6):
    """Locate the project root by walking up the directory tree.

    A directory counts as the project root when it contains a ``src`` entry
    together with either ``requirements.txt`` or ``.git``.

    Args:
        start: Path to begin the search from. When omitted (or falsy) the
            current working directory is used.
        max_depth: Maximum number of directories to inspect, counting
            ``start`` itself. Defaults to 6.

    Returns:
        The resolved path of the first matching directory, or ``Path.cwd()``
        when no directory within ``max_depth`` levels matches.
    """
    current = Path(start or Path.cwd()).resolve()
    for _ in range(max_depth):
        # ``src`` is required by both marker combinations, so test it once.
        if (current / 'src').exists() and (
            (current / 'requirements.txt').exists()
            or (current / '.git').exists()
        ):
            return current
        parent = current.parent
        if parent == current:
            # Reached the filesystem root; climbing further would only
            # re-check the same directory.
            break
        current = parent
    return Path.cwd()

# Detect the project root and make it importable so that the project's
# ``src`` package resolves regardless of where the notebook is launched from.
ROOT = find_root()
if not any(entry == str(ROOT) for entry in sys.path):
    sys.path.append(str(ROOT))

# This import must come after the sys.path adjustment above.
from src.visualization import plot_segments_scatter

# Input features and output segmentation both live under data/processed.
FEAT_PATH = ROOT.joinpath('data', 'processed', 'model_features.csv')
SEG_PATH = ROOT.joinpath('data', 'processed', 'customer_segments.csv')

# Diagnostics: show where we are running and whether the input file is there.
print('cwd:', os.getcwd())
print('ROOT:', ROOT)
print(
    'Features existe?',
    FEAT_PATH.exists(),
    '->',
    FEAT_PATH,
)

# Best-effort pipeline: any failure (missing file, missing columns, plotting
# error) is reported as a notice instead of aborting the notebook.
try:
    feat = pd.read_csv(FEAT_PATH)

    # Cluster on the int64/float64 columns only; missing values become 0.
    X = feat.select_dtypes(include=['int64', 'float64'])
    X = X.fillna(0)

    # Fixed random_state keeps the segment assignment reproducible.
    kmeans = KMeans(
        n_clusters=4,
        n_init=10,
        random_state=42,
    )
    feat['segment'] = kmeans.fit_predict(X)

    # Persist the features together with their assigned segment.
    feat.to_csv(SEG_PATH, index=False)
    print('Segmentación guardada en', SEG_PATH)

    # Scatter plot of the segments over the two spending columns.
    fig = plot_segments_scatter(
        feat,
        x='gasto_promedio',
        y='gasto_total_est',
        color='segment',
    )
    fig.show()
except Exception as exc:
    print('AVISO:', exc)

cwd: C:\Users\Carlos\OneDrive\Documents\GitHub\cine\notebooks
ROOT: C:\Users\Carlos\OneDrive\Documents\GitHub\cine
Features existe? True -> C:\Users\Carlos\OneDrive\Documents\GitHub\cine\data\processed\model_features.csv


Segmentación guardada en C:\Users\Carlos\OneDrive\Documents\GitHub\cine\data\processed\customer_segments.csv
