# BONUS: 3D VISUALIZATION

### **Some imports:**

In [24]:
import pandas as pd
import numpy as np
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display, clear_output
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import plotly.graph_objects as go

# Load the customer table that the 3D view is built from. Later cells copy it
# into DF and read its "final_cluster" column and the metric feature columns.
# NOTE(review): presumably the min-max-scaled dataset (per the name) - confirm.
df_minmax = pd.read_csv("df_minmax.csv")

In [25]:
!pip install plotly ipywidgets scikit-learn pyarrow



### **Functions needed:**

In [26]:
import os

# helpers
def make_range_slider(col, desc=None):
    """Build a range slider spanning the observed values of ``DF[col]``.

    Parameters
    ----------
    col : str
        Column of the module-level ``DF`` whose min/max bound the slider.
    desc : str, optional
        Label shown next to the slider; falls back to ``col``.

    Returns
    -------
    ipywidgets.FloatRangeSlider or None
        ``None`` when ``col`` is not in ``DF`` or has no non-missing values
        (so no meaningful bounds exist); callers are expected to drop
        ``None`` entries.
    """
    if col not in DF.columns:
        return None

    # An empty or all-NaN column would give NaN bounds (or make nanmin raise);
    # treat it like a missing column instead of building a broken slider.
    if DF[col].isna().all():
        return None

    vmin = float(np.nanmin(DF[col].values))
    vmax = float(np.nanmax(DF[col].values))

    # A constant column needs a non-zero span for the slider to be valid.
    if np.isclose(vmin, vmax):
        vmax = vmin + 1e-6

    return widgets.FloatRangeSlider(
        value=[vmin, vmax],
        min=vmin,
        max=vmax,
        # 200 steps across the range; the fallback covers the case where the
        # widening above is lost to float rounding on very large values.
        step=(vmax - vmin) / 200 if vmax > vmin else 0.01,
        description=desc or col,
        continuous_update=False,
        # Let the label take the width it needs so long descriptions are not
        # clipped by the default fixed description width.
        style={"description_width": "initial"},
        layout=widgets.Layout(width="420px"),
    )

def apply_filters():
    """Return the rows of ``DF`` that pass the current widget selections.

    Reads the module-level ``DF``, ``CLUSTER_COL``, ``w_clusters`` and
    ``range_widgets``. A row is kept when its cluster is selected in
    ``w_clusters`` and its value lies inside the inclusive ``[lo, hi]`` range
    of every slider whose column exists in ``DF``. Rows with a missing value
    in a filtered column are dropped, because ``Series.between`` is False
    for NaN.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matching rows, in their original order.
    """
    # Sliders are identified by their description, so every label used when
    # the sliders are created must be mapped back to its column here;
    # a label without an entry (and that is not itself a column name) would
    # make that slider a silent no-op.
    desc_to_col = {
        "Flights": "NumFlights_Total",
        "Pts Acc": "PointsAccumulated_Total",
        "Pts Red": "PointsRedeemed_Total",
        "Income_log": "Income_log",
        "CLV_log": "CLV_log",
        "Memb Ship Months": "MembershipMonths",
        "Numflights Companion Tot": "NumFlightsWithCompanions_Total",
        "Companion flights": "CompainFlights%",
    }

    mask = DF[CLUSTER_COL].isin(list(w_clusters.value))

    for rw in range_widgets:
        col = desc_to_col.get(rw.description, rw.description)
        if col not in DF.columns:
            continue
        lo, hi = rw.value
        mask &= DF[col].between(lo, hi)

    return DF[mask].copy()

def update_customer_dropdown(fdf):
    """Sync the customer dropdown with the ids present in ``fdf``.

    The options become the first 5000 unique ids (as strings) of
    ``fdf[ID_COL]``. The current selection is kept when it is still among
    those ids; otherwise the first id is selected (or ``None`` when there are
    no ids).

    Parameters
    ----------
    fdf : pandas.DataFrame
        Filtered customer rows; must contain the ``ID_COL`` column.
    """
    ids = fdf[ID_COL].astype(str).unique().tolist()[:5000]  # cap the list size

    # Remember the selection before touching the options: assigning new
    # options may reset the widget's value.
    previous = w_customer.value

    # Only reassign when the option list really changed, so a render caused
    # by the user picking a customer does not disturb the dropdown.
    if list(w_customer.options) != ids:
        w_customer.options = ids

    if previous in ids:
        target = previous
    else:
        target = ids[0] if ids else None

    if w_customer.value != target:
        w_customer.value = target

def draw_plot_widget(fdf):
    """Create the 3D scatter figure for the filtered customers.

    Points are placed at (PC1, PC2, PC3) and coloured by ``CLUSTER_COL``.
    The hover box shows the id, the cluster and whichever of the optional
    metric columns are present in ``fdf``.

    Parameters
    ----------
    fdf : pandas.DataFrame
        Rows to plot; must contain ``PC1``, ``PC2``, ``PC3``, ``ID_COL`` and
        ``CLUSTER_COL``.

    Returns
    -------
    The figure object returned by ``plotly.express.scatter_3d``.
    """
    optional_hover = (
        "Income_log",
        "CLV_log",
        "NumFlights_Total",
        "PointsAccumulated_Total",
        "PointsRedeemed_Total",
    )
    hover_cols = [ID_COL, CLUSTER_COL]
    hover_cols.extend(name for name in optional_hover if name in fdf.columns)

    scatter_kwargs = dict(
        x="PC1",
        y="PC2",
        z="PC3",
        color=CLUSTER_COL,
        hover_data=hover_cols,
        opacity=0.85,
    )
    fig = px.scatter_3d(fdf, **scatter_kwargs)
    fig.update_layout(height=650, legend_title_text="Cluster")
    return fig

def on_export_clicked(btn):
    """Write the currently filtered rows to ``filtered_clusters.csv``.

    Click handler for the export button. The status message (or a notice
    that nothing was exported because the filters left no rows) is printed
    into ``out_export``, replacing its previous content.

    Parameters
    ----------
    btn
        Argument supplied by the click callback; not used.
    """
    fdf = apply_filters()
    with out_export:
        clear_output(wait=True)
        if len(fdf) > 0:
            fname = "filtered_clusters.csv"
            fdf.to_csv(fname, index=False)
            print(f"Exported: {fname} | Rows: {len(fdf):,} | Path: {os.path.abspath(fname)}")
        else:
            print("Nothing to export (empty after filters).")

def render(_=None):
    """Redraw the plot area and the customer detail area from the filters.

    Recomputes the filtered frame, then:
    * in ``out_plot``: prints the row counts and, if any rows remain,
      refreshes the customer dropdown and displays the 3D figure;
    * in ``out_detail``: displays a one-row table (id, cluster and the
      available ``FEATURES``) for the customer selected in ``w_customer``.

    Parameters
    ----------
    _ : optional
        Ignored; present so the function can be used directly as a callback.
    """
    fdf = apply_filters()
    has_rows = len(fdf) > 0

    # --- plot area ---
    with out_plot:
        clear_output(wait=True)
        print(f"Filtered rows: {len(fdf):,} / {len(DF):,}")
        if has_rows:
            # The dropdown must be refreshed before the detail area reads it.
            update_customer_dropdown(fdf)
            display(draw_plot_widget(fdf))
        else:
            print("No data after filters.")

    # --- detail area ---
    with out_detail:
        clear_output(wait=True)
        if not has_rows or w_customer.value is None:
            print("No customer to display.")
            return

        is_selected = fdf[ID_COL].astype(str) == str(w_customer.value)
        row = fdf[is_selected].iloc[0]

        detail_cols = [ID_COL, CLUSTER_COL]
        detail_cols += [c for c in FEATURES if c in fdf.columns]
        # Show the selected customer as a single-row table.
        display(row[detail_cols].to_frame().T)


### **Metrics needed**

In [27]:
# Columns fed to the scaler/PCA and shown in the customer detail table.
# The strings are used verbatim as DataFrame column names, so they must keep
# the spelling used in the dataset (including "CompainFlights%").
metric_features = [
    # --- customer value ---
    "CLV_log",
    "Income_log",
    "PointsAccumulated_Total",
    "MembershipMonths",
    # --- flight / redemption behavior ---
    "NumFlights_Total",
    "NumFlightsWithCompanions_Total",
    "CompainFlights%",
    "PointsRedeemed_Total",
]


As a complement to the segmentation analysis, a three-dimensional (3D) visualization of the customers was built to visually explore and validate the separation between clusters. To this end, Principal Component Analysis (PCA) was applied to the standardized metric features, and the first three principal components are used as the plot coordinates.
In addition, interactive filters (a cluster selector and range sliders on the main metrics) were included, allowing a more detailed exploratory analysis of each segment in isolation and making the visualization an effective tool for interpreting and communicating the results.

In [28]:
# A) WORKING FRAME AND COLUMN ROLES

# Work on a copy so the PCA columns added below do not touch df_minmax.
DF = df_minmax.copy()
CLUSTER_COL = "final_cluster"

# Prefer the dataset's own customer id; otherwise derive one from the index.
if "Loyalty#" in DF.columns:
    ID_COL = "Loyalty#"
else:
    DF["customer_id"] = DF.index.astype(str)
    ID_COL = "customer_id"

# Features used for the PCA projection and the customer detail table.
FEATURES = metric_features


# B) CREATE 3D COORDINATES (PCA) FOR VISUALIZATION

# Force every feature to numeric; unparseable or missing entries become 0.
X = DF[FEATURES].copy().apply(pd.to_numeric, errors="coerce").fillna(0)

# Standardize before PCA so each feature has zero mean and unit variance.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Project onto the first three principal components.
pca = PCA(n_components=3, random_state=42)
coords = pca.fit_transform(X_scaled)

# One column per component (coords.T yields the three component vectors).
DF["PC1"], DF["PC2"], DF["PC3"] = coords.T

# C) WIDGETS (FILTERS), EVENT BINDINGS AND LAYOUT
# This cell builds every control, wires the callbacks, shows the UI and
# triggers the first render. The helper functions read these module-level
# names (w_clusters, range_widgets, w_customer, out_*), so they must be
# defined before render() is called at the end of the cell.

# Distinct cluster labels, sorted for a stable option order.
clusters = sorted(DF[CLUSTER_COL].unique().tolist())

# Multi-select of clusters; all clusters are selected initially.
w_clusters = widgets.SelectMultiple(
    options=clusters,
    value=tuple(clusters),
    description="Clusters",
    layout=widgets.Layout(width="300px", height="120px")
)

# Range sliders, one per metric: make_range_slider(column, label).
# NOTE: apply_filters() resolves each slider back to its column through the
# slider's description, so the labels here must stay in sync with the
# description-to-column mapping used there.
w_income = make_range_slider("Income_log", "Income_log")
w_clv = make_range_slider("CLV_log", "CLV_log")
w_flights = make_range_slider("NumFlights_Total", "Flights")
w_points_acc = make_range_slider("PointsAccumulated_Total", "Pts Acc")
w_points_red = make_range_slider("PointsRedeemed_Total", "Pts Red")
w_membership_months = make_range_slider("MembershipMonths", "Memb Ship Months")
w_numbflights_companion_tot = make_range_slider("NumFlightsWithCompanions_Total", "Numflights Companion Tot")
w_companionflights = make_range_slider("CompainFlights%", "Companion flights")


# make_range_slider returns None for columns missing from DF; keep only the
# sliders that were actually created.
range_widgets = [w for w in [w_income, w_clv, w_flights, w_points_acc, w_points_red, w_membership_months, w_numbflights_companion_tot, w_companionflights] if w is not None]

# Dropdown of customers (ids from ID_COL); starts empty and is filled by
# update_customer_dropdown() during render().
w_customer = widgets.Dropdown(
    options=[],
    description="Customer",
    layout=widgets.Layout(width="420px")
)

# Export button: writes the filtered rows to CSV when clicked.
w_export = widgets.Button(
    description="Export filtered CSV",
    button_style="success"
)

# Output areas: plot, selected-customer detail, and export status message.
out_plot = widgets.Output()
out_detail = widgets.Output()
out_export = widgets.Output()

# Bind the export handler to the button.
w_export.on_click(on_export_clicked)

# Re-render whenever any control's value changes. The lambdas discard the
# change payload and do not capture the loop variable, so sharing the same
# lambda body across widgets is safe.
w_clusters.observe(lambda c: render(), names="value")
for rw in range_widgets:
    rw.observe(lambda c: render(), names="value")
# NOTE(review): render() refreshes this dropdown itself, so a value change
# made during a render can re-enter render() through this observer.
w_customer.observe(lambda c: render(), names="value")

# Layout: filters on the left; customer picker, export, plot and detail on
# the right.
controls_left = widgets.VBox([w_clusters] + range_widgets)
controls_right = widgets.VBox([w_customer, w_export, out_export])

ui = widgets.HBox([
    controls_left,
    widgets.VBox([controls_right, out_plot, out_detail])
])

display(ui)

# First render so the plot and detail areas are populated immediately.
render()

HBox(children=(VBox(children=(SelectMultiple(description='Clusters', index=(0, 1, 2), layout=Layout(height='12…