In [1]:
"""
EDA Completo - Fintech Tinvest
An√°lisis Exploratorio, Cohortes, RFM, Visualizaciones y Modelos Predictivos
"""

'\nEDA Completo - Fintech Tinvest\nAn√°lisis Exploratorio, Cohortes, RFM, Visualizaciones y Modelos Predictivos\n'

# An√°lisis Completo - Tinvest

Este notebook contiene el an√°lisis completo de datos de Tinvest incluyendo:
- An√°lisis Exploratorio de Datos (EDA)
- An√°lisis de Cohortes y Retenci√≥n
- Segmentaci√≥n RFM
- Visualizaciones Ejecutivas
- Modelos Predictivos de Churn
- Forecasting con Prophet

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
import pickle
import os
warnings.filterwarnings('ignore')

# Sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (classification_report, confusion_matrix, roc_auc_score,
                             roc_curve, f1_score, accuracy_score)

# Prophet
try:
    from prophet import Prophet
    PROPHET_DISPONIBLE = True
except ImportError:
    PROPHET_DISPONIBLE = False
    print("‚ö†Ô∏è Prophet no instalado. Ejecuta: pip install prophet")

# Configuraci√≥n de visualizaci√≥n
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: f'{x:,.2f}')

## 1. Carga y Validaci√≥n de Datos

In [3]:
clients = pd.read_csv('clients.csv')
transactions = pd.read_csv('transactions.csv')
portfolio = pd.read_csv('portfolio_balance.csv')

print(f"‚úì Clientes: {len(clients):,} registros")
print(f"‚úì Transacciones: {len(transactions):,} registros")
print(f"‚úì Portfolio: {len(portfolio):,} registros")

‚úì Clientes: 800 registros
‚úì Transacciones: 22,220 registros
‚úì Portfolio: 76,800 registros


In [4]:
clients['registration_date'] = pd.to_datetime(clients['registration_date'])
transactions['date'] = pd.to_datetime(transactions['date'])
portfolio['date'] = pd.to_datetime(portfolio['date'])

print(f"\nRango temporal clientes: {clients['registration_date'].min()} a {clients['registration_date'].max()}")
print(f"Rango temporal transacciones: {transactions['date'].min()} a {transactions['date'].max()}")
print(f"Rango temporal portfolio: {portfolio['date'].min()} a {portfolio['date'].max()}")


Rango temporal clientes: 2021-01-02 00:00:00 a 2024-06-28 00:00:00
Rango temporal transacciones: 2023-01-01 00:00:00 a 2024-12-31 00:00:00
Rango temporal portfolio: 2023-01-31 00:00:00 a 2024-12-31 00:00:00


## 2. An√°lisis Exploratorio de Datos (EDA)

In [5]:
print("=== CALIDAD DE DATOS ===\n")

for nombre, df in [("CLIENTS", clients), ("TRANSACTIONS", transactions), ("PORTFOLIO", portfolio)]:
    print(f"{nombre}:")
    print(f"  Registros: {len(df):,}")
    print(f"  Duplicados: {df.duplicated().sum():,}")
    nulos = df.isnull().sum()
    if nulos.sum() > 0:
        print(f"  Nulos: {nulos[nulos > 0].to_dict()}")
    else:
        print("  Nulos: 0")
    print()

=== CALIDAD DE DATOS ===

CLIENTS:
  Registros: 800
  Duplicados: 0
  Nulos: 0

TRANSACTIONS:
  Registros: 22,220
  Duplicados: 13
  Nulos: 0

PORTFOLIO:
  Registros: 76,800
  Duplicados: 0
  Nulos: 0



In [6]:
print("=== ESTAD√çSTICAS - CLIENTES ===\n")
print("Variables Num√©ricas:")
print(clients[['age', 'income_monthly', 'risk_score']].describe())

print("\nDistribuci√≥n por Segmento:")
print(clients['segment'].value_counts())
print("\nProporci√≥n (%):")
print((clients['segment'].value_counts(normalize=True) * 100).round(1))

=== ESTAD√çSTICAS - CLIENTES ===

Variables Num√©ricas:
         age  income_monthly  risk_score
count 800.00          800.00      800.00
mean   34.71    5,628,562.50        0.27
std     7.91    4,533,938.88        0.15
min    20.00      900,000.00        0.00
25%    29.00    3,000,000.00        0.17
50%    34.00    4,150,000.00        0.26
75%    40.00    6,400,000.00        0.38
max    60.00   37,900,000.00        0.78

Distribuci√≥n por Segmento:
segment
retail     634
premium    166
Name: count, dtype: int64

Proporci√≥n (%):
segment
retail    79.20
premium   20.80
Name: proportion, dtype: float64


In [7]:
print("\n=== ESTAD√çSTICAS - TRANSACCIONES ===\n")
print("Distribuci√≥n por Tipo:")
print(transactions['type'].value_counts())

print("\nDistribuci√≥n por Producto:")
print(transactions['product'].value_counts())

print("\nEstad√≠sticas por Tipo:")
print(transactions.groupby('type')['amount'].describe())

total_deposits = transactions[transactions['type'] == 'deposit']['amount'].sum()
total_withdrawals = transactions[transactions['type'] == 'withdrawal']['amount'].sum()
net_flow = total_deposits - total_withdrawals

print(f"\nüí∞ FLUJO DE DINERO:")
print(f"Total Dep√≥sitos: ${total_deposits:,.0f}")
print(f"Total Retiros: ${total_withdrawals:,.0f}")
print(f"NNM (Net New Money): ${net_flow:,.0f}")


=== ESTAD√çSTICAS - TRANSACCIONES ===

Distribuci√≥n por Tipo:
type
deposit       15251
withdrawal     6969
Name: count, dtype: int64

Distribuci√≥n por Producto:
product
FIC         11263
CDT          5999
ACCIONES     3371
FPV          1587
Name: count, dtype: int64

Estad√≠sticas por Tipo:
               count         mean          std       min        25%  \
type                                                                  
deposit    15,251.00 1,253,147.33 1,305,327.39 50,000.00 450,000.00   
withdrawal  6,969.00 1,257,002.44 1,509,359.83 50,000.00 400,000.00   

                  50%          75%           max  
type                                              
deposit    800,000.00 1,500,000.00 12,500,000.00  
withdrawal 750,000.00 1,600,000.00 38,250,000.00  

üí∞ FLUJO DE DINERO:
Total Dep√≥sitos: $19,111,750,000
Total Retiros: $8,760,050,000
NNM (Net New Money): $10,351,700,000


## 3. An√°lisis de Cohortes y Retenci√≥n

In [8]:
fecha_corte = transactions['date'].max()
clients['cohort'] = clients['registration_date'].dt.to_period('M')

print(f"=== AN√ÅLISIS DE COHORTES ===")
print(f"Fecha de corte: {fecha_corte}\n")

cohort_sizes = clients.groupby('cohort').size()
print(f"Total de cohortes: {len(cohort_sizes)}")
print(f"Rango: {cohort_sizes.index.min()} a {cohort_sizes.index.max()}")

=== AN√ÅLISIS DE COHORTES ===
Fecha de corte: 2024-12-31 00:00:00

Total de cohortes: 42
Rango: 2021-01 a 2024-06


In [9]:
trans_with_cohort = transactions.merge(
    clients[['client_id', 'cohort', 'registration_date']],
    on='client_id'
)

trans_with_cohort['periodo'] = (
    (trans_with_cohort['date'].dt.to_period('M') - trans_with_cohort['cohort'])
    .apply(lambda x: x.n)
)

retention_table = trans_with_cohort.groupby(['cohort', 'periodo'])['client_id'].nunique().reset_index()
retention_table.columns = ['cohort', 'periodo', 'clientes_activos']

retention_matrix = retention_table.pivot(index='cohort', columns='periodo', values='clientes_activos')
cohort_sizes_dict = cohort_sizes.to_dict()
retention_rate = retention_matrix.divide(retention_matrix.index.map(cohort_sizes_dict), axis=0) * 100

print("\nüìà MATRIZ DE RETENCI√ìN (%) - Primeros 6 meses:")
print(retention_rate.iloc[:, :7].round(1))

print("\nüìä ESTAD√çSTICAS DE RETENCI√ìN:")
print(f"Retenci√≥n Mes 1: {retention_rate[1].mean():.1f}%")
print(f"Retenci√≥n Mes 3: {retention_rate[3].mean():.1f}%")
if 6 in retention_rate.columns:
    print(f"Retenci√≥n Mes 6: {retention_rate[6].mean():.1f}%")


üìà MATRIZ DE RETENCI√ìN (%) - Primeros 6 meses:
periodo    -3    -2   -1      0      1      2      3
cohort                                              
2021-01   NaN   NaN  NaN    NaN    NaN    NaN    NaN
2021-02   NaN   NaN  NaN    NaN    NaN    NaN    NaN
2021-03   NaN   NaN  NaN    NaN    NaN    NaN    NaN
2021-04   NaN   NaN  NaN    NaN    NaN    NaN    NaN
2021-05   NaN   NaN  NaN    NaN    NaN    NaN    NaN
2021-06   NaN   NaN  NaN    NaN    NaN    NaN    NaN
2021-07   NaN   NaN  NaN    NaN    NaN    NaN    NaN
2021-08   NaN   NaN  NaN    NaN    NaN    NaN    NaN
2021-09   NaN   NaN  NaN    NaN    NaN    NaN    NaN
2021-10   NaN   NaN  NaN    NaN    NaN    NaN    NaN
2021-11   NaN   NaN  NaN    NaN    NaN    NaN    NaN
2021-12   NaN   NaN  NaN    NaN    NaN    NaN    NaN
2022-01   NaN   NaN  NaN    NaN    NaN    NaN    NaN
2022-02   NaN   NaN  NaN    NaN    NaN    NaN    NaN
2022-03   NaN   NaN  NaN    NaN    NaN    NaN    NaN
2022-04   NaN   NaN  NaN    NaN    NaN    NaN   

In [10]:
ultima_trans = transactions.groupby('client_id')['date'].max().reset_index()
ultima_trans.columns = ['client_id', 'ultima_transaccion']

churn_analysis = clients.merge(ultima_trans, on='client_id', how='left')
churn_analysis['dias_activo'] = (
    churn_analysis['ultima_transaccion'] - churn_analysis['registration_date']
).dt.days
churn_analysis['dias_inactivo'] = (fecha_corte - churn_analysis['ultima_transaccion']).dt.days

balance_actual = portfolio[portfolio['date'] == portfolio['date'].max()].groupby('client_id')['balance'].sum().reset_index()
balance_actual.columns = ['client_id', 'balance_actual']
churn_analysis = churn_analysis.merge(balance_actual, on='client_id', how='left')
churn_analysis['balance_actual'] = churn_analysis['balance_actual'].fillna(0)

churn_analysis['churned'] = (
    (churn_analysis['balance_actual'] == 0) &
    (churn_analysis['dias_inactivo'] > 90)
).astype(int)

print(f"\n=== AN√ÅLISIS DE CHURN ===")
print(f"Clientes CHURNED: {churn_analysis['churned'].sum()} ({churn_analysis['churned'].mean()*100:.1f}%)")
print(f"Clientes ACTIVOS: {(1-churn_analysis['churned']).sum()} ({(1-churn_analysis['churned']).mean()*100:.1f}%)")


=== AN√ÅLISIS DE CHURN ===
Clientes CHURNED: 110 (13.8%)
Clientes ACTIVOS: 690 (86.2%)


## 4. Segmentaci√≥n RFM

In [11]:
recency = transactions.groupby('client_id')['date'].max().reset_index()
recency.columns = ['client_id', 'ultima_fecha']
recency['recency'] = (fecha_corte - recency['ultima_fecha']).dt.days

frequency = transactions.groupby('client_id').size().reset_index()
frequency.columns = ['client_id', 'frequency']

monetary = transactions[transactions['type'] == 'deposit'].groupby('client_id')['amount'].sum().reset_index()
monetary.columns = ['client_id', 'monetary']

rfm = recency[['client_id', 'recency']].merge(frequency, on='client_id', how='left')
rfm = rfm.merge(monetary, on='client_id', how='left')
rfm = rfm.merge(clients[['client_id', 'segment', 'registration_date']], on='client_id', how='left')
rfm['monetary'] = rfm['monetary'].fillna(0)

print("=== M√âTRICAS RFM ===\n")
print(rfm[['recency', 'frequency', 'monetary']].describe())

=== M√âTRICAS RFM ===

       recency  frequency       monetary
count   800.00     800.00         800.00
mean     47.61      27.77  23,889,687.50
std      68.79      13.23  31,978,146.33
min       0.00      10.00   1,650,000.00
25%       6.00      19.00   7,350,000.00
50%      18.00      24.00  11,750,000.00
75%      48.25      32.00  23,162,500.00
max     304.00      77.00 287,000,000.00


In [12]:
rfm['R_score'] = pd.qcut(rfm['recency'], q=5, labels=[5,4,3,2,1], duplicates='drop').astype(int)
rfm['F_score'] = pd.qcut(rfm['frequency'].rank(method='first'), q=5, labels=[1,2,3,4,5], duplicates='drop').astype(int)
rfm['M_score'] = pd.qcut(rfm['monetary'].rank(method='first'), q=5, labels=[1,2,3,4,5], duplicates='drop').astype(int)

rfm['RFM_score'] = rfm['R_score'] + rfm['F_score'] + rfm['M_score']

In [13]:
def segmentar_rfm(df):
    campeones = (df['R_score'] == 5) & (df['F_score'] >= 4) & (df['M_score'] >= 4)
    leales = (df['R_score'] >= 4) & (df['F_score'] >= 4)
    potenciales = (df['R_score'] >= 4) & (df['F_score'] <= 2) & (df['M_score'] >= 4)
    atencion = (df['R_score'] == 3) & (df['F_score'] == 3)
    riesgo = (df['R_score'] <= 2) & (df['F_score'] >= 4)
    hibernando = (df['R_score'] <= 2) & (df['F_score'] <= 2) & (df['M_score'] >= 4)
    perdidos = (df['R_score'] <= 2) & (df['F_score'] <= 2) & (df['M_score'] <= 2)
    
    dias_registro = (fecha_corte - df['registration_date']).dt.days
    nuevos = (dias_registro <= 90) & (df['F_score'] <= 2)
    
    segmento = []
    for i in range(len(df)):
        if campeones.iloc[i] and not nuevos.iloc[i]:
            segmento.append('Campeones')
        elif leales.iloc[i] and not nuevos.iloc[i] and not campeones.iloc[i]:
            segmento.append('Leales')
        elif potenciales.iloc[i] and not nuevos.iloc[i]:
            segmento.append('Potenciales')
        elif riesgo.iloc[i]:
            segmento.append('En Riesgo')
        elif hibernando.iloc[i]:
            segmento.append('Hibernando')
        elif perdidos.iloc[i]:
            segmento.append('Perdidos')
        elif nuevos.iloc[i]:
            segmento.append('Nuevos')
        elif atencion.iloc[i]:
            segmento.append('Necesitan Atenci√≥n')
        else:
            segmento.append('Promedio')
    
    return segmento

rfm['segmento_rfm'] = segmentar_rfm(rfm)

print("\nüìä DISTRIBUCI√ìN DE SEGMENTOS RFM:")
print(rfm['segmento_rfm'].value_counts())
print("\nProporci√≥n (%):")
print((rfm['segmento_rfm'].value_counts() / len(rfm) * 100).round(1))


üìä DISTRIBUCI√ìN DE SEGMENTOS RFM:
segmento_rfm
Promedio              387
Perdidos              108
En Riesgo             101
Leales                 81
Campeones              78
Necesitan Atenci√≥n     26
Hibernando             12
Potenciales             7
Name: count, dtype: int64

Proporci√≥n (%):
segmento_rfm
Promedio             48.40
Perdidos             13.50
En Riesgo            12.60
Leales               10.10
Campeones             9.80
Necesitan Atenci√≥n    3.20
Hibernando            1.50
Potenciales           0.90
Name: count, dtype: float64


In [14]:
rfm.to_csv('rfm_segmentation.csv', index=False)
churn_analysis.to_csv('churn_analysis.csv', index=False)
retention_rate.to_csv('retention_matrix.csv')
print("\n‚úì Archivos guardados: rfm_segmentation.csv, churn_analysis.csv, retention_matrix.csv")


‚úì Archivos guardados: rfm_segmentation.csv, churn_analysis.csv, retention_matrix.csv


## 5. Visualizaciones Ejecutivas

In [15]:
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_context("talk")
plt.rcParams['figure.figsize'] = (14, 8)

In [16]:
print("\nüìä Generando visualizaciones...")

fig, ax = plt.subplots(figsize=(14, 6))

transactions['year_month'] = transactions['date'].dt.to_period('M').dt.to_timestamp()
flujo_mensual = transactions.groupby(['year_month', 'type'])['amount'].sum().unstack(fill_value=0)
flujo_mensual['net_flow'] = flujo_mensual.get('deposit', 0) - flujo_mensual.get('withdrawal', 0)

x = range(len(flujo_mensual))
ax.bar(x, flujo_mensual['deposit']/1e9, width=0.8, label='Dep√≥sitos', color='#2ecc71', alpha=0.8)
ax.bar(x, -flujo_mensual['withdrawal']/1e9, width=0.8, label='Retiros', color='#e74c3c', alpha=0.8)

ax2 = ax.twinx()
ax2.plot(x, flujo_mensual['net_flow']/1e9, color='#3498db', linewidth=3,
         marker='o', markersize=6, label='Flujo Neto (NNM)')
ax2.axhline(y=0, color='black', linestyle='--', linewidth=1, alpha=0.3)

ax.set_xlabel('Mes', fontsize=12, fontweight='bold')
ax.set_ylabel('Flujo de Dinero (Miles de Millones $)', fontsize=12, fontweight='bold')
ax2.set_ylabel('NNM (Miles de Millones $)', fontsize=12, fontweight='bold')
ax.set_title('Evoluci√≥n del Net New Money (NNM)', fontsize=16, fontweight='bold', pad=20)
ax.set_xticks(x[::2])
ax.set_xticklabels([d.strftime('%Y-%m') for d in flujo_mensual.index[::2]], rotation=45)
ax.grid(True, alpha=0.3)

lines1, labels1 = ax.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax.legend(lines1 + lines2, labels1 + labels2, loc='upper left')

plt.tight_layout()
plt.savefig('fig1_nnm_evolution.png', dpi=300, bbox_inches='tight')
plt.close()


üìä Generando visualizaciones...


In [17]:
fig, ax = plt.subplots(figsize=(14, 10))

retention_clean = retention_rate.iloc[-12:, :7].copy()
retention_clean.index = [str(idx) for idx in retention_clean.index]

sns.heatmap(retention_clean, annot=True, fmt='.0f', cmap='RdYlGn',
            vmin=0, vmax=100, cbar_kws={'label': 'Tasa de Retenci√≥n (%)'}, ax=ax,
            linewidths=0.5, linecolor='gray')

ax.set_title('Matriz de Retenci√≥n por Cohorte', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Meses desde Registro', fontsize=12, fontweight='bold')
ax.set_ylabel('Cohorte (Mes de Registro)', fontsize=12, fontweight='bold')

plt.tight_layout()
plt.savefig('fig2_retention_matrix.png', dpi=300, bbox_inches='tight')
plt.close()

In [18]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

segmento_counts = rfm['segmento_rfm'].value_counts()
colors_segments = {
    'Campeones': '#2ecc71', 'Leales': '#27ae60', 'Potenciales': '#3498db',
    'Promedio': '#95a5a6', 'Necesitan Atenci√≥n': '#f39c12', 'En Riesgo': '#e67e22',
    'Hibernando': '#9b59b6', 'Perdidos': '#e74c3c'
}
colors = [colors_segments.get(seg, '#95a5a6') for seg in segmento_counts.index]

wedges, texts, autotexts = ax1.pie(segmento_counts, labels=segmento_counts.index, autopct='%1.1f%%',
                                     colors=colors, startangle=90)
for autotext in autotexts:
    autotext.set_color('white')
    autotext.set_fontweight('bold')
ax1.set_title('Distribuci√≥n de Clientes por Segmento RFM', fontsize=14, fontweight='bold')

valor_segmento = rfm.groupby('segmento_rfm')['monetary'].sum().sort_values(ascending=True) / 1e9
colors_valor = [colors_segments.get(seg, '#95a5a6') for seg in valor_segmento.index]

valor_segmento.plot(kind='barh', ax=ax2, color=colors_valor, alpha=0.8)
ax2.set_xlabel('Valor Total (Miles de Millones $)', fontsize=12, fontweight='bold')
ax2.set_title('Valor Total por Segmento RFM', fontsize=14, fontweight='bold')
ax2.grid(True, alpha=0.3, axis='x')

for i, v in enumerate(valor_segmento):
    ax2.text(v + 0.1, i, f'${v:.1f}B', va='center', fontsize=10, fontweight='bold')

plt.tight_layout()
plt.savefig('fig3_rfm_segments.png', dpi=300, bbox_inches='tight')
plt.close()

print("‚úì Visualizaciones guardadas: fig1_nnm_evolution.png, fig2_retention_matrix.png, fig3_rfm_segments.png")

‚úì Visualizaciones guardadas: fig1_nnm_evolution.png, fig2_retention_matrix.png, fig3_rfm_segments.png


## 6. Modelos Predictivos de Churn

In [19]:
print("\n=== MODELOS PREDICTIVOS DE CHURN ===\n")

model_data = clients[['client_id', 'age', 'income_monthly', 'segment', 'risk_score']].copy()

rfm_features = rfm[['client_id', 'recency', 'frequency', 'monetary',
                     'R_score', 'F_score', 'M_score', 'RFM_score']].copy()
model_data = model_data.merge(rfm_features, on='client_id', how='left')

model_data['dias_desde_registro'] = (fecha_corte - clients['registration_date']).dt.days

productos_por_cliente = transactions.groupby('client_id')['product'].nunique().reset_index()
productos_por_cliente.columns = ['client_id', 'num_productos']
model_data = model_data.merge(productos_por_cliente, on='client_id', how='left')

model_data = model_data.merge(balance_actual, on='client_id', how='left')
model_data['balance_actual'] = model_data['balance_actual'].fillna(0)

dep_ret = transactions.groupby(['client_id', 'type'])['amount'].sum().unstack(fill_value=0)
dep_ret['deposit_withdrawal_ratio'] = dep_ret['deposit'] / (dep_ret['withdrawal'] + 1)
model_data = model_data.merge(dep_ret[['deposit_withdrawal_ratio']],
                               left_on='client_id', right_index=True, how='left')

trans_std = transactions.groupby('client_id')['amount'].std().reset_index()
trans_std.columns = ['client_id', 'amount_std']
model_data = model_data.merge(trans_std, on='client_id', how='left')

trans_range = transactions.groupby('client_id')['date'].agg(['min', 'max']).reset_index()
trans_range['dias_actividad'] = (trans_range['max'] - trans_range['min']).dt.days
model_data = model_data.merge(trans_range[['client_id', 'dias_actividad']], on='client_id', how='left')

model_data = model_data.merge(churn_analysis[['client_id', 'churned']], on='client_id', how='left')
model_data = model_data.fillna(0)

print(f"Features creados: {len(model_data.columns)} columnas")
print(f"Churn Rate: {model_data['churned'].mean()*100:.1f}%")


=== MODELOS PREDICTIVOS DE CHURN ===

Features creados: 19 columnas
Churn Rate: 13.8%


In [20]:
X = model_data.drop(['client_id', 'churned'], axis=1).copy()
y = model_data['churned']

le = LabelEncoder()
X['segment_encoded'] = le.fit_transform(X['segment'])
X = X.drop('segment', axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                      random_state=42, stratify=y)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

print(f"\nTrain set: {len(X_train)} | Test set: {len(X_test)}")


Train set: 640 | Test set: 160


In [21]:
lr = LogisticRegression(random_state=42, max_iter=1000, class_weight='balanced')
lr.fit(X_train_scaled, y_train)

y_pred_lr = lr.predict(X_test_scaled)
y_proba_lr = lr.predict_proba(X_test_scaled)[:, 1]

print("\nüìä REGRESI√ìN LOG√çSTICA:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_lr):.3f}")
print(f"ROC-AUC: {roc_auc_score(y_test, y_proba_lr):.3f}")
print(f"F1-Score: {f1_score(y_test, y_pred_lr):.3f}")


üìä REGRESI√ìN LOG√çSTICA:
Accuracy: 1.000
ROC-AUC: 1.000
F1-Score: 1.000


In [22]:
rf = RandomForestClassifier(n_estimators=100, random_state=42,
                            class_weight='balanced', max_depth=10)
rf.fit(X_train, y_train)

y_pred_rf = rf.predict(X_test)
y_proba_rf = rf.predict_proba(X_test)[:, 1]

print("\nüìä RANDOM FOREST:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_rf):.3f}")
print(f"ROC-AUC: {roc_auc_score(y_test, y_proba_rf):.3f}")
print(f"F1-Score: {f1_score(y_test, y_pred_rf):.3f}")

feature_importance_rf = pd.DataFrame({
    'feature': X_train.columns,
    'importance': rf.feature_importances_
}).sort_values('importance', ascending=False)

print("\nTop 10 Features:")
print(feature_importance_rf.head(10))


üìä RANDOM FOREST:
Accuracy: 1.000
ROC-AUC: 1.000
F1-Score: 1.000

Top 10 Features:
                     feature  importance
12            balance_actual        0.41
13  deposit_withdrawal_ratio        0.21
3                    recency        0.16
6                    R_score        0.15
15            dias_actividad        0.03
14                amount_std        0.02
9                  RFM_score        0.01
1             income_monthly        0.01
8                    M_score        0.00
5                   monetary        0.00


In [23]:
gb = GradientBoostingClassifier(n_estimators=100, random_state=42,
                                max_depth=5, learning_rate=0.1)
gb.fit(X_train, y_train)

y_pred_gb = gb.predict(X_test)
y_proba_gb = gb.predict_proba(X_test)[:, 1]

print("\nüìä GRADIENT BOOSTING:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_gb):.3f}")
print(f"ROC-AUC: {roc_auc_score(y_test, y_proba_gb):.3f}")
print(f"F1-Score: {f1_score(y_test, y_pred_gb):.3f}")


üìä GRADIENT BOOSTING:
Accuracy: 1.000
ROC-AUC: 1.000
F1-Score: 1.000


In [24]:
results = pd.DataFrame({
    'Modelo': ['Logistic Regression', 'Random Forest', 'Gradient Boosting'],
    'Accuracy': [accuracy_score(y_test, y_pred_lr), accuracy_score(y_test, y_pred_rf), accuracy_score(y_test, y_pred_gb)],
    'ROC-AUC': [roc_auc_score(y_test, y_proba_lr), roc_auc_score(y_test, y_proba_rf), roc_auc_score(y_test, y_proba_gb)],
    'F1-Score': [f1_score(y_test, y_pred_lr), f1_score(y_test, y_pred_rf), f1_score(y_test, y_pred_gb)]
})

print("\nüìä COMPARACI√ìN DE MODELOS:")
print(results)

best_model_idx = results['ROC-AUC'].idxmax()
best_model_name = results.loc[best_model_idx, 'Modelo']
print(f"\nüèÜ MEJOR MODELO: {best_model_name} (ROC-AUC: {results.loc[best_model_idx, 'ROC-AUC']:.3f})")


üìä COMPARACI√ìN DE MODELOS:
                Modelo  Accuracy  ROC-AUC  F1-Score
0  Logistic Regression      1.00     1.00      1.00
1        Random Forest      1.00     1.00      1.00
2    Gradient Boosting      1.00     1.00      1.00

üèÜ MEJOR MODELO: Logistic Regression (ROC-AUC: 1.000)


In [25]:
X_all = model_data.drop(['client_id', 'churned'], axis=1).copy()
X_all['segment_encoded'] = le.transform(X_all['segment'])
X_all = X_all.drop('segment', axis=1)

churn_probability = rf.predict_proba(X_all)[:, 1]
model_data['churn_probability'] = churn_probability
model_data['churn_predicted'] = (churn_probability > 0.5).astype(int)

high_risk = model_data[model_data['churn_probability'] > 0.7].sort_values('churn_probability', ascending=False)

print(f"\n‚ö†Ô∏è Clientes de ALTO RIESGO (prob > 0.7): {len(high_risk)}")
print("\nTop 10 clientes en mayor riesgo:")
print(high_risk[['client_id', 'segment', 'churn_probability', 'churned', 'recency', 'frequency', 'monetary']].head(10))

predictions_output = model_data[['client_id', 'segment', 'churn_probability', 'churn_predicted', 'churned']].copy()
predictions_output.to_csv('churn_predictions.csv', index=False)
print("\n‚úì Predicciones guardadas en: churn_predictions.csv")


‚ö†Ô∏è Clientes de ALTO RIESGO (prob > 0.7): 110

Top 10 clientes en mayor riesgo:
     client_id segment  churn_probability  churned  recency  frequency  \
26        1027  retail               1.00        1      282         26   
15        1016  retail               1.00        1      120         34   
72        1073  retail               1.00        1      233         40   
78        1079  retail               1.00        1      244         31   
65        1066  retail               1.00        1      171         29   
63        1064  retail               1.00        1      287         27   
82        1083  retail               1.00        1      237         22   
131       1132  retail               1.00        1      158         35   
132       1133  retail               1.00        1      241         27   
125       1126  retail               1.00        1      118         30   

     monetary  
26    8200000  
15    9750000  
72   28500000  
78   20100000  
65    4700000  
63   

In [26]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

fpr_lr, tpr_lr, _ = roc_curve(y_test, y_proba_lr)
fpr_rf, tpr_rf, _ = roc_curve(y_test, y_proba_rf)
fpr_gb, tpr_gb, _ = roc_curve(y_test, y_proba_gb)

ax1.plot(fpr_lr, tpr_lr, label=f'Logistic Reg (AUC={roc_auc_score(y_test, y_proba_lr):.3f})', linewidth=2)
ax1.plot(fpr_rf, tpr_rf, label=f'Random Forest (AUC={roc_auc_score(y_test, y_proba_rf):.3f})', linewidth=2)
ax1.plot(fpr_gb, tpr_gb, label=f'Gradient Boost (AUC={roc_auc_score(y_test, y_proba_gb):.3f})', linewidth=2)
ax1.plot([0, 1], [0, 1], 'k--', label='Random', linewidth=1)
ax1.set_xlabel('False Positive Rate', fontweight='bold')
ax1.set_ylabel('True Positive Rate', fontweight='bold')
ax1.set_title('Curvas ROC - Comparaci√≥n de Modelos', fontsize=14, fontweight='bold')
ax1.legend(loc='lower right')
ax1.grid(True, alpha=0.3)

top_features = feature_importance_rf.head(15)
ax2.barh(range(len(top_features)), top_features['importance'], color='steelblue', alpha=0.8)
ax2.set_yticks(range(len(top_features)))
ax2.set_yticklabels(top_features['feature'])
ax2.set_xlabel('Importancia', fontweight='bold')
ax2.set_title('Top 15 Features - Random Forest', fontsize=14, fontweight='bold')
ax2.grid(True, alpha=0.3, axis='x')
ax2.invert_yaxis()

plt.tight_layout()
plt.savefig('fig_model_performance.png', dpi=300, bbox_inches='tight')
plt.close()
print("‚úì Guardado: fig_model_performance.png")

‚úì Guardado: fig_model_performance.png


## 7. Forecasting con Prophet

In [27]:
def detectar_outliers_iqr(series, factor=3.0):
    Q1 = series.quantile(0.25)
    Q3 = series.quantile(0.75)
    IQR = Q3 - Q1
    lower = Q1 - factor * IQR
    upper = Q3 + factor * IQR
    p99 = series.quantile(0.99)
    return series.clip(lower=lower, upper=min(upper, p99))

In [28]:
if PROPHET_DISPONIBLE:
    print("\n=== FORECASTING CON PROPHET ===\n")
    
    transactions['week'] = transactions['date'].dt.to_period('W').dt.to_timestamp()
    
    flujo_semanal = transactions.groupby(['week', 'type'])['amount'].sum().unstack(fill_value=0)
    flujo_semanal['net_flow'] = flujo_semanal.get('deposit', 0) - flujo_semanal.get('withdrawal', 0)
    flujo_semanal['net_flow'] = detectar_outliers_iqr(flujo_semanal['net_flow'])
    
    portfolio['week'] = portfolio['date'].dt.to_period('W').dt.to_timestamp()
    balance_semanal = portfolio.groupby('week')['balance'].mean().reset_index()
    balance_semanal.columns = ['week', 'balance_promedio']
    
    productos_semana = transactions.groupby('week')['product'].nunique().reset_index()
    productos_semana.columns = ['week', 'productos_activos']
    
    clientes_activos = portfolio.groupby('week').apply(
        lambda x: (x['balance'] == 0).sum() / len(x) if len(x) > 0 else 0
    ).reset_index()
    clientes_activos.columns = ['week', 'churn_rate']
    
    df_prophet_nnm = flujo_semanal.reset_index()
    df_prophet_nnm = df_prophet_nnm.rename(columns={'week': 'ds', 'net_flow': 'y'})
    df_prophet_nnm = df_prophet_nnm.merge(balance_semanal, left_on='ds', right_on='week', how='left')
    df_prophet_nnm = df_prophet_nnm.merge(productos_semana, left_on='ds', right_on='week', how='left')
    df_prophet_nnm = df_prophet_nnm.merge(clientes_activos, left_on='ds', right_on='week', how='left')
    df_prophet_nnm = df_prophet_nnm.drop(columns=[c for c in df_prophet_nnm.columns if c.startswith('week')], errors='ignore')
    
    for col in ['balance_promedio', 'productos_activos', 'churn_rate']:
        df_prophet_nnm[col] = df_prophet_nnm[col].fillna(method='ffill').fillna(method='bfill').fillna(0)
    
    balance_std = df_prophet_nnm['balance_promedio'].std()
    productos_std = df_prophet_nnm['productos_activos'].std()
    churn_std = df_prophet_nnm['churn_rate'].std()
    
    df_prophet_nnm['balance_promedio_norm'] = (
        (df_prophet_nnm['balance_promedio'] - df_prophet_nnm['balance_promedio'].mean()) /
        (balance_std if balance_std > 0 else 1)
    )
    df_prophet_nnm['productos_activos_norm'] = (
        (df_prophet_nnm['productos_activos'] - df_prophet_nnm['productos_activos'].mean()) /
        (productos_std if productos_std > 0 else 1)
    )
    df_prophet_nnm['churn_rate_norm'] = (
        (df_prophet_nnm['churn_rate'] - df_prophet_nnm['churn_rate'].mean()) /
        (churn_std if churn_std > 0 else 1)
    )
    
    df_prophet_nnm = df_prophet_nnm.fillna(0)
    
    print(f"Datos NNM preparados: {len(df_prophet_nnm)} semanas")
    print(f"Rango: {df_prophet_nnm['ds'].min()} a {df_prophet_nnm['ds'].max()}")


=== FORECASTING CON PROPHET ===

Datos NNM preparados: 106 semanas
Rango: 2022-12-26 00:00:00 a 2024-12-30 00:00:00


In [29]:
if PROPHET_DISPONIBLE:
    print("\nüöÄ Entrenando modelo Prophet NNM...")
    
    m_nnm = Prophet(
        seasonality_mode='additive',
        yearly_seasonality=True,
        weekly_seasonality=False,
        daily_seasonality=False,
        changepoint_prior_scale=0.05,
        seasonality_prior_scale=10.0,
        interval_width=0.95
    )
    
    m_nnm.add_regressor('balance_promedio_norm')
    m_nnm.add_regressor('productos_activos_norm')
    m_nnm.add_regressor('churn_rate_norm')
    
    m_nnm.fit(df_prophet_nnm)
    
    future_nnm = m_nnm.make_future_dataframe(periods=52, freq='W')
    ultimas_semanas = df_prophet_nnm.tail(4)
    future_nnm['balance_promedio_norm'] = ultimas_semanas['balance_promedio_norm'].mean()
    future_nnm['productos_activos_norm'] = ultimas_semanas['productos_activos_norm'].mean()
    future_nnm['churn_rate_norm'] = ultimas_semanas['churn_rate_norm'].mean()
    future_nnm = future_nnm.fillna(0)
    
    forecast_nnm = m_nnm.predict(future_nnm)
    
    fecha_max_nnm = df_prophet_nnm['ds'].max()
    forecast_futuro_nnm = forecast_nnm[forecast_nnm['ds'] > fecha_max_nnm]
    
    print(f"\nüí∞ PRON√ìSTICO NNM 52 SEMANAS:")
    print(f"NNM Promedio Semanal: ${forecast_futuro_nnm['yhat'].mean()/1e9:.3f}B")
    print(f"NNM Total: ${forecast_futuro_nnm['yhat'].sum()/1e9:.2f}B")


üöÄ Entrenando modelo Prophet NNM...


21:17:39 - cmdstanpy - INFO - Chain [1] start processing
21:17:40 - cmdstanpy - INFO - Chain [1] done processing



üí∞ PRON√ìSTICO NNM 52 SEMANAS:
NNM Promedio Semanal: $0.135B
NNM Total: $7.00B


In [30]:
if PROPHET_DISPONIBLE:
    clients['week'] = clients['registration_date'].dt.to_period('W').dt.to_timestamp()
    registros_semana = clients.groupby('week').size().reset_index()
    registros_semana.columns = ['ds', 'y']
    registros_semana['y'] = detectar_outliers_iqr(registros_semana['y'])
    
    print(f"\nDatos Nuevos Clientes preparados: {len(registros_semana)} semanas")


Datos Nuevos Clientes preparados: 181 semanas


In [31]:
if PROPHET_DISPONIBLE:
    print("\nüöÄ Entrenando modelo Prophet Nuevos Clientes...")
    
    m_clientes = Prophet(
        seasonality_mode='additive',
        yearly_seasonality=True,
        weekly_seasonality=False,
        daily_seasonality=False,
        changepoint_prior_scale=0.05,
        seasonality_prior_scale=10.0,
        interval_width=0.95
    )
    
    m_clientes.fit(registros_semana)
    
    future_clientes = m_clientes.make_future_dataframe(periods=52, freq='W')
    forecast_clientes = m_clientes.predict(future_clientes)
    
    fecha_max_clientes = registros_semana['ds'].max()
    forecast_futuro_clientes = forecast_clientes[forecast_clientes['ds'] > fecha_max_clientes]
    
    print(f"\nüë• PRON√ìSTICO NUEVOS CLIENTES 52 SEMANAS:")
    print(f"Clientes Promedio Semanal: {forecast_futuro_clientes['yhat'].mean():.1f}")
    print(f"Clientes Total: {forecast_futuro_clientes['yhat'].sum():.0f}")

21:17:48 - cmdstanpy - INFO - Chain [1] start processing



üöÄ Entrenando modelo Prophet Nuevos Clientes...


21:17:48 - cmdstanpy - INFO - Chain [1] done processing



üë• PRON√ìSTICO NUEVOS CLIENTES 52 SEMANAS:
Clientes Promedio Semanal: 4.8
Clientes Total: 252


In [32]:
if PROPHET_DISPONIBLE:
    carpeta = 'modelos_prophet_tinvest'
    os.makedirs(carpeta, exist_ok=True)
    
    with open(f'{carpeta}/modelo_nnm.pkl', 'wb') as f:
        pickle.dump(m_nnm, f)
    forecast_nnm.to_csv(f'{carpeta}/forecast_nnm.csv', index=False)
    
    with open(f'{carpeta}/modelo_nuevos_clientes.pkl', 'wb') as f:
        pickle.dump(m_clientes, f)
    forecast_clientes.to_csv(f'{carpeta}/forecast_nuevos_clientes.csv', index=False)
    
    print(f"\n‚úì Modelos Prophet guardados en {carpeta}/")


‚úì Modelos Prophet guardados en modelos_prophet_tinvest/


In [33]:
if PROPHET_DISPONIBLE:
    fig, ax = plt.subplots(figsize=(16, 8))
    
    fecha_max = df_prophet_nnm['ds'].max()
    forecast_hist = forecast_nnm[forecast_nnm['ds'] <= fecha_max]
    forecast_futuro = forecast_nnm[forecast_nnm['ds'] > fecha_max]
    
    scale = 1e9
    
    ax.plot(df_prophet_nnm['ds'], df_prophet_nnm['y']/scale, 'o',
            label='Hist√≥rico', color='#2E86AB', markersize=4, alpha=0.6)
    ax.plot(forecast_hist['ds'], forecast_hist['yhat']/scale,
            '-', label='Ajuste modelo', color='#06A77D', linewidth=2)
    ax.plot(forecast_futuro['ds'], forecast_futuro['yhat']/scale,
            '-o', label='Pron√≥stico 52 semanas', color='#FF6B35', linewidth=2.5, markersize=5)
    ax.fill_between(forecast_futuro['ds'],
                     forecast_futuro['yhat_lower']/scale,
                     forecast_futuro['yhat_upper']/scale,
                     alpha=0.2, color='#FF6B35', label='IC 95%')
    ax.axvline(fecha_max, color='red', linestyle='--', linewidth=2, alpha=0.7)
    ax.axhline(0, color='black', linestyle='-', linewidth=0.5, alpha=0.3)
    
    ax.set_xlabel('Fecha', fontsize=12, fontweight='bold')
    ax.set_ylabel('NNM (Miles de Millones $)', fontsize=12, fontweight='bold')
    ax.set_title('Pron√≥stico de Net New Money (NNM) - Prophet', fontsize=14, fontweight='bold', pad=20)
    ax.legend(loc='best', fontsize=10)
    ax.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('fig_prophet_nnm.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("‚úì Guardado: fig_prophet_nnm.png")

‚úì Guardado: fig_prophet_nnm.png


In [35]:
if PROPHET_DISPONIBLE:
    fig, ax = plt.subplots(figsize=(16, 8))
    
    fecha_max = registros_semana['ds'].max()
    forecast_hist = forecast_clientes[forecast_clientes['ds'] <= fecha_max]
    forecast_futuro = forecast_clientes[forecast_clientes['ds'] > fecha_max]
    
    ax.plot(registros_semana['ds'], registros_semana['y'], 'o',
            label='Hist√≥rico', color='#2E86AB', markersize=4, alpha=0.6)
    ax.plot(forecast_hist['ds'], forecast_hist['yhat'],
            '-', label='Ajuste modelo', color='#06A77D', linewidth=2)
    ax.plot(forecast_futuro['ds'], forecast_futuro['yhat'],
            '-o', label='Pron√≥stico 52 semanas', color='#FF6B35', linewidth=2.5, markersize=5)
    ax.fill_between(forecast_futuro['ds'],
                     forecast_futuro['yhat_lower'],
                     forecast_futuro['yhat_upper'],
                     alpha=0.2, color='#FF6B35', label='IC 95%')
    ax.axvline(fecha_max, color='red', linestyle='--', linewidth=2, alpha=0.7)
    
    ax.set_xlabel('Fecha', fontsize=12, fontweight='bold')
    ax.set_ylabel('Nuevos Clientes', fontsize=12, fontweight='bold')
    ax.set_title('Pron√≥stico de Nuevos Clientes - Prophet', fontsize=14, fontweight='bold', pad=20)
    ax.legend(loc='best', fontsize=10)
    ax.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('fig_prophet_clientes.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("‚úì Guardado: fig_prophet_clientes.png")

‚úì Guardado: fig_prophet_clientes.png


## 8. Resumen Ejecutivo

In [36]:
print("\n" + "="*80)
print("RESUMEN EJECUTIVO COMPLETO - TINVEST")
print("="*80)

print(f"""
üìä M√âTRICAS CLAVE:
  ‚Ä¢ Total Clientes: {len(clients):,}
  ‚Ä¢ Total Transacciones: {len(transactions):,}
  ‚Ä¢ Churn Rate: {churn_analysis['churned'].mean()*100:.1f}%
  ‚Ä¢ Retenci√≥n Mes 3: {retention_rate[3].mean():.1f}%

üí∞ FLUJO DE DINERO:
  ‚Ä¢ Total Dep√≥sitos: ${total_deposits/1e9:.2f}B
  ‚Ä¢ Total Retiros: ${total_withdrawals/1e9:.2f}B
  ‚Ä¢ NNM Total: ${net_flow/1e9:.2f}B

üíé SEGMENTACI√ìN RFM:
  ‚Ä¢ Campeones: {len(rfm[rfm['segmento_rfm']=='Campeones'])} ({len(rfm[rfm['segmento_rfm']=='Campeones'])/len(rfm)*100:.1f}%)
  ‚Ä¢ En Riesgo: {len(rfm[rfm['segmento_rfm']=='En Riesgo'])} ({len(rfm[rfm['segmento_rfm']=='En Riesgo'])/len(rfm)*100:.1f}%)
  ‚Ä¢ Perdidos: {len(rfm[rfm['segmento_rfm']=='Perdidos'])} ({len(rfm[rfm['segmento_rfm']=='Perdidos'])/len(rfm)*100:.1f}%)

ü§ñ MODELO PREDICTIVO:
  ‚Ä¢ Mejor modelo: {best_model_name}
  ‚Ä¢ ROC-AUC: {results.loc[best_model_idx, 'ROC-AUC']:.3f}
  ‚Ä¢ Clientes alto riesgo: {len(high_risk)}
""")

if PROPHET_DISPONIBLE:
    print(f"""üîÆ FORECASTING (52 SEMANAS):
  ‚Ä¢ NNM Promedio Semanal: ${forecast_futuro_nnm['yhat'].mean()/1e9:.3f}B
  ‚Ä¢ NNM Total: ${forecast_futuro_nnm['yhat'].sum()/1e9:.2f}B
  ‚Ä¢ Nuevos Clientes Promedio: {forecast_futuro_clientes['yhat'].mean():.1f}/semana
  ‚Ä¢ Total Nuevos Clientes: {forecast_futuro_clientes['yhat'].sum():.0f}
""")

print("""
‚úÖ ARCHIVOS GENERADOS:
  ‚Ä¢ rfm_segmentation.csv
  ‚Ä¢ churn_analysis.csv
  ‚Ä¢ retention_matrix.csv
  ‚Ä¢ churn_predictions.csv
  ‚Ä¢ fig1_nnm_evolution.png
  ‚Ä¢ fig2_retention_matrix.png
  ‚Ä¢ fig3_rfm_segments.png
  ‚Ä¢ fig_model_performance.png
  ‚Ä¢ fig_prophet_nnm.png (si Prophet disponible)
  ‚Ä¢ fig_prophet_clientes.png (si Prophet disponible)
  ‚Ä¢ modelos_prophet_tinvest/ (si Prophet disponible)

üìå RECOMENDACIONES:
  1. Priorizar intervenci√≥n en clientes de alto riesgo (prob > 0.7)
  2. Reactivar segmentos 'En Riesgo' y 'Hibernando'
  3. Programas de fidelizaci√≥n para 'Campeones' y 'Leales'
  4. Mejorar onboarding para retenci√≥n temprana
  5. Monitorear NNM y registros semanalmente
""")

print("\n" + "="*80)
print("AN√ÅLISIS COMPLETADO")
print("="*80)


RESUMEN EJECUTIVO COMPLETO - TINVEST

üìä M√âTRICAS CLAVE:
  ‚Ä¢ Total Clientes: 800
  ‚Ä¢ Total Transacciones: 22,220
  ‚Ä¢ Churn Rate: 13.8%
  ‚Ä¢ Retenci√≥n Mes 3: 76.1%

üí∞ FLUJO DE DINERO:
  ‚Ä¢ Total Dep√≥sitos: $19.11B
  ‚Ä¢ Total Retiros: $8.76B
  ‚Ä¢ NNM Total: $10.35B

üíé SEGMENTACI√ìN RFM:
  ‚Ä¢ Campeones: 78 (9.8%)
  ‚Ä¢ En Riesgo: 101 (12.6%)
  ‚Ä¢ Perdidos: 108 (13.5%)

ü§ñ MODELO PREDICTIVO:
  ‚Ä¢ Mejor modelo: Logistic Regression
  ‚Ä¢ ROC-AUC: 1.000
  ‚Ä¢ Clientes alto riesgo: 110

üîÆ FORECASTING (52 SEMANAS):
  ‚Ä¢ NNM Promedio Semanal: $0.135B
  ‚Ä¢ NNM Total: $7.00B
  ‚Ä¢ Nuevos Clientes Promedio: 4.8/semana
  ‚Ä¢ Total Nuevos Clientes: 252


‚úÖ ARCHIVOS GENERADOS:
  ‚Ä¢ rfm_segmentation.csv
  ‚Ä¢ churn_analysis.csv
  ‚Ä¢ retention_matrix.csv
  ‚Ä¢ churn_predictions.csv
  ‚Ä¢ fig1_nnm_evolution.png
  ‚Ä¢ fig2_retention_matrix.png
  ‚Ä¢ fig3_rfm_segments.png
  ‚Ä¢ fig_model_performance.png
  ‚Ä¢ fig_prophet_nnm.png (si Prophet disponible)
  ‚Ä¢ fig_prophet_