# Imports iniciais

In [15]:
import random
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression


In [2]:
# Notebook-wide configuration constants.
# NOTE(review): the meanings noted below are inferred from the names only —
# confirm against the cells that actually consume each constant.
RANDOM_STATE = 42  # presumably the shared random seed
TEST_SIZE = 0.25  # presumably the hold-out fraction for a train/test split
N_SPLITS = 5  # presumably the number of cross-validation folds
TOP_K_IMPORTANCES = 15  # presumably how many feature importances to report
RF_N_EST = 1000  # presumably the number of trees for a random forest
XGB_N_EST = 1000  # presumably the number of boosting rounds for XGBoost

In [3]:
# Load the customer churn dataset; the path is relative to the current working directory.
df = pd.read_csv('data/customer_churn.csv')

In [4]:
# Replace decimal commas with decimal points in the two charge columns so they
# can be converted to float afterwards.
# The columns are cast to pandas' "string" dtype first: the plain `.str`
# accessor raises AttributeError when a column is already numeric (for example
# when this cell is re-run after the float conversion, or when `read_csv`
# inferred a numeric dtype). Missing values stay missing (<NA>), so the
# following `dropna` still removes them.
for col in ['MonthlyCharges', 'TotalCharges']:
    df[col] = df[col].astype('string').str.replace(',', '.', regex=False)

In [5]:
# Drop rows where either charge column is missing. The result is rebound to
# `df`; the surviving rows keep their original index labels (no reset).
df = df.dropna(subset=['MonthlyCharges', 'TotalCharges'])

In [6]:
# Convert both charge columns to float with a single dict-based cast.
df = df.astype({'MonthlyCharges': float, 'TotalCharges': float})

In [7]:
# Encode the target as 0/1. `Series.map` silently produces NaN for any label
# that is not exactly 'Yes' or 'No', so fail fast here with a readable message
# instead of letting the NaN surface later inside the model fit.
churn_flag = df['Churn'].map({'Yes': 1, 'No': 0})
if churn_flag.isna().any():
    unexpected = sorted(df.loc[churn_flag.isna(), 'Churn'].astype(str).unique())
    raise ValueError(f"Unexpected values in 'Churn' column: {unexpected}")
df['Churn_flag'] = churn_flag

# Feature matrix: everything except the identifier and the target columns.
X = df.drop(columns=['customerID', 'Churn', 'Churn_flag'])
# Later cells add 'RandomFactor', 'NPS' and 'cpf' to `df`. If this cell is
# re-run after them, those derived columns must not become features: 'NPS' is
# computed from the model's own predictions and 'cpf' would explode into one
# dummy column per row.
X = X.drop(columns=['RandomFactor', 'NPS', 'cpf'], errors='ignore')
# One-hot encode the categorical columns, dropping the first level of each.
X = pd.get_dummies(X, drop_first=True)

In [8]:
# Draw one multiplicative noise factor per row, uniform over [0.95, 1.2).
# The draw uses a generator seeded with RANDOM_STATE so that a fresh
# "Restart & Run All" reproduces the same factors (the previous global
# `np.random.uniform` call was unseeded and changed on every run).
factor_rng = np.random.default_rng(RANDOM_STATE)
factors = factor_rng.uniform(0.95, 1.2, size=df.shape[0])
df['RandomFactor'] = factors

In [None]:
y = df['Churn_flag']

# Fit a logistic regression on all rows. `random_state` is set because the
# liblinear solver shuffles the data internally; without it repeated runs are
# not guaranteed to produce identical coefficients.
model = LogisticRegression(solver='liblinear', max_iter=1000, random_state=RANDOM_STATE)
model.fit(X, y)

# In-sample churn probability (column 1 = positive class) for every row.
p_churn = model.predict_proba(X)[:, 1]

# Turn "probability of staying", perturbed by the per-row random factor, into a
# score on a 0-10 scale. The factor can be as large as 1.2, so
# p_churn * RandomFactor may exceed 1 and push the raw score below 0; clip to
# the valid range before rounding.
raw_score = (1 - p_churn * df['RandomFactor']) * 10
df['NPS'] = np.rint(raw_score.clip(lower=0, upper=10)).astype(int)




In [10]:
# Number of rows at each NPS value, highest score first.
df.groupby('NPS').size().sort_index(ascending=False)

Predicted_NPS
10    1653
9     1348
8      912
7      653
6      517
5      473
4      517
3      486
2      335
1      129
0        9
dtype: int64

In [11]:
# Mean of the 0/1 churn flag (i.e. the observed churn rate) per NPS value, highest score first.
df.groupby('NPS').aggregate({'Churn_flag' : 'mean'}).sort_index(ascending=False)

Unnamed: 0_level_0,Churn_flag
Predicted_NPS,Unnamed: 1_level_1
10,0.022384
9,0.077151
8,0.190789
7,0.286371
6,0.34236
5,0.446089
4,0.551257
3,0.664609
2,0.749254
1,0.868217


In [12]:
# Count respondents per NPS band: promoters score 9 or more, detractors 6 or less.
total_respondents = len(df)
promoters = int((df['NPS'] >= 9).sum())
detractors = int((df['NPS'] <= 6).sum())

# Band shares in percent; the overall NPS is promoters minus detractors.
percent_promoters = (promoters / total_respondents) * 100
percent_detractors = (detractors / total_respondents) * 100
nps_general = percent_promoters - percent_detractors

# Show the results
print(
    f"Promoters (9-10): {percent_promoters:.2f}%",
    f"Detractors (0-6): {percent_detractors:.2f}%",
    f"Net Promoter Score (NPS) geral: {nps_general:.2f}",
    sep="\n",
)

Promoters (9-10): 42.68%
Detractors (0-6): 35.07%
Net Promoter Score (NPS) geral: 7.61


In [13]:
# Average NPS for each value of the churn flag (1 listed first, then 0).
df.groupby('Churn_flag').aggregate({'NPS' : 'mean'}).sort_index(ascending=False)

Unnamed: 0_level_0,Predicted_NPS
Churn_flag,Unnamed: 1_level_1
1,4.733547
0,8.032927


In [14]:
# Inspect the column labels currently present in the DataFrame.
df.columns

Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn',
       'Churn_flag', 'RandomFactor', 'Predicted_NPS'],
      dtype='object')

In [16]:
def _dv_cpf(digs9):
    s1 = sum(d*w for d, w in zip(digs9, range(10, 1, -1)))
    d1 = 0 if (s1 % 11) < 2 else 11 - (s1 % 11)
    s2 = sum(d*w for d, w in zip(digs9 + [d1], range(11, 1, -1)))
    d2 = 0 if (s2 % 11) < 2 else 11 - (s2 % 11)
    return d1, d2

def _formata(s):
    return f"{s[0:3]}.{s[3:6]}.{s[6:9]}-{s[9:11]}"

In [17]:
def gerar_cpfs_para_df(df, col='cpf', formatado=True, seed=None, unico=True):
    """Fill column ``col`` of ``df`` with randomly generated CPF numbers.

    One number is produced per row: nine random base digits plus the two check
    digits returned by ``_dv_cpf``.

    Parameters
    ----------
    df : DataFrame
        Frame to receive the column. It is modified in place.
    col : str
        Name of the column to create or overwrite.
    formatado : bool
        When true, each value is passed through ``_formata``; otherwise the
        plain 11-digit string is stored.
    seed : optional
        Seed handed to ``random.Random``; the same seed yields the same values.
    unico : bool
        When true, a number that was already generated is discarded and
        another one is drawn.

    Returns
    -------
    DataFrame
        The same ``df`` object that was passed in.
    """
    gerador = random.Random(seed)
    alvo = len(df)
    emitidos = set()
    resultado = []

    while len(resultado) < alvo:
        digitos = [gerador.randint(0, 9) for _ in range(9)]
        # Bases made of one repeated digit (000000000, 111111111, ...) are skipped.
        if len(set(digitos)) > 1:
            dv1, dv2 = _dv_cpf(digitos)
            numero = ''.join(str(d) for d in digitos + [dv1, dv2])
            if not (unico and numero in emitidos):
                emitidos.add(numero)
                resultado.append(_formata(numero) if formatado else numero)

    df[col] = resultado
    return df

In [None]:
# Add a 'cpf' column with formatted, unique generated CPF numbers; the fixed
# seed makes the generated sequence repeatable across runs.
df = gerar_cpfs_para_df(df, col='cpf', formatado=True, seed=42, unico=True)

In [19]:
# Display the DataFrame with the added columns.
df

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,Churn_flag,RandomFactor,Predicted_NPS,cpf
0,7569-NMZYQ,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,Yes,...,Two year,Yes,Bank transfer (automatic),118.75,8672.45,No,0,1.085200,9,104.332.181-00
1,8984-HPEMB,Female,0,No,No,71,Yes,Yes,Fiber optic,Yes,...,Two year,Yes,Electronic check,118.65,8477.60,No,0,1.193487,9,960.013.389-14
2,5989-AXPUC,Female,0,Yes,No,68,Yes,Yes,Fiber optic,Yes,...,Two year,No,Mailed check,118.60,7990.05,No,0,0.989884,9,083.863.794-99
3,5734-EJKXG,Female,0,No,No,61,Yes,Yes,Fiber optic,Yes,...,One year,Yes,Electronic check,118.60,7365.70,No,0,1.120897,8,026.542.351-14
4,8199-ZLLSA,Male,0,No,No,67,Yes,Yes,Fiber optic,Yes,...,One year,Yes,Bank transfer (automatic),118.35,7804.15,Yes,1,1.089027,9,161.559.407-89
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,0621-CXBKL,Female,0,No,No,53,Yes,No,No,No internet service,...,Two year,No,Mailed check,18.70,1005.70,No,0,1.158039,10,756.999.060-96
7039,9945-PSVIP,Female,0,Yes,Yes,25,Yes,No,No,No internet service,...,Two year,Yes,Mailed check,18.70,383.65,No,0,1.011260,10,418.696.095-06
7040,0827-ITJPH,Male,0,No,No,36,Yes,No,No,No internet service,...,Two year,Yes,Credit card (automatic),18.55,689.00,No,0,1.195286,10,661.182.892-39
7041,9764-REAFF,Female,0,Yes,No,59,Yes,No,No,No internet service,...,Two year,No,Bank transfer (automatic),18.40,1057.85,No,0,1.134925,10,107.586.435-60


In [20]:
# Write the DataFrame (including the columns added in this notebook) to CSV,
# omitting the index. The path is relative to the current working directory.
output_path = 'data/new_customer_nps.csv'
df.to_csv(output_path, index=False)