In [None]:
library(readr)
library(MASS)
library(dplyr)
library(xgboost)
library(Matrix)
library(rbcb)
library(lubridate)

In [None]:
# Load the input CSV into `base`; the later cells derive their data from it.
base <- read_csv("base_concorrencia_bq.csv")

### Prazo-Class

In [None]:
# Bucket the SCR term (`prazo_scr`) into three classes:
#   "1": prazo_scr <= 12, "2": 12 < prazo_scr <= 24, "3": prazo_scr > 24.
# Rows without `prazo_scr` get NA in both derived columns.
# findInterval(..., left.open = TRUE) yields 0/1/2 for exactly those
# right-closed intervals, i.e. the zero-based class label.
prazo_cortes <- c(12, 24)
rotulo_prazo <- findInterval(base$prazo_scr, prazo_cortes, left.open = TRUE)

# `grupo_prazo`: factor with levels "1".."3"; `label_prazo`: integer 0..2.
base$grupo_prazo <- factor(
  as.character(rotulo_prazo + 1L),
  levels = as.character(1:3)
)
base$label_prazo <- rotulo_prazo



In [None]:
# Prazo (classification): for each institution, train a 3-class XGBoost model
# on rows with a known term bucket (`label_prazo`) and use it to fill
# `label_prazo` on rows where `prazo_scr` is missing.
# Output: `lista_bancos`, a named list with one tibble per institution holding
# the imputed rows followed by the training rows.
bancos <- c('Banco Inter', 'Banco do Brasil', 'Bradesco', 'C6 Bank',
            'Caixa', 'Cooperativas', 'Itaú', 'Mercado Pago',
            'Nubank', 'Pagseguro', 'Santander')

lista_bancos <- vector("list", length(bancos))
names(lista_bancos) <- bancos

# sample() below draws from R's RNG, which the `seed` entry in `params` does
# not control; fixing it here makes the train/validation split reproducible.
set.seed(1234)

# Derived features shared by the training and prediction subsets.
add_features <- function(df) {
  df %>%
    mutate(
      grupo_cnae = substr(cnae_fiscal_principal, 1, 3),
      std_dev_debt_3m = tidyr::replace_na(std_dev_debt_3m, 0),
      FullDebt_3M = tidyr::replace_na(FullDebt_3M, 0),
      IsAcquirerActive = tidyr::replace_na(IsAcquirerActive, FALSE)
    ) %>%
    mutate(
      FullDebt_3M_2 = FullDebt_3M^2,
      mediana_tpv_3m_2 = mediana_tpv_3m^2,
      desembolso_scr_2 = desembolso_scr^2
    ) %>%
    mutate(
      razao_debt_tpv = FullDebt_3M / mediana_tpv_3m,
      razao_tpv_if = mediana_tpv_3m / n_if
    )
}

# Build the sparse design matrix for `df` without the intercept column.
# When `feature_names` is given, columns missing from `df` are added as zeros,
# columns unknown to the training matrix are dropped, and the result is
# ordered exactly like `feature_names`.
build_design <- function(fml, df, feature_names = NULL) {
  X <- sparse.model.matrix(fml, data = df)[, -1, drop = FALSE]
  # The model frame may drop rows with NA in a feature; fail loudly instead of
  # letting labels and rows drift apart.
  if (nrow(X) != nrow(df)) {
    stop(
      "sparse.model.matrix dropped rows with NA in model features; ",
      "rows and labels would be misaligned.",
      call. = FALSE
    )
  }
  if (is.null(feature_names)) {
    return(X)
  }
  missing_cols <- setdiff(feature_names, colnames(X))
  if (length(missing_cols) > 0) {
    add_mat <- Matrix::Matrix(
      0,
      nrow = nrow(X), ncol = length(missing_cols), sparse = TRUE
    )
    colnames(add_mat) <- missing_cols
    X <- cbind(X, add_mat)
  }
  X[, feature_names, drop = FALSE]
}

# Loop-invariant model specification.
fml <- ~ factor(IsAcquirerActive) + factor(Default_Month_Prior) + FullDebt_3M +
  factor(month_year) + Cont_Socios + razao_debt_tpv + desembolso_scr +
  mediana_tpv_3m + factor(ano_inicio_atividade)

params <- list(
  objective = "multi:softmax",
  eval_metric = "merror",
  max_depth = 3,
  min_child_weight = 10,
  eta = 0.5,
  subsample = 0.8,
  colsample_bytree = 0.8,
  num_class = 3,
  seed = 1234
)

# Columns re-coded with the training levels before prediction, and columns
# that must be non-missing for a row to be scored.
factor_vars <- c("month_year")
required_cols <- c(
  "FullDebt_3M", "desembolso_scr", "month_year",
  "Cont_Socios", "razao_debt_tpv"
)

for (nome_banco in bancos) {
  print(nome_banco)

  base_banco <- base %>%
    filter(IF_adj == nome_banco, !is.na(mediana_tpv_3m), mediana_tpv_3m > 1) %>%
    add_features()

  # filter() drops rows whose condition is NA, so rows without `prazo_scr`
  # never reach the training set.
  base_train <- base_banco %>% filter(prazo_scr <= 60)
  base_pred <- base_banco %>% filter(is.na(prazo_scr))

  # 80/20 train/validation split.
  idx <- sample(seq_len(nrow(base_train)), size = floor(0.8 * nrow(base_train)))
  train_df <- base_train[idx, ]
  valid_df <- base_train[-idx, ]

  X_train <- build_design(fml, train_df)
  feature_names <- colnames(X_train)
  # Align the validation matrix too: a factor level present in only one of the
  # two splits would otherwise give the matrices different columns.
  X_valid <- build_design(fml, valid_df, feature_names)

  y_train <- train_df$label_prazo
  y_valid <- valid_df$label_prazo

  dtrain <- xgb.DMatrix(data = X_train, label = y_train)
  dvalid <- xgb.DMatrix(data = X_valid, label = y_valid)

  bst <- xgb.train(
    params = params,
    data = dtrain,
    nrounds = 400,
    watchlist = list(train = dtrain, valid = dvalid),
    early_stopping_rounds = 10,
    verbose = 2
  )

  # Validation accuracy (share of exact class matches).
  pred_test <- predict(bst, dvalid)
  print(data.frame(acertos = mean(pred_test == y_valid)))

  if (nrow(base_pred) > 0) {
    # Re-code factors on a copy so `base_pred` keeps its original column types
    # and can be stacked with `base_train` without factor coercion.
    pred_df <- base_pred
    for (v in factor_vars) {
      pred_df[[v]] <- factor(
        pred_df[[v]],
        levels = levels(as.factor(train_df[[v]]))
      )
    }
    # Values unseen in training became NA above; drop those rows together
    # with rows missing any required numeric feature.
    keep <- stats::complete.cases(pred_df[, required_cols, drop = FALSE])
    pred_df <- pred_df[keep, ]
    base_pred <- base_pred[keep, ]
  }

  if (nrow(base_pred) > 0) {
    X_pred <- build_design(fml, pred_df, feature_names)
    dpred <- xgb.DMatrix(X_pred)
    base_pred$label_prazo <- predict(bst, dpred)
  }

  lista_bancos[[nome_banco]] <- bind_rows(base_pred, base_train)
}

In [None]:
# Stack the per-institution tibbles produced by the loop into one table.
base_prazo_filled <- bind_rows(lista_bancos)

In [None]:
# Persist the filled table.
# NOTE(review): the Prazo-Reg section writes to this same file name.
write_csv(base_prazo_filled, "base_prazo_filled.csv")

### Prazo-Reg

In [None]:
# Prazo (regression): for each institution, fit an XGBoost regressor on
# log1p(prazo_scr) using rows with a known term (<= 60) and use it to fill
# `prazo_scr` on rows where it is missing.
# Output: `lista_bancos`, a named list with one tibble per institution holding
# the imputed rows followed by the training rows.
bancos <- c('Banco Inter', 'Banco do Brasil', 'Bradesco', 'C6 Bank',
            'Caixa', 'Cooperativas', 'Itaú', 'Mercado Pago',
            'Nubank', 'Pagseguro', 'Santander')

lista_bancos <- vector("list", length(bancos))
names(lista_bancos) <- bancos

# sample() below draws from R's RNG, which the `seed` entry in `params` does
# not control; fixing it here makes the train/validation split reproducible.
set.seed(1234)

# Derived features shared by the training and prediction subsets.
add_features <- function(df) {
  df %>%
    mutate(
      grupo_cnae = substr(cnae_fiscal_principal, 1, 3),
      std_dev_debt_3m = tidyr::replace_na(std_dev_debt_3m, 0),
      FullDebt_3M = tidyr::replace_na(FullDebt_3M, 0),
      IsAcquirerActive = tidyr::replace_na(IsAcquirerActive, FALSE)
    ) %>%
    mutate(
      FullDebt_3M_2 = FullDebt_3M^2,
      mediana_tpv_3m_2 = mediana_tpv_3m^2,
      desembolso_scr_2 = desembolso_scr^2
    ) %>%
    mutate(
      razao_debt_tpv = FullDebt_3M / mediana_tpv_3m,
      razao_tpv_if = mediana_tpv_3m / n_if
    )
}

# Build the sparse design matrix for `df` without the intercept column.
# When `feature_names` is given, columns missing from `df` are added as zeros,
# columns unknown to the training matrix are dropped, and the result is
# ordered exactly like `feature_names`.
build_design <- function(fml, df, feature_names = NULL) {
  X <- sparse.model.matrix(fml, data = df)[, -1, drop = FALSE]
  # The model frame may drop rows with NA in a feature; fail loudly instead of
  # letting labels and rows drift apart.
  if (nrow(X) != nrow(df)) {
    stop(
      "sparse.model.matrix dropped rows with NA in model features; ",
      "rows and labels would be misaligned.",
      call. = FALSE
    )
  }
  if (is.null(feature_names)) {
    return(X)
  }
  missing_cols <- setdiff(feature_names, colnames(X))
  if (length(missing_cols) > 0) {
    add_mat <- Matrix::Matrix(
      0,
      nrow = nrow(X), ncol = length(missing_cols), sparse = TRUE
    )
    colnames(add_mat) <- missing_cols
    X <- cbind(X, add_mat)
  }
  X[, feature_names, drop = FALSE]
}

# Loop-invariant model specification.
fml <- ~ factor(IsAcquirerActive) + factor(Default_Month_Prior) +
  log(std_dev_debt_3m + 1) + log(FullDebt_3M + 1) + log(FullDebt_3M_2 + 1) +
  log(mediana_tpv_3m) + log(mediana_tpv_3m_2) +
  log(desembolso_scr) + log(desembolso_scr_2) +
  factor(month_year) + n_if +
  factor(ano_inicio_atividade) + factor(uf) +
  Cont_Socios + factor(grupo_cnae) + razao_debt_tpv + log(razao_tpv_if + 1)

params <- list(
  objective = "reg:absoluteerror",
  eval_metric = "mae",
  max_depth = 6,
  min_child_weight = 5,
  eta = 0.05,
  subsample = 0.8,
  colsample_bytree = 0.8,
  seed = 1234
)

# Columns re-coded with the training levels before prediction, and columns
# that must be non-missing for a row to be scored.
factor_vars <- c(
  "IsAcquirerActive", "Default_Month_Prior", "month_year",
  "ano_inicio_atividade", "uf", "grupo_cnae"
)
required_cols <- c(
  "ano_inicio_atividade", "grupo_cnae", "IsAcquirerActive",
  "Default_Month_Prior", "std_dev_debt_3m", "FullDebt_3M",
  "mediana_tpv_3m", "desembolso_scr", "month_year", "n_if",
  "uf", "Cont_Socios", "razao_debt_tpv", "razao_tpv_if"
)

# 10-wide bands of the real term used to annotate the diagnostic plot.
breaks <- seq(0, 60, by = 10)

for (nome_banco in bancos) {
  print(nome_banco)

  base_banco <- base %>%
    filter(IF_adj == nome_banco, !is.na(mediana_tpv_3m), mediana_tpv_3m > 1) %>%
    add_features()

  # filter() drops rows whose condition is NA, so rows without `prazo_scr`
  # never reach the training set.
  base_train <- base_banco %>% filter(prazo_scr <= 60)
  base_pred <- base_banco %>% filter(is.na(prazo_scr))

  # 90/10 train/validation split.
  idx <- sample(seq_len(nrow(base_train)), size = floor(0.9 * nrow(base_train)))
  train_df <- base_train[idx, ]
  valid_df <- base_train[-idx, ]

  X_train <- build_design(fml, train_df)
  feature_names <- colnames(X_train)
  # Align the validation matrix too: a factor level present in only one of the
  # two splits would otherwise give the matrices different columns.
  X_valid <- build_design(fml, valid_df, feature_names)

  y_train <- train_df$prazo_scr
  y_valid <- valid_df$prazo_scr

  # The model is trained on log1p(term); predictions are mapped back with
  # expm1().
  dtrain <- xgb.DMatrix(X_train, label = log1p(y_train))
  dvalid <- xgb.DMatrix(X_valid, label = log1p(y_valid))

  bst <- xgb.train(
    params = params,
    data = dtrain,
    nrounds = 800,
    watchlist = list(train = dtrain, valid = dvalid),
    early_stopping_rounds = 30,
    verbose = 2
  )

  # Validation MAE on the original scale.
  pred <- expm1(predict(bst, dvalid))
  print(mean(abs(pred - y_valid)))

  # Distribution of the real term across the bands, for the plot annotation.
  bins <- cut(
    y_valid,
    breaks = breaks,
    right = FALSE,
    include.lowest = TRUE
  )
  tab <- table(bins)

  plot(
    y_valid, pred,
    xlab = "Prazo real (meses)",
    ylab = "Prazo previsto (meses)",
    main = sprintf("Predito vs Real (com distribuição do prazo real) %s", as.character(nome_banco)),
    pch = 16, col = rgb(0, 0, 0, 0.3)
  )

  # Identity line: perfect predictions fall on it.
  abline(0, 1, col = "red", lwd = 2)

  # Vertical lines at the band limits.
  abline(v = breaks, col = "grey80", lty = 3)

  # Write each band's share of validation rows near the top of the plot.
  y_top <- max(pred, na.rm = TRUE) * 1.02

  for (i in seq_along(tab)) {
    if (tab[i] > 0) {
      x_mid <- (breaks[i] + breaks[i + 1]) / 2
      text(
        x = x_mid,
        y = y_top,
        labels = paste0(round(100 * tab[i] / sum(tab), 1), "%"),
        cex = 0.8
      )
    }
  }

  if (nrow(base_pred) > 0) {
    # Re-code factors on a copy so `base_pred` keeps its original column types
    # and can be stacked with `base_train` without factor coercion.
    pred_df <- base_pred
    for (v in factor_vars) {
      pred_df[[v]] <- factor(
        pred_df[[v]],
        levels = levels(as.factor(train_df[[v]]))
      )
    }
    # Values unseen in training became NA above; drop those rows together
    # with rows missing any required feature.
    keep <- stats::complete.cases(pred_df[, required_cols, drop = FALSE])
    pred_df <- pred_df[keep, ]
    base_pred <- base_pred[keep, ]
  }

  if (nrow(base_pred) > 0) {
    X_pred <- build_design(fml, pred_df, feature_names)
    dpred <- xgb.DMatrix(X_pred)

    pred_prazo <- expm1(predict(bst, dpred))
    base_pred$prazo_scr <- round(pred_prazo)

    # Keep the bucket columns consistent with the imputed term (same cut
    # points as the Prazo-Class section: <= 12, <= 24, > 24); the Taxa model
    # uses factor(label_prazo) as a feature and cannot score rows where it is
    # NA.
    rotulo_prazo <- findInterval(base_pred$prazo_scr, c(12, 24), left.open = TRUE)
    base_pred$label_prazo <- rotulo_prazo
    base_pred$grupo_prazo <- factor(
      as.character(rotulo_prazo + 1L),
      levels = as.character(1:3)
    )
  }

  lista_bancos[[nome_banco]] <- bind_rows(base_pred, base_train)
}

In [None]:
# Stack the per-institution tibbles produced by the regression loop.
base_prazo_filled <- bind_rows(lista_bancos)

In [None]:
# Persist the filled table.
# NOTE(review): the Prazo-Class section writes to this same file name.
write_csv(base_prazo_filled, "base_prazo_filled.csv")

### Taxa

In [None]:
# Reload the table with the filled term from disk.
base_prazo_filled <- read_csv("base_prazo_filled.csv")

In [None]:
# flag_filled: 1 when `taxa_scr` is missing at this point (it is imputed by
# the Taxa loop further down), 0 when it is present.
base_prazo_filled <- base_prazo_filled %>%
  mutate(flag_filled = if_else(is.na(taxa_scr), 1, 0))

In [None]:
# Fetch series 1178 (exposed as column `selic`) from 2024-01-01 onwards.
selic_raw <- get_series(c(selic = 1178), start_date = "2024-01-01")

# One row per calendar month: the observation with the latest date in that
# month, keyed by the first day of the month.
df_selic <- selic_raw %>%
  mutate(month_year = floor_date(date, "month")) %>%
  group_by(month_year) %>%
  filter(date == max(date)) %>%
  ungroup() %>%
  transmute(month_year, selic_fim_mes = selic)



In [None]:
# Treat the series value as percent per year: convert it to a decimal and
# then to the equivalent compounded monthly rate.
df_selic <- df_selic %>%
  mutate(
    selic_decimal_ano = selic_fim_mes / 100,
    selic_mensal_decimal = (1 + selic_decimal_ano)^(1 / 12) - 1
  )

In [None]:
# Keep only the join key and the monthly rate.
df_selic <- df_selic[, c("month_year", "selic_mensal_decimal")]

In [None]:
# Attach the monthly Selic rate to each row by month.
base_prazo_filled <- left_join(base_prazo_filled, df_selic, by = "month_year")

In [None]:
# Bucket the SCR rate (`taxa_scr`) into four classes:
#   "1": taxa_scr <= 0.035
#   "2": 0.035 < taxa_scr <= 0.05
#   "3": 0.05  < taxa_scr <= 0.065
#   "4": taxa_scr > 0.065
# Rows without `taxa_scr` get NA in both derived columns.
# findInterval(..., left.open = TRUE) yields 0..3 for exactly those
# right-closed intervals, i.e. the zero-based class label.
taxa_cortes <- c(0.035, 0.05, 0.065)
rotulo_taxa <- findInterval(
  base_prazo_filled$taxa_scr,
  taxa_cortes,
  left.open = TRUE
)

# The factor declares exactly the four buckets defined above, matching
# `num_class = 4` in the Taxa model (no unused levels).
base_prazo_filled$grupo_taxa <- factor(
  as.character(rotulo_taxa + 1L),
  levels = as.character(1:4)
)
base_prazo_filled$label_taxa <- rotulo_taxa



In [None]:
# Taxa (classification): for each institution, train a 4-class XGBoost model
# on rows with a known rate bucket (`label_taxa`) and use it to predict the
# bucket for rows where `taxa_scr` is missing.
# Output: `lista_bancos_tx`, a named list with one tibble per institution
# holding the imputed rows followed by the training rows.
bancos <- c('Banco Inter', 'Banco do Brasil', 'Bradesco', 'C6 Bank',
            'Caixa', 'Cooperativas', 'Itaú', 'Mercado Pago',
            'Nubank', 'Pagseguro', 'Santander')

lista_bancos_tx <- vector("list", length(bancos))
names(lista_bancos_tx) <- bancos

# sample() below draws from R's RNG, which the `seed` entry in `params_tx`
# does not control; fixing it here makes the split reproducible.
set.seed(1234)

# Build the sparse design matrix for `df` without the intercept column.
# When `feature_names` is given, columns missing from `df` are added as zeros,
# columns unknown to the training matrix are dropped, and the result is
# ordered exactly like `feature_names`.
build_design <- function(fml, df, feature_names = NULL) {
  X <- sparse.model.matrix(fml, data = df)[, -1, drop = FALSE]
  # The model frame may drop rows with NA in a feature; fail loudly instead of
  # letting labels and rows drift apart.
  if (nrow(X) != nrow(df)) {
    stop(
      "sparse.model.matrix dropped rows with NA in model features; ",
      "rows and labels would be misaligned.",
      call. = FALSE
    )
  }
  if (is.null(feature_names)) {
    return(X)
  }
  missing_cols <- setdiff(feature_names, colnames(X))
  if (length(missing_cols) > 0) {
    add_mat <- Matrix::Matrix(
      0,
      nrow = nrow(X), ncol = length(missing_cols), sparse = TRUE
    )
    colnames(add_mat) <- missing_cols
    X <- cbind(X, add_mat)
  }
  X[, feature_names, drop = FALSE]
}

# Loop-invariant model specification.
fml_tx <- ~ FullDebt_3M + factor(month_year) + Cont_Socios + razao_debt_tpv +
  desembolso_scr + factor(label_prazo) + selic_mensal_decimal

params_tx <- list(
  objective = "multi:softmax",
  eval_metric = "merror",
  max_depth = 4,
  min_child_weight = 3,
  eta = 0.8,
  subsample = 0.8,
  colsample_bytree = 0.8,
  num_class = 4,
  seed = 1234
)

# Columns re-coded with the training levels before prediction, and columns
# that must be non-missing for a row to be scored.
factor_vars_tx <- c("month_year")
required_cols_tx <- c(
  "FullDebt_3M", "desembolso_scr", "month_year",
  "Cont_Socios", "razao_debt_tpv"
)

for (nome_banco in bancos) {
  print(nome_banco)

  base_banco_tx <- base_prazo_filled %>% filter(IF_adj == nome_banco)
  base_train_tx <- base_banco_tx %>% filter(!is.na(taxa_scr))
  base_pred_tx <- base_banco_tx %>% filter(is.na(taxa_scr))

  # 80/20 train/validation split.
  idx_tx <- sample(
    seq_len(nrow(base_train_tx)),
    size = floor(0.8 * nrow(base_train_tx))
  )
  train_df_tx <- base_train_tx[idx_tx, ]
  valid_df_tx <- base_train_tx[-idx_tx, ]

  X_train_tx <- build_design(fml_tx, train_df_tx)
  feature_names_tx <- colnames(X_train_tx)
  # Align the validation matrix too: a factor level present in only one of the
  # two splits would otherwise give the matrices different columns.
  X_valid_tx <- build_design(fml_tx, valid_df_tx, feature_names_tx)

  y_train_tx <- train_df_tx$label_taxa
  y_valid_tx <- valid_df_tx$label_taxa

  dtrain_tx <- xgb.DMatrix(data = X_train_tx, label = y_train_tx)
  dvalid_tx <- xgb.DMatrix(data = X_valid_tx, label = y_valid_tx)

  bst_tx <- xgb.train(
    params = params_tx,
    data = dtrain_tx,
    nrounds = 800,
    watchlist = list(train = dtrain_tx, valid = dvalid_tx),
    early_stopping_rounds = 10,
    verbose = 2
  )

  # Validation accuracy (share of exact class matches).
  pred_test <- predict(bst_tx, dvalid_tx)
  print(data.frame(acertos = mean(pred_test == y_valid_tx)))

  if (nrow(base_pred_tx) > 0) {
    # Re-code factors on a copy so `base_pred_tx` keeps its original column
    # types and can be stacked with `base_train_tx` without factor coercion.
    pred_df_tx <- base_pred_tx
    for (v in factor_vars_tx) {
      pred_df_tx[[v]] <- factor(
        pred_df_tx[[v]],
        levels = levels(as.factor(train_df_tx[[v]]))
      )
    }
    # Values unseen in training became NA above; drop those rows together
    # with rows missing any required numeric feature.
    keep_tx <- stats::complete.cases(
      pred_df_tx[, required_cols_tx, drop = FALSE]
    )
    pred_df_tx <- pred_df_tx[keep_tx, ]
    base_pred_tx <- base_pred_tx[keep_tx, ]
  }

  if (nrow(base_pred_tx) > 0) {
    X_pred_tx <- build_design(fml_tx, pred_df_tx, feature_names_tx)
    dpred_tx <- xgb.DMatrix(X_pred_tx)
    pred_tx <- predict(bst_tx, dpred_tx)

    # For imputed rows `taxa_scr` receives the predicted class (0..3), not a
    # rate; the later `grupo_tx_scr` step reads it as the bucket when
    # `label_taxa` is NA.
    base_pred_tx$taxa_scr <- round(pred_tx)
  }

  lista_bancos_tx[[nome_banco]] <- bind_rows(base_pred_tx, base_train_tx)
}

In [None]:
# Stack the per-institution tibbles produced by the Taxa loop.
base_tx_filled <- bind_rows(lista_bancos_tx)

In [None]:
# SCR rate bucket: `label_taxa` when present, otherwise the value stored in
# `taxa_scr` (for imputed rows the Taxa loop writes the predicted class there).
base_tx_filled <- base_tx_filled %>%
  mutate(grupo_tx_scr = coalesce(label_taxa, taxa_scr))

In [None]:
# Keep rows that have a Stone limit and bucket the Stone rate into 0..3.
# Conditions are evaluated in order, so each `<=` only catches values above
# the previous cut; non-positive or missing rates stay NA.
base_tx_filled_stone <- base_tx_filled %>%
  filter(!is.na(limite_stone)) %>%
  mutate(
    grupo_tx_stone = case_when(
      taxa_stone <= 0 ~ NA_real_,
      taxa_stone <= 0.0349 ~ 0,
      taxa_stone <= 0.0499 ~ 1,
      taxa_stone <= 0.0649 ~ 2,
      taxa_stone > 0.0649 ~ 3
    )
  )

In [None]:
# 0/1 flags (NA when an operand is NA):
#   diferenca_limite_50pct: SCR disbursement above twice the Stone limit;
#   diferenca_tx_50pct:     SCR rate bucket below the Stone rate bucket.
base_tx_filled_stone <- base_tx_filled_stone %>%
  mutate(
    diferenca_limite_50pct = as.numeric(desembolso_scr > 2 * limite_stone),
    diferenca_tx_50pct = as.numeric(grupo_tx_scr < grupo_tx_stone)
  )

In [None]:
# Disbursements at the high-penetration institutions for clients that were
# active and aware of the Stone offer.

colunas_analise <- c(
  "IF_adj", "RootDocumentNumber", "month_year", "flag_desembolsou_stone",
  "desembolso_scr", "limite_stone", "grupo_tx_stone", "prazo_stone",
  "prazo_scr", "taxa_scr", "taxa_stone", "IsAcquirerActive",
  "flag_interacao", "flag_operacao", "grupo_tx_scr", "diferenca_prazo",
  "diferenca_limite", "diferenca_tx_50pct", "diferenca_limite_50pct",
  "flag_filled"
)
ifs_alta_penetracao <- c(
  "Banco Inter", "Pagseguro", "Santander", "C6 Bank", "Nubank", "Mercado Pago"
)
base_tx_filled_stone_ <- base_tx_filled_stone %>%
  select(all_of(colunas_analise)) %>%
  filter(
    IsAcquirerActive == TRUE,
    flag_interacao == TRUE,
    flag_operacao == "automatica",
    IF_adj %in% ifs_alta_penetracao
  )

In [None]:
# Split by origin of the SCR rate: observed (flag_filled == 0) vs imputed
# (flag_filled == 1).
base_orig <- filter(base_tx_filled_stone_, flag_filled == 0)
base_tx_filled_stone__ <- filter(base_tx_filled_stone_, flag_filled == 1)

In [None]:
# For rows with an imputed SCR rate, flag (1/0) on which criteria the Stone
# offer beats the SCR operation and count them:
#   limite_stone_melhor: Stone limit above the SCR disbursement;
#   prazo_stone_melhor:  Stone term above the SCR term;
#   taxa_stone_melhor:   Stone rate bucket below the SCR rate bucket.
base_tx_filled_stone__ <- base_tx_filled_stone__ %>%
  mutate(
    limite_stone_melhor = if_else(limite_stone > desembolso_scr, 1, 0),
    # The term is compared with the SCR term (`prazo_scr`), not with the
    # disbursed amount.
    prazo_stone_melhor = if_else(prazo_stone > prazo_scr, 1, 0),
    taxa_stone_melhor = if_else(grupo_tx_stone < grupo_tx_scr, 1, 0)
  ) %>%
  mutate(
    qtd_stone_melhor = limite_stone_melhor + prazo_stone_melhor + taxa_stone_melhor,
    qtd_flag = diferenca_limite_50pct + diferenca_tx_50pct
  )

In [None]:
# Total SCR disbursement where Stone is better on exactly one criterion.
summarise(
  filter(base_tx_filled_stone__, qtd_stone_melhor == 1),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# ... on exactly two criteria.
summarise(
  filter(base_tx_filled_stone__, qtd_stone_melhor == 2),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# ... on all three criteria.
summarise(
  filter(base_tx_filled_stone__, qtd_stone_melhor == 3),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# ... on none of the criteria.
summarise(
  filter(base_tx_filled_stone__, qtd_stone_melhor == 0),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# NOTE(review): hard-coded operands, presumably copied from totals printed by
# earlier cells — TODO confirm the source and recompute after any upstream
# change.
25583371+1755261

In [None]:
# NOTE(review): same caveat — literals with no visible source in this file.
808961.3+817758.2

In [None]:
# NOTE(review): same caveat — literals with no visible source in this file.
46691467+3362144

In [None]:
# For rows with an observed SCR rate, flag (1/0) on which criteria the Stone
# offer beats the SCR operation and count them:
#   limite_stone_melhor: Stone limit above the SCR disbursement;
#   prazo_stone_melhor:  Stone term above the SCR term;
#   taxa_stone_melhor:   Stone rate below the observed SCR rate.
base_orig <- base_orig %>%
  mutate(
    limite_stone_melhor = if_else(limite_stone > desembolso_scr, 1, 0),
    # The term is compared with the SCR term (`prazo_scr`), not with the
    # disbursed amount.
    prazo_stone_melhor = if_else(prazo_stone > prazo_scr, 1, 0),
    taxa_stone_melhor = if_else(taxa_stone < taxa_scr, 1, 0)
  ) %>%
  mutate(
    qtd_stone_melhor = limite_stone_melhor + prazo_stone_melhor + taxa_stone_melhor
  )

In [None]:
# Total SCR disbursement where Stone is better on exactly one criterion.
summarise(
  filter(base_orig, qtd_stone_melhor == 1),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# ... on exactly two criteria.
summarise(
  filter(base_orig, qtd_stone_melhor == 2),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# ... on all three criteria.
summarise(
  filter(base_orig, qtd_stone_melhor == 3),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# ... on none of the criteria.
summarise(
  filter(base_orig, qtd_stone_melhor == 0),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# Disbursements at the low-penetration institutions (every institution outside
# the high-penetration list) for clients that were active and aware of the
# Stone offer.

colunas_analise <- c(
  "IF_adj", "RootDocumentNumber", "month_year", "flag_desembolsou_stone",
  "desembolso_scr", "limite_stone", "grupo_tx_stone", "prazo_stone",
  "prazo_scr", "taxa_scr", "taxa_stone", "IsAcquirerActive",
  "flag_interacao", "flag_operacao", "grupo_tx_scr", "diferenca_prazo",
  "diferenca_limite", "diferenca_tx_50pct", "diferenca_limite_50pct",
  "flag_filled"
)
ifs_alta_penetracao <- c(
  "Banco Inter", "Pagseguro", "Santander", "C6 Bank", "Nubank", "Mercado Pago"
)
base_tx_filled_stone_baixapen <- base_tx_filled_stone %>%
  select(all_of(colunas_analise)) %>%
  filter(
    IsAcquirerActive == TRUE,
    flag_interacao == TRUE,
    flag_operacao == "automatica",
    !(IF_adj %in% ifs_alta_penetracao)
  )

In [None]:
# Split by origin of the SCR rate: observed (flag_filled == 0) vs imputed
# (flag_filled == 1).
base_orig_baixapen <- filter(base_tx_filled_stone_baixapen, flag_filled == 0)
base_tx_filled_stone__baixapen <- filter(base_tx_filled_stone_baixapen, flag_filled == 1)

In [None]:
# Low-penetration institutions, rows with an imputed SCR rate: flag (1/0) on
# which criteria the Stone offer beats the SCR operation and count them:
#   limite_stone_melhor: Stone limit above the SCR disbursement;
#   prazo_stone_melhor:  Stone term above the SCR term;
#   taxa_stone_melhor:   Stone rate bucket below the SCR rate bucket.
base_tx_filled_stone__baixapen_ <- base_tx_filled_stone__baixapen %>%
  mutate(
    limite_stone_melhor = if_else(limite_stone > desembolso_scr, 1, 0),
    # The term is compared with the SCR term (`prazo_scr`), not with the
    # disbursed amount.
    prazo_stone_melhor = if_else(prazo_stone > prazo_scr, 1, 0),
    taxa_stone_melhor = if_else(grupo_tx_stone < grupo_tx_scr, 1, 0)
  ) %>%
  mutate(
    qtd_stone_melhor = limite_stone_melhor + prazo_stone_melhor + taxa_stone_melhor,
    qtd_flag = diferenca_limite_50pct + diferenca_tx_50pct
  )

In [None]:
# Total SCR disbursement where Stone is better on exactly one criterion.
summarise(
  filter(base_tx_filled_stone__baixapen_, qtd_stone_melhor == 1),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# ... on exactly two criteria.
summarise(
  filter(base_tx_filled_stone__baixapen_, qtd_stone_melhor == 2),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# ... on all three criteria.
summarise(
  filter(base_tx_filled_stone__baixapen_, qtd_stone_melhor == 3),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# ... on none of the criteria.
summarise(
  filter(base_tx_filled_stone__baixapen_, qtd_stone_melhor == 0),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# NOTE(review): hard-coded operands, presumably copied from totals printed by
# earlier cells — TODO confirm the source and recompute after any upstream
# change.
295644731+20655041

In [None]:
# NOTE(review): same caveat — literals with no visible source in this file.
3373805+5959132

In [None]:
# NOTE(review): same caveat — literals with no visible source in this file.
652564178+37599398

In [None]:
# Low-penetration institutions, rows with an observed SCR rate: flag (1/0) on
# which criteria the Stone offer beats the SCR operation and count them:
#   limite_stone_melhor: Stone limit above the SCR disbursement;
#   prazo_stone_melhor:  Stone term above the SCR term;
#   taxa_stone_melhor:   Stone rate below the observed SCR rate.
base_orig_baixapen <- base_orig_baixapen %>%
  mutate(
    limite_stone_melhor = if_else(limite_stone > desembolso_scr, 1, 0),
    # The term is compared with the SCR term (`prazo_scr`), not with the
    # disbursed amount.
    prazo_stone_melhor = if_else(prazo_stone > prazo_scr, 1, 0),
    taxa_stone_melhor = if_else(taxa_stone < taxa_scr, 1, 0)
  ) %>%
  mutate(
    qtd_stone_melhor = limite_stone_melhor + prazo_stone_melhor + taxa_stone_melhor
  )

In [None]:
# Total SCR disbursement where Stone is better on exactly one criterion.
summarise(
  filter(base_orig_baixapen, qtd_stone_melhor == 1),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# ... on exactly two criteria.
summarise(
  filter(base_orig_baixapen, qtd_stone_melhor == 2),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# ... on all three criteria.
summarise(
  filter(base_orig_baixapen, qtd_stone_melhor == 3),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# ... on none of the criteria.
summarise(
  filter(base_orig_baixapen, qtd_stone_melhor == 0),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# Open the help page for the `xgb.params` topic.
help("xgb.params")