In [1]:
library(readr)
library(MASS)
library(dplyr)
library(xgboost)
library(Matrix)
library(rbcb)
library(lubridate)


Attaching package: ‘dplyr’


The following object is masked from ‘package:MASS’:

    select


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘lubridate’


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union




In [None]:
# Load the input table from base_concorrencia_bq.csv into `base`.
base <- readr::read_csv("base_concorrencia_bq.csv")

### Prazo-Class

In [None]:
# Bucket the observed term (prazo_scr) into three ordered groups stored as a
# factor with levels "1", "2", "3":
#   "1": prazo_scr <= 12
#   "2": 12 < prazo_scr <= 24
#   "3": prazo_scr > 24
# Missing terms stay NA.
base$grupo_prazo <- cut(
  base$prazo_scr,
  breaks = c(-Inf, 12, 24, Inf),
  labels = as.character(1:3),
  right = TRUE,
  include.lowest = TRUE
)

# Zero-based integer class label (0, 1, 2) taken from the factor code.
# NA groups yield NA labels.
base$label_prazo <- as.integer(base$grupo_prazo) - 1L


In [None]:
# Institutions (values of IF_adj) that each get their own term-bucket
# classifier.
bancos <- c(
  "Banco Inter", "Banco do Brasil", "Bradesco", "C6 Bank",
  "Caixa", "Cooperativas", "Itaú", "Mercado Pago",
  "Nubank", "Pagseguro", "Santander"
)

# One result table per institution, filled inside the loop.
lista_bancos <- vector("list", length(bancos))
names(lista_bancos) <- bancos

# Categorical predictors. They are converted to factors with levels learned
# from the training partition BEFORE the design matrices are built, so the
# train, validation and prediction matrices share exactly the same dummy
# columns and the same reference level. Wrapping them in factor() inside the
# formula would re-derive the levels from whatever rows are passed in, which
# can shift the reference level between matrices.
vars_categoricas <- c(
  "IsAcquirerActive", "Default_Month_Prior", "month_year",
  "ano_inicio_atividade"
)

# Predictors of the term-bucket model (one-sided formula, no response).
fml <- ~ IsAcquirerActive + Default_Month_Prior + FullDebt_3M + month_year +
  Cont_Socios + razao_debt_tpv + desembolso_scr + mediana_tpv_3m +
  ano_inicio_atividade

# Every raw column the formula reads; used to drop rows that cannot be encoded.
vars_modelo <- all.vars(fml)

# Re-encode the columns named in `niveis` as factors with the given levels.
# Values that are missing or not among the levels become NA.
apply_levels <- function(df, niveis) {
  for (v in names(niveis)) {
    df[[v]] <- factor(df[[v]], levels = niveis[[v]])
  }
  df
}

# Fix R's RNG so the train/validation split below is reproducible.
set.seed(1234)

for (nome_banco in bancos) {
  print(nome_banco)

  # Feature engineering shared by labelled and unlabelled rows of this bank.
  base_banco <- base %>%
    filter(IF_adj == nome_banco, !is.na(mediana_tpv_3m), mediana_tpv_3m > 1) %>%
    mutate(
      grupo_cnae = substr(cnae_fiscal_principal, 1, 3),
      std_dev_debt_3m = tidyr::replace_na(std_dev_debt_3m, 0),
      FullDebt_3M = tidyr::replace_na(FullDebt_3M, 0),
      IsAcquirerActive = tidyr::replace_na(IsAcquirerActive, FALSE)
    ) %>%
    mutate(
      FullDebt_3M_2 = FullDebt_3M^2,
      mediana_tpv_3m_2 = mediana_tpv_3m^2,
      desembolso_scr_2 = desembolso_scr^2
    ) %>%
    mutate(
      razao_debt_tpv = FullDebt_3M / mediana_tpv_3m,
      razao_tpv_if = mediana_tpv_3m / n_if
    )

  # Labelled rows: observed term of at most 60 (the comparison drops NA terms).
  base_train <- base_banco %>% filter(prazo_scr <= 60)
  # Unlabelled rows: term missing, to be predicted.
  base_pred <- base_banco %>% filter(is.na(prazo_scr))

  # 80/20 random split of the labelled rows.
  idx <- sample(seq_len(nrow(base_train)), size = floor(0.8 * nrow(base_train)))
  train_df <- base_train[idx, ]
  valid_df <- base_train[-idx, ]

  # Levels seen in the training partition define the feature space.
  niveis_treino <- lapply(
    train_df[vars_categoricas],
    function(col) levels(as.factor(col))
  )

  # Encode, then keep only rows with every predictor present.
  # sparse.model.matrix() would otherwise drop incomplete rows silently and the
  # labels would no longer line up with the matrix rows.
  train_df <- apply_levels(train_df, niveis_treino)
  train_df <- train_df[stats::complete.cases(train_df[vars_modelo]), ]
  valid_df <- apply_levels(valid_df, niveis_treino)
  valid_df <- valid_df[stats::complete.cases(valid_df[vars_modelo]), ]

  # Design matrices without the intercept column.
  X_train <- sparse.model.matrix(fml, data = train_df)[, -1, drop = FALSE]
  X_valid <- sparse.model.matrix(fml, data = valid_df)[, -1, drop = FALSE]
  feature_names <- colnames(X_train)
  stopifnot(identical(colnames(X_valid), feature_names))

  y_train <- train_df$label_prazo
  y_valid <- valid_df$label_prazo

  dtrain <- xgb.DMatrix(data = X_train, label = y_train)
  dvalid <- xgb.DMatrix(data = X_valid, label = y_valid)

  params <- list(
    objective = "multi:softmax",
    eval_metric = "merror",
    max_depth = 3,
    min_child_weight = 10,
    eta = 0.5,
    subsample = 0.8,
    colsample_bytree = 0.8,
    num_class = 3,
    seed = 1234
  )

  bst <- xgb.train(
    params = params,
    data = dtrain,
    nrounds = 400,
    watchlist = list(train = dtrain, valid = dvalid),
    early_stopping_rounds = 10,
    verbose = 2
  )

  # Share of validation rows whose predicted class equals the observed class.
  pred_test <- predict(bst, dvalid)
  print(data.frame(acertos = mean(pred_test == y_valid)))

  # Encode a COPY of the unlabelled rows; `base_pred` keeps its original column
  # types so it can be stacked with `base_train` afterwards.
  pred_enc <- apply_levels(base_pred, niveis_treino)
  usaveis <- stats::complete.cases(pred_enc[vars_modelo])
  base_pred <- base_pred[usaveis, ]

  if (nrow(base_pred) > 0) {
    X_pred <- sparse.model.matrix(fml, data = pred_enc[usaveis, ])[, -1, drop = FALSE]
    stopifnot(
      identical(colnames(X_pred), feature_names),
      nrow(X_pred) == nrow(base_pred)
    )
    dpred <- xgb.DMatrix(X_pred)
    pred <- predict(bst, dpred)
  } else {
    # Nothing to predict for this bank.
    pred <- numeric(0)
  }

  # Predicted class label (0, 1, 2) for the rows whose term was missing.
  base_pred$label_prazo <- pred

  # Predicted rows first, then the labelled rows, as one table per bank.
  lista_bancos[[nome_banco]] <- bind_rows(base_pred, base_train)
}

In [None]:
# Stack the per-bank tables into a single data frame.
base_prazo_filled <- dplyr::bind_rows(lista_bancos)

In [None]:
# Persist the filled table.
# NOTE(review): the Prazo-Reg section further down writes to this same path,
# so whichever export runs last determines the file contents.
write_csv(base_prazo_filled, "base_prazo_filled.csv")

### Prazo-Reg

In [None]:
# Institutions (values of IF_adj) that each get their own term regression.
bancos <- c(
  "Banco Inter", "Banco do Brasil", "Bradesco", "C6 Bank",
  "Caixa", "Cooperativas", "Itaú", "Mercado Pago",
  "Nubank", "Pagseguro", "Santander"
)

# One result table per institution, filled inside the loop.
lista_bancos <- vector("list", length(bancos))
names(lista_bancos) <- bancos

# Categorical predictors. They are converted to factors with levels learned
# from the training partition BEFORE the design matrices are built, so the
# train, validation and prediction matrices share exactly the same dummy
# columns and the same reference level. Wrapping them in factor() inside the
# formula would re-derive the levels from whatever rows are passed in, which
# can shift the reference level between matrices.
vars_categoricas <- c(
  "IsAcquirerActive", "Default_Month_Prior", "month_year",
  "ano_inicio_atividade", "uf", "grupo_cnae"
)

# Predictors of the term regression (one-sided formula, no response).
fml <- ~ IsAcquirerActive + Default_Month_Prior + log(std_dev_debt_3m + 1) +
  log(FullDebt_3M + 1) + log(FullDebt_3M_2 + 1) + log(mediana_tpv_3m) +
  log(mediana_tpv_3m_2) + log(desembolso_scr) + log(desembolso_scr_2) +
  month_year + n_if + ano_inicio_atividade + uf + Cont_Socios + grupo_cnae +
  razao_debt_tpv + log(razao_tpv_if + 1)

# Every raw column the formula reads; used to drop rows that cannot be encoded.
vars_modelo <- all.vars(fml)

# Re-encode the columns named in `niveis` as factors with the given levels.
# Values that are missing or not among the levels become NA.
apply_levels <- function(df, niveis) {
  for (v in names(niveis)) {
    df[[v]] <- factor(df[[v]], levels = niveis[[v]])
  }
  df
}

# Fix R's RNG so the train/validation split below is reproducible.
set.seed(1234)

for (nome_banco in bancos) {
  print(nome_banco)

  # Feature engineering shared by labelled and unlabelled rows of this bank.
  base_banco <- base %>%
    filter(IF_adj == nome_banco, !is.na(mediana_tpv_3m), mediana_tpv_3m > 1) %>%
    mutate(
      grupo_cnae = substr(cnae_fiscal_principal, 1, 3),
      std_dev_debt_3m = tidyr::replace_na(std_dev_debt_3m, 0),
      FullDebt_3M = tidyr::replace_na(FullDebt_3M, 0),
      IsAcquirerActive = tidyr::replace_na(IsAcquirerActive, FALSE)
    ) %>%
    mutate(
      FullDebt_3M_2 = FullDebt_3M^2,
      mediana_tpv_3m_2 = mediana_tpv_3m^2,
      desembolso_scr_2 = desembolso_scr^2
    ) %>%
    mutate(
      razao_debt_tpv = FullDebt_3M / mediana_tpv_3m,
      razao_tpv_if = mediana_tpv_3m / n_if
    )

  # Labelled rows: observed term of at most 60 (the comparison drops NA terms).
  base_train <- base_banco %>% filter(prazo_scr <= 60)
  # Unlabelled rows: term missing, to be predicted.
  base_pred <- base_banco %>% filter(is.na(prazo_scr))

  # 90/10 random split of the labelled rows.
  idx <- sample(seq_len(nrow(base_train)), size = floor(0.9 * nrow(base_train)))
  train_df <- base_train[idx, ]
  valid_df <- base_train[-idx, ]

  # Levels seen in the training partition define the feature space.
  niveis_treino <- lapply(
    train_df[vars_categoricas],
    function(col) levels(as.factor(col))
  )

  # Encode, then keep only rows with every predictor present.
  # sparse.model.matrix() would otherwise drop incomplete rows silently and the
  # targets would no longer line up with the matrix rows.
  train_df <- apply_levels(train_df, niveis_treino)
  train_df <- train_df[stats::complete.cases(train_df[vars_modelo]), ]
  valid_df <- apply_levels(valid_df, niveis_treino)
  valid_df <- valid_df[stats::complete.cases(valid_df[vars_modelo]), ]

  # Design matrices without the intercept column.
  X_train <- sparse.model.matrix(fml, data = train_df)[, -1, drop = FALSE]
  X_valid <- sparse.model.matrix(fml, data = valid_df)[, -1, drop = FALSE]
  feature_names <- colnames(X_train)
  stopifnot(
    identical(colnames(X_valid), feature_names),
    nrow(X_train) == nrow(train_df),
    nrow(X_valid) == nrow(valid_df)
  )

  y_train <- train_df$prazo_scr
  y_valid <- valid_df$prazo_scr

  # The model is fitted on log1p(term); predictions are mapped back with expm1.
  y_train_log <- log1p(y_train)
  y_valid_log <- log1p(y_valid)

  dtrain <- xgb.DMatrix(X_train, label = y_train_log)
  dvalid <- xgb.DMatrix(X_valid, label = y_valid_log)

  params <- list(
    objective = "reg:absoluteerror",
    eval_metric = "mae",
    max_depth = 6,
    min_child_weight = 5,
    eta = 0.05,
    subsample = 0.8,
    colsample_bytree = 0.8,
    seed = 1234
  )

  bst <- xgb.train(
    params = params,
    data = dtrain,
    nrounds = 800,
    watchlist = list(train = dtrain, valid = dvalid),
    early_stopping_rounds = 30,
    verbose = 2
  )

  # Validation mean absolute error on the original (non-log) scale.
  pred_log <- predict(bst, dvalid)
  pred <- expm1(pred_log)
  print(mean(abs(pred - y_valid)))

  # Distribution of the observed validation terms over 10-wide bins from 0 to
  # 60; the last bin includes its upper bound.
  breaks <- seq(0, 60, by = 10)
  bins <- cut(
    y_valid,
    breaks = breaks,
    right = FALSE,
    include.lowest = TRUE
  )
  tab <- table(bins)

  # Predicted vs. observed scatter with the identity line as reference.
  plot(
    y_valid, pred,
    xlab = "Prazo real (meses)",
    ylab = "Prazo previsto (meses)",
    main = sprintf("Predito vs Real (com distribuição do prazo real) %s", as.character(nome_banco)),
    pch = 16, col = rgb(0, 0, 0, 0.3)
  )
  abline(0, 1, col = "red", lwd = 2)

  # Vertical guides at the bin edges.
  abline(v = breaks, col = "grey80", lty = 3)

  # Annotate, near the top of the plot, the share of observations in each
  # non-empty bin.
  y_top <- max(pred, na.rm = TRUE) * 1.02
  for (i in seq_along(tab)) {
    if (tab[i] > 0) {
      x_mid <- (breaks[i] + breaks[i + 1]) / 2
      text(
        x = x_mid,
        y = y_top,
        labels = paste0(round(100 * tab[i] / sum(tab), 1), "%"),
        cex = 0.8
      )
    }
  }

  # Encode a COPY of the unlabelled rows; `base_pred` keeps its original column
  # types so it can be stacked with `base_train` afterwards.
  pred_enc <- apply_levels(base_pred, niveis_treino)
  usaveis <- stats::complete.cases(pred_enc[vars_modelo])
  base_pred <- base_pred[usaveis, ]

  if (nrow(base_pred) > 0) {
    X_pred <- sparse.model.matrix(fml, data = pred_enc[usaveis, ])[, -1, drop = FALSE]
    stopifnot(
      identical(colnames(X_pred), feature_names),
      nrow(X_pred) == nrow(base_pred)
    )
    dpred <- xgb.DMatrix(X_pred)
    pred_log <- predict(bst, dpred)
    pred_prazo <- expm1(pred_log)
  } else {
    # Nothing to predict for this bank.
    pred_prazo <- numeric(0)
  }

  # Fill the missing term with the rounded prediction.
  base_pred$prazo_scr <- round(pred_prazo)

  # Predicted rows first, then the labelled rows, as one table per bank.
  lista_bancos[[nome_banco]] <- bind_rows(base_pred, base_train)
}

In [None]:
# Stack the per-bank tables into a single data frame.
base_prazo_filled <- dplyr::bind_rows(lista_bancos)

In [None]:
# Persist the filled table.
# NOTE(review): the Prazo-Class section above writes to this same path, so
# whichever export runs last determines the file contents.
write_csv(base_prazo_filled, "base_prazo_filled.csv")

### Taxa

In [2]:
# Reload the filled table from base_prazo_filled.csv.
base_prazo_filled <- readr::read_csv("base_prazo_filled.csv")

[1mRows: [22m[34m162385[39m [1mColumns: [22m[34m39[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m   (5): IF_adj, RootDocumentNumber, uf, flag_operacao, document
[32mdbl[39m  (30): flag_desembolsou_stone, desembolso_scr, limite_stone, prazo_scr, ...
[33mlgl[39m   (2): IsAcquirerActive, Default_Month_Prior
[34mdate[39m  (2): month_year, reference_month

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [3]:
# List the column names of the reloaded table.
names(base_prazo_filled)

In [4]:
# flag_filled is 1 when taxa_scr is missing (to be filled by the model below)
# and 0 when it was observed.
base_prazo_filled <- base_prazo_filled %>%
  mutate(flag_filled = if_else(is.na(taxa_scr), 1, 0))

In [5]:
# Download series 1178 (named `selic` here) from 2024-01-01 onward via rbcb.
selic_raw <- get_series(c(selic = 1178), start_date = "2024-01-01")

# Tag every observation with the first day of its month.
df_selic <- selic_raw
df_selic$month_year <- floor_date(df_selic$date, "month")

# Within each month keep the observation on the latest available date, i.e.
# the rate in force at month end.
df_selic <- df_selic %>%
  group_by(month_year) %>%
  filter(date == max(date)) %>%
  ungroup() %>%
  select(month_year, selic_fim_mes = selic)
  


In [6]:
# Convert the month-end rate from percent to a decimal, then to the equivalent
# monthly rate by taking the 12th root of the compounding factor.
df_selic$selic_decimal_ano <- df_selic$selic_fim_mes / 100
df_selic$selic_mensal_decimal <- (1 + df_selic$selic_decimal_ano)^(1 / 12) - 1

In [7]:
# Keep only the join key and the monthly rate.
df_selic <- dplyr::select(df_selic, month_year, selic_mensal_decimal)

In [8]:
# Attach the monthly Selic rate to every row by month_year; rows whose month
# has no match in df_selic get NA.
base_prazo_filled <- left_join(base_prazo_filled, df_selic, by = "month_year")

In [9]:
# Bucket the observed rate (taxa_scr) into four ordered groups stored as a
# factor with levels "1".."4":
#   "1": taxa_scr <= 0.035
#   "2": 0.035 < taxa_scr <= 0.05
#   "3": 0.05  < taxa_scr <= 0.065
#   "4": taxa_scr > 0.065
# Missing rates stay NA. The factor declares exactly the four levels that the
# bins can produce, matching the four classes the rate classifier is trained on.
base_prazo_filled$grupo_taxa <- cut(
  base_prazo_filled$taxa_scr,
  breaks = c(-Inf, 0.035, 0.05, 0.065, Inf),
  labels = as.character(1:4),
  right = TRUE,
  include.lowest = TRUE
)

# Zero-based integer class label (0..3) taken from the factor code.
# NA groups yield NA labels.
base_prazo_filled$label_taxa <- as.integer(base_prazo_filled$grupo_taxa) - 1L


In [10]:
# Institutions (values of IF_adj) that each get their own rate-bucket
# classifier.
bancos <- c(
  "Banco Inter", "Banco do Brasil", "Bradesco", "C6 Bank",
  "Caixa", "Cooperativas", "Itaú", "Mercado Pago",
  "Nubank", "Pagseguro", "Santander"
)

# One result table per institution, filled inside the loop.
lista_bancos_tx <- vector("list", length(bancos))
names(lista_bancos_tx) <- bancos

# Categorical predictors. They are converted to factors with levels learned
# from the training partition BEFORE the design matrices are built, so the
# train, validation and prediction matrices share exactly the same dummy
# columns and the same reference level. Wrapping them in factor() inside the
# formula would re-derive the levels from whatever rows are passed in, which
# can shift the reference level between matrices.
vars_categoricas_tx <- c("month_year", "label_prazo")

# Predictors of the rate-bucket model (one-sided formula, no response).
fml_tx <- ~ FullDebt_3M + month_year + Cont_Socios + razao_debt_tpv +
  desembolso_scr + label_prazo + selic_mensal_decimal

# Every raw column the formula reads; used to drop rows that cannot be encoded.
vars_modelo_tx <- all.vars(fml_tx)

# Re-encode the columns named in `niveis` as factors with the given levels.
# Values that are missing or not among the levels become NA.
apply_levels <- function(df, niveis) {
  for (v in names(niveis)) {
    df[[v]] <- factor(df[[v]], levels = niveis[[v]])
  }
  df
}

# Fix R's RNG so the train/validation split below is reproducible.
set.seed(1234)

for (nome_banco in bancos) {
  print(nome_banco)

  # Labelled rows have an observed rate; unlabelled rows are to be predicted.
  base_banco_tx <- base_prazo_filled %>% filter(IF_adj == nome_banco)
  base_train_tx <- base_banco_tx %>% filter(!is.na(taxa_scr))
  base_pred_tx <- base_banco_tx %>% filter(is.na(taxa_scr))

  # 80/20 random split of the labelled rows.
  idx_tx <- sample(
    seq_len(nrow(base_train_tx)),
    size = floor(0.8 * nrow(base_train_tx))
  )
  train_df_tx <- base_train_tx[idx_tx, ]
  valid_df_tx <- base_train_tx[-idx_tx, ]

  # Levels seen in the training partition define the feature space.
  niveis_treino_tx <- lapply(
    train_df_tx[vars_categoricas_tx],
    function(col) levels(as.factor(col))
  )

  # Encode, then keep only rows with every predictor present.
  # sparse.model.matrix() would otherwise drop incomplete rows silently and the
  # labels would no longer line up with the matrix rows.
  train_df_tx <- apply_levels(train_df_tx, niveis_treino_tx)
  train_df_tx <- train_df_tx[stats::complete.cases(train_df_tx[vars_modelo_tx]), ]
  valid_df_tx <- apply_levels(valid_df_tx, niveis_treino_tx)
  valid_df_tx <- valid_df_tx[stats::complete.cases(valid_df_tx[vars_modelo_tx]), ]

  # Design matrices without the intercept column.
  X_train_tx <- sparse.model.matrix(fml_tx, data = train_df_tx)[, -1, drop = FALSE]
  X_valid_tx <- sparse.model.matrix(fml_tx, data = valid_df_tx)[, -1, drop = FALSE]
  feature_names_tx <- colnames(X_train_tx)
  stopifnot(identical(colnames(X_valid_tx), feature_names_tx))

  y_train_tx <- train_df_tx$label_taxa
  y_valid_tx <- valid_df_tx$label_taxa

  dtrain_tx <- xgb.DMatrix(data = X_train_tx, label = y_train_tx)
  dvalid_tx <- xgb.DMatrix(data = X_valid_tx, label = y_valid_tx)

  params_tx <- list(
    objective = "multi:softmax",
    eval_metric = "merror",
    max_depth = 4,
    min_child_weight = 3,
    eta = 0.8,
    subsample = 0.8,
    colsample_bytree = 0.8,
    num_class = 4,
    seed = 1234
  )

  bst_tx <- xgb.train(
    params = params_tx,
    data = dtrain_tx,
    nrounds = 800,
    watchlist = list(train = dtrain_tx, valid = dvalid_tx),
    early_stopping_rounds = 10,
    verbose = 2
  )

  # Share of validation rows whose predicted class equals the observed class.
  pred_test <- predict(bst_tx, dvalid_tx)
  print(data.frame(acertos = mean(pred_test == y_valid_tx)))

  # Encode a COPY of the unlabelled rows; `base_pred_tx` keeps its original
  # column types so it can be stacked with `base_train_tx` afterwards.
  pred_enc_tx <- apply_levels(base_pred_tx, niveis_treino_tx)
  usaveis_tx <- stats::complete.cases(pred_enc_tx[vars_modelo_tx])
  base_pred_tx <- base_pred_tx[usaveis_tx, ]

  if (nrow(base_pred_tx) > 0) {
    X_pred_tx <- sparse.model.matrix(
      fml_tx,
      data = pred_enc_tx[usaveis_tx, ]
    )[, -1, drop = FALSE]
    stopifnot(
      identical(colnames(X_pred_tx), feature_names_tx),
      nrow(X_pred_tx) == nrow(base_pred_tx)
    )
    dpred_tx <- xgb.DMatrix(X_pred_tx)
    pred_tx <- predict(bst_tx, dpred_tx)
  } else {
    # Nothing to predict for this bank.
    pred_tx <- numeric(0)
  }

  # NOTE: for predicted rows taxa_scr receives the predicted CLASS label
  # (0..3), not a rate. Later cells combine it with label_taxa into
  # grupo_tx_scr and use flag_filled to tell the two kinds of rows apart.
  base_pred_tx$taxa_scr <- round(pred_tx)

  # Predicted rows first, then the labelled rows, as one table per bank.
  lista_bancos_tx[[nome_banco]] <- bind_rows(base_pred_tx, base_train_tx)
}

[1] "Banco Inter"




Multiple eval metrics are present. Will use valid_merror for early stopping.
Will train until valid_merror hasn't improved in 10 rounds.

[1]	train-merror:0.341549	valid-merror:0.408451 
[2]	train-merror:0.274648	valid-merror:0.436620 
[3]	train-merror:0.271127	valid-merror:0.436620 
[4]	train-merror:0.239437	valid-merror:0.422535 
[5]	train-merror:0.204225	valid-merror:0.436620 
[6]	train-merror:0.193662	valid-merror:0.422535 
[7]	train-merror:0.176056	valid-merror:0.394366 
[8]	train-merror:0.169014	valid-merror:0.380282 
[9]	train-merror:0.154930	valid-merror:0.309859 
[10]	train-merror:0.137324	valid-merror:0.323944 
[11]	train-merror:0.123239	valid-merror:0.309859 
[12]	train-merror:0.105634	valid-merror:0.309859 
[13]	train-merror:0.095070	valid-merror:0.309859 
[14]	train-merror:0.084507	valid-merror:0.309859 
[15]	train-merror:0.080986	valid-merror:0.323944 
[16]	train-merror:0.077465	valid-merror:0.323944 
[17]	train-merror:0.070423	valid-merror:0.323944 
[18]	train-merror:0.0



Multiple eval metrics are present. Will use valid_merror for early stopping.
Will train until valid_merror hasn't improved in 10 rounds.

[1]	train-merror:0.351032	valid-merror:0.347059 
[2]	train-merror:0.308260	valid-merror:0.300000 
[3]	train-merror:0.283186	valid-merror:0.305882 
[4]	train-merror:0.269912	valid-merror:0.329412 
[5]	train-merror:0.256637	valid-merror:0.335294 
[6]	train-merror:0.243363	valid-merror:0.329412 
[7]	train-merror:0.221239	valid-merror:0.341176 
[8]	train-merror:0.202065	valid-merror:0.323529 
[9]	train-merror:0.191740	valid-merror:0.341176 
[10]	train-merror:0.187316	valid-merror:0.341176 
[11]	train-merror:0.185841	valid-merror:0.329412 
Stopping. Best iteration:
[12]	train-merror:0.184366	valid-merror:0.347059

[12]	train-merror:0.184366	valid-merror:0.347059 
  acertos
1     0.7
[1] "Bradesco"




Multiple eval metrics are present. Will use valid_merror for early stopping.
Will train until valid_merror hasn't improved in 10 rounds.

[1]	train-merror:0.270010	valid-merror:0.238462 
[2]	train-merror:0.239151	valid-merror:0.246154 
[3]	train-merror:0.239151	valid-merror:0.253846 
[4]	train-merror:0.226615	valid-merror:0.261538 
[5]	train-merror:0.219865	valid-merror:0.269231 
[6]	train-merror:0.208293	valid-merror:0.250000 
[7]	train-merror:0.193828	valid-merror:0.257692 
[8]	train-merror:0.189007	valid-merror:0.273077 
[9]	train-merror:0.174542	valid-merror:0.265385 
[10]	train-merror:0.162970	valid-merror:0.269231 
Stopping. Best iteration:
[11]	train-merror:0.159113	valid-merror:0.261538

[11]	train-merror:0.159113	valid-merror:0.261538 
    acertos
1 0.7615385
[1] "C6 Bank"




Multiple eval metrics are present. Will use valid_merror for early stopping.
Will train until valid_merror hasn't improved in 10 rounds.

[1]	train-merror:0.353414	valid-merror:0.682540 
[2]	train-merror:0.297189	valid-merror:0.682540 
[3]	train-merror:0.269076	valid-merror:0.682540 
[4]	train-merror:0.220884	valid-merror:0.682540 
[5]	train-merror:0.204819	valid-merror:0.682540 
[6]	train-merror:0.184739	valid-merror:0.698413 
[7]	train-merror:0.156627	valid-merror:0.714286 
[8]	train-merror:0.124498	valid-merror:0.714286 
[9]	train-merror:0.108434	valid-merror:0.698413 
[10]	train-merror:0.100402	valid-merror:0.698413 
Stopping. Best iteration:
[11]	train-merror:0.092369	valid-merror:0.730159

[11]	train-merror:0.092369	valid-merror:0.730159 
    acertos
1 0.3174603
[1] "Caixa"




Multiple eval metrics are present. Will use valid_merror for early stopping.
Will train until valid_merror hasn't improved in 10 rounds.

[1]	train-merror:0.150402	valid-merror:0.169725 
[2]	train-merror:0.130884	valid-merror:0.160550 
[3]	train-merror:0.129736	valid-merror:0.160550 
[4]	train-merror:0.126292	valid-merror:0.160550 
[5]	train-merror:0.125144	valid-merror:0.178899 
[6]	train-merror:0.113662	valid-merror:0.178899 
[7]	train-merror:0.107922	valid-merror:0.178899 
[8]	train-merror:0.103330	valid-merror:0.183486 
[9]	train-merror:0.096441	valid-merror:0.178899 
[10]	train-merror:0.090700	valid-merror:0.183486 
[11]	train-merror:0.089552	valid-merror:0.183486 
Stopping. Best iteration:
[12]	train-merror:0.086108	valid-merror:0.188073

[12]	train-merror:0.086108	valid-merror:0.188073 
    acertos
1 0.8394495
[1] "Cooperativas"




Multiple eval metrics are present. Will use valid_merror for early stopping.
Will train until valid_merror hasn't improved in 10 rounds.

[1]	train-merror:0.356707	valid-merror:0.375476 
[2]	train-merror:0.350610	valid-merror:0.374714 
[3]	train-merror:0.349657	valid-merror:0.376238 
[4]	train-merror:0.346989	valid-merror:0.373191 
[5]	train-merror:0.347180	valid-merror:0.371668 
[6]	train-merror:0.341845	valid-merror:0.377761 
[7]	train-merror:0.339748	valid-merror:0.379284 
[8]	train-merror:0.333270	valid-merror:0.378522 
[9]	train-merror:0.329268	valid-merror:0.380046 
[10]	train-merror:0.326601	valid-merror:0.383854 
[11]	train-merror:0.325267	valid-merror:0.386139 
[12]	train-merror:0.320884	valid-merror:0.383092 
[13]	train-merror:0.316883	valid-merror:0.384615 
[14]	train-merror:0.316120	valid-merror:0.386139 
Stopping. Best iteration:
[15]	train-merror:0.310976	valid-merror:0.383854

[15]	train-merror:0.310976	valid-merror:0.383854 
    acertos
1 0.6283321
[1] "Itaú"




Multiple eval metrics are present. Will use valid_merror for early stopping.
Will train until valid_merror hasn't improved in 10 rounds.

[1]	train-merror:0.223853	valid-merror:0.289377 
[2]	train-merror:0.202752	valid-merror:0.282051 
[3]	train-merror:0.190826	valid-merror:0.271062 
[4]	train-merror:0.183486	valid-merror:0.271062 
[5]	train-merror:0.169725	valid-merror:0.271062 
[6]	train-merror:0.160550	valid-merror:0.256410 
[7]	train-merror:0.145872	valid-merror:0.252747 
[8]	train-merror:0.143119	valid-merror:0.249084 
[9]	train-merror:0.132110	valid-merror:0.234432 
[10]	train-merror:0.124771	valid-merror:0.252747 
[11]	train-merror:0.122936	valid-merror:0.245421 
[12]	train-merror:0.116514	valid-merror:0.256410 
[13]	train-merror:0.114679	valid-merror:0.260073 
[14]	train-merror:0.107339	valid-merror:0.267399 
[15]	train-merror:0.096330	valid-merror:0.271062 
[16]	train-merror:0.091743	valid-merror:0.278388 
[17]	train-merror:0.085321	valid-merror:0.274725 
[18]	train-merror:0.0



Multiple eval metrics are present. Will use valid_merror for early stopping.
Will train until valid_merror hasn't improved in 10 rounds.

[1]	train-merror:0.544828	valid-merror:0.891892 
[2]	train-merror:0.448276	valid-merror:0.864865 
[3]	train-merror:0.351724	valid-merror:0.864865 
[4]	train-merror:0.331034	valid-merror:0.810811 
[5]	train-merror:0.303448	valid-merror:0.810811 
[6]	train-merror:0.213793	valid-merror:0.810811 
[7]	train-merror:0.213793	valid-merror:0.783784 
[8]	train-merror:0.179310	valid-merror:0.783784 
[9]	train-merror:0.179310	valid-merror:0.783784 
[10]	train-merror:0.158621	valid-merror:0.783784 
[11]	train-merror:0.158621	valid-merror:0.756757 
[12]	train-merror:0.131034	valid-merror:0.783784 
[13]	train-merror:0.096552	valid-merror:0.837838 
[14]	train-merror:0.089655	valid-merror:0.756757 
[15]	train-merror:0.103448	valid-merror:0.837838 
[16]	train-merror:0.117241	valid-merror:0.837838 
[17]	train-merror:0.089655	valid-merror:0.783784 
[18]	train-merror:0.0



Multiple eval metrics are present. Will use valid_merror for early stopping.
Will train until valid_merror hasn't improved in 10 rounds.

[1]	train-merror:0.425344	valid-merror:0.449902 
[2]	train-merror:0.398821	valid-merror:0.412574 
[3]	train-merror:0.398084	valid-merror:0.404715 
[4]	train-merror:0.391454	valid-merror:0.395874 
[5]	train-merror:0.380648	valid-merror:0.396857 
[6]	train-merror:0.378684	valid-merror:0.401768 
[7]	train-merror:0.374263	valid-merror:0.410609 
[8]	train-merror:0.372053	valid-merror:0.411591 
[9]	train-merror:0.362475	valid-merror:0.415521 
[10]	train-merror:0.358055	valid-merror:0.415521 
[11]	train-merror:0.355108	valid-merror:0.416503 
[12]	train-merror:0.352161	valid-merror:0.410609 
[13]	train-merror:0.349460	valid-merror:0.415521 
Stopping. Best iteration:
[14]	train-merror:0.344303	valid-merror:0.416503

[14]	train-merror:0.344303	valid-merror:0.416503 
    acertos
1 0.6041257
[1] "Pagseguro"




Multiple eval metrics are present. Will use valid_merror for early stopping.
Will train until valid_merror hasn't improved in 10 rounds.

[1]	train-merror:0.367470	valid-merror:0.642857 
[2]	train-merror:0.295181	valid-merror:0.642857 
[3]	train-merror:0.253012	valid-merror:0.690476 
[4]	train-merror:0.240964	valid-merror:0.642857 
[5]	train-merror:0.222892	valid-merror:0.619048 
[6]	train-merror:0.156627	valid-merror:0.619048 
[7]	train-merror:0.132530	valid-merror:0.619048 
[8]	train-merror:0.090361	valid-merror:0.619048 
[9]	train-merror:0.084337	valid-merror:0.619048 
[10]	train-merror:0.102410	valid-merror:0.619048 
[11]	train-merror:0.096386	valid-merror:0.666667 
[12]	train-merror:0.084337	valid-merror:0.666667 
[13]	train-merror:0.054217	valid-merror:0.642857 
[14]	train-merror:0.048193	valid-merror:0.666667 
Stopping. Best iteration:
[15]	train-merror:0.054217	valid-merror:0.714286

[15]	train-merror:0.054217	valid-merror:0.714286 
    acertos
1 0.3809524
[1] "Santander"




Multiple eval metrics are present. Will use valid_merror for early stopping.
Will train until valid_merror hasn't improved in 10 rounds.

[1]	train-merror:0.374408	valid-merror:0.339623 
[2]	train-merror:0.331754	valid-merror:0.364780 
[3]	train-merror:0.330174	valid-merror:0.358491 
[4]	train-merror:0.293839	valid-merror:0.364780 
[5]	train-merror:0.263823	valid-merror:0.358491 
[6]	train-merror:0.243286	valid-merror:0.377358 
[7]	train-merror:0.255924	valid-merror:0.389937 
[8]	train-merror:0.233807	valid-merror:0.377358 
[9]	train-merror:0.213270	valid-merror:0.364780 
[10]	train-merror:0.195893	valid-merror:0.345912 
Stopping. Best iteration:
[11]	train-merror:0.189573	valid-merror:0.358491

[11]	train-merror:0.189573	valid-merror:0.358491 
    acertos
1 0.6603774


In [11]:
# Stack the per-bank rate tables into a single data frame.
base_tx_filled <- dplyr::bind_rows(lista_bancos_tx)

In [12]:
# Single rate-class column: use label_taxa where it exists, otherwise fall
# back to taxa_scr (which the Taxa loop overwrites with the predicted class
# for rows it filled).
base_tx_filled <- base_tx_filled %>%
  mutate(
    grupo_tx_scr = case_when(
      !is.na(label_taxa) ~ label_taxa,
      TRUE ~ taxa_scr
    )
  )

In [13]:
# Keep rows that have a Stone limit and bucket the Stone rate into classes
# 0..3 with right-closed bins (0, 0.0349], (0.0349, 0.0499], (0.0499, 0.0649]
# and (0.0649, Inf). Rates that are missing or not above 0 get NA.
# NOTE(review): these cut points (0.0349 / 0.0499 / 0.0649) differ slightly
# from the ones used for taxa_scr (0.035 / 0.05 / 0.065) - confirm intended.
base_tx_filled_stone <- base_tx_filled %>%
  filter(!is.na(limite_stone)) %>%
  mutate(
    grupo_tx_stone = cut(
      taxa_stone,
      breaks = c(0, 0.0349, 0.0499, 0.0649, Inf),
      labels = FALSE,
      right = TRUE
    ) - 1
  )

In [14]:
# Bucket the Stone term (prazo_stone) into three ordered groups stored as a
# factor with levels "1", "2", "3":
#   "1": prazo_stone <= 12
#   "2": 12 < prazo_stone <= 24
#   "3": prazo_stone > 24
# Missing terms stay NA.
base_tx_filled_stone$grupo_prazo_stone <- cut(
  base_tx_filled_stone$prazo_stone,
  breaks = c(-Inf, 12, 24, Inf),
  labels = as.character(1:3),
  right = TRUE,
  include.lowest = TRUE
)

# Zero-based integer class label (0, 1, 2) taken from the factor code.
# NA groups yield NA labels.
base_tx_filled_stone$label_prazo_stone <-
  as.integer(base_tx_filled_stone$grupo_prazo_stone) - 1L


In [15]:
# Disbursements at the high-penetration institutions, for clients that were
# active and aware of the Stone offer.
# Keeps the analysis columns and only rows with an interaction flag, an
# automatic operation and one of the six listed institutions.
base_tx_filled_stone_ <- base_tx_filled_stone %>%
  select(
    "IF_adj", "RootDocumentNumber", "month_year", "flag_desembolsou_stone",
    "label_prazo", "label_prazo_stone", "desembolso_scr", "limite_stone",
    "grupo_tx_stone", "prazo_scr", "taxa_scr", "taxa_stone",
    "IsAcquirerActive", "flag_interacao", "flag_operacao", "grupo_tx_scr",
    "diferenca_prazo", "diferenca_limite", "flag_filled"
  ) %>%
  filter(
    flag_interacao == TRUE,
    flag_operacao == "automatica",
    IF_adj %in% c(
      "Banco Inter", "Pagseguro", "Santander",
      "C6 Bank", "Nubank", "Mercado Pago"
    )
  )

In [16]:
# Split by origin of the rate: observed (flag_filled == 0) vs. model-filled
# (flag_filled == 1).
base_orig <- filter(base_tx_filled_stone_, flag_filled == 0)
base_tx_filled_stone__ <- filter(base_tx_filled_stone_, flag_filled == 1)

In [17]:
# Per row, flag (1/0) whether the Stone offer has a higher limit than the
# disbursed amount, a higher term class, and a lower rate class, then count
# how many of the three hold. A missing comparison yields NA.
base_tx_filled_stone__ <- base_tx_filled_stone__ %>%
  mutate(
    limite_stone_melhor = as.numeric(limite_stone > desembolso_scr),
    prazo_stone_melhor = as.numeric(label_prazo_stone > label_prazo),
    taxa_stone_melhor = as.numeric(grupo_tx_stone < grupo_tx_scr),
    qtd_stone_melhor = limite_stone_melhor + prazo_stone_melhor + taxa_stone_melhor
  )

In [None]:
# Filled rows where Stone wins on exactly one dimension: total disbursed
# amount and row count.
summarise(
  filter(base_tx_filled_stone__, qtd_stone_melhor == 1),
  total_desembolso_scr = sum(desembolso_scr),
  n = n()
)

In [None]:
# Filled rows where Stone wins on exactly two dimensions: total disbursed amount.
summarise(
  filter(base_tx_filled_stone__, qtd_stone_melhor == 2),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# Filled rows where Stone wins on all three dimensions: total disbursed amount.
summarise(
  filter(base_tx_filled_stone__, qtd_stone_melhor == 3),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# Filled rows where Stone wins on no dimension: total disbursed amount and
# row count.
summarise(
  filter(base_tx_filled_stone__, qtd_stone_melhor == 0),
  total_desembolso_scr = sum(desembolso_scr),
  n = n()
)

In [None]:
# Scratch arithmetic: add the two counts.
sum(512, 57)

In [18]:
# Per row, flag (1/0) whether the Stone offer has a higher limit than the
# disbursed amount, a higher term class, and a lower rate class, then count
# how many of the three hold. A missing comparison yields NA.
base_orig <- base_orig %>%
  mutate(
    limite_stone_melhor = as.numeric(limite_stone > desembolso_scr),
    prazo_stone_melhor = as.numeric(label_prazo_stone > label_prazo),
    taxa_stone_melhor = as.numeric(grupo_tx_stone < grupo_tx_scr),
    qtd_stone_melhor = limite_stone_melhor + prazo_stone_melhor + taxa_stone_melhor
  )

In [None]:
# Observed-rate rows where Stone wins on exactly one dimension: total
# disbursed amount.
summarise(
  filter(base_orig, qtd_stone_melhor == 1),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# Observed-rate rows where Stone wins on exactly two dimensions: total
# disbursed amount.
summarise(
  filter(base_orig, qtd_stone_melhor == 2),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# Observed-rate rows where Stone wins on all three dimensions: total
# disbursed amount.
summarise(
  filter(base_orig, qtd_stone_melhor == 3),
  total_desembolso_scr = sum(desembolso_scr)
)

In [None]:
# Observed-rate rows where Stone wins on no dimension: total disbursed amount
# and row count.
summarise(
  filter(base_orig, qtd_stone_melhor == 0),
  total_desembolso_scr = sum(desembolso_scr),
  n = n()
)

In [None]:
# Share of the grand total for "better condition" count == 1.
# NOTE(review): the literals look like totals copied from the summaries above
# - confirm they are still current before relying on this figure.
sum(27454307, 1983783) / 98593550.94

In [None]:
# Share of the grand total for "better condition" count == 2.
# NOTE(review): the literals look like totals copied from the summaries above
# - confirm they are still current before relying on this figure.
sum(1059552, 957250.6) / 98593550.94

In [None]:
# Share of the grand total for "better condition" count == 3.
# NOTE(review): the literals look like totals copied from the summaries above
# - confirm they are still current before relying on this figure.
sum(499529.5, 94083.84) / 98593550.94

In [None]:
# Share of the grand total for "better condition" count == 0.
# NOTE(review): the literals look like totals copied from the summaries above
# - confirm they are still current before relying on this figure.
sum(62762943, 3782102) / 98593550.94

In [None]:
# Calibrated grand total: sum of the four per-count totals.
sum(29438090, 2016802.6, 593613.34, 66545045)

In [None]:
# Among rows where Stone wins on no dimension, total disbursed amount for
# cases whose absolute limit difference is at most 10% of the disbursed
# amount: filled rows (v1) plus observed rows (v2).
v1 <- base_tx_filled_stone__ %>%
  filter(
    qtd_stone_melhor == 0,
    abs(diferenca_limite) / desembolso_scr <= 0.1
  ) %>%
  pull(desembolso_scr) %>%
  sum()
v2 <- base_orig %>%
  filter(
    qtd_stone_melhor == 0,
    abs(diferenca_limite) / desembolso_scr <= 0.1
  ) %>%
  pull(desembolso_scr) %>%
  sum()

print(v1 + v2)

In [None]:
# Among rows where Stone wins on no dimension, total disbursed amount for
# cases with Stone rate class 2 and competitor rate class of at least 2:
# filled rows (v1) plus observed rows (v2).
v1 <- base_tx_filled_stone__ %>%
  filter(qtd_stone_melhor == 0, grupo_tx_stone == 2, grupo_tx_scr >= 2) %>%
  pull(desembolso_scr) %>%
  sum()
v2 <- base_orig %>%
  filter(qtd_stone_melhor == 0, grupo_tx_stone == 2, grupo_tx_scr >= 2) %>%
  pull(desembolso_scr) %>%
  sum()

print(v1 + v2)

In [None]:
# Disbursement where Stone wins on no condition, restricted to rows with
# grupo_tx_stone == 2 and grupo_tx_scr >= 2 (filled base plus original base).
# NOTE(review): this cell repeats the preceding cell's computation verbatim —
# possibly a different filter was intended here; confirm or remove.
v1 <- base_tx_filled_stone__ %>%
  filter(
    qtd_stone_melhor == 0,
    grupo_tx_stone == 2,
    grupo_tx_scr >= 2
  ) %>%
  summarise(soma_des = sum(desembolso_scr)) %>%
  pull(soma_des)

v2 <- base_orig %>%
  filter(
    qtd_stone_melhor == 0,
    grupo_tx_stone == 2,
    grupo_tx_scr >= 2
  ) %>%
  summarise(soma_des = sum(desembolso_scr)) %>%
  pull(soma_des)

print(v1 + v2)

In [19]:
# Disbursements at the low-penetration financial institutions (IFs), kept only
# for rows with an interaction flag set and an 'automatica' operation.
# Only the columns needed by the comparisons below are retained
# ('label_prazo' was listed twice in the original select; select keeps it once).
base_tx_filled_stone_baixapen <- base_tx_filled_stone %>%
  select(
    'IF_adj', 'RootDocumentNumber', 'month_year', 'label_prazo',
    'flag_desembolsou_stone', 'desembolso_scr', 'label_prazo_stone',
    'limite_stone', 'grupo_tx_stone', 'prazo_stone', 'prazo_scr',
    'taxa_scr', 'taxa_stone', 'IsAcquirerActive', 'flag_interacao',
    'flag_operacao', 'grupo_tx_scr', 'diferenca_prazo', 'diferenca_limite',
    'flag_filled'
  ) %>%
  filter(
    flag_interacao == TRUE,
    flag_operacao == 'automatica',
    IF_adj %in% c('Banco do Brasil', 'Bradesco', 'Caixa', 'Cooperativas', 'Itaú', 'Outros')
  )

In [20]:
# Split the low-penetration base by flag_filled:
# 0 goes to base_orig_baixapen, 1 goes to base_tx_filled_stone__baixapen.
base_orig_baixapen <- base_tx_filled_stone_baixapen %>%
  filter(flag_filled == 0)

base_tx_filled_stone__baixapen <- base_tx_filled_stone_baixapen %>%
  filter(flag_filled == 1)

In [29]:
# Flag, per row of the flag_filled == 1 subset, whether the Stone offer beats
# the SCR operation on limit, term and rate group (1 = better, 0 = not),
# then count how many of the three conditions hold.
base_tx_filled_stone__baixapen_ <- base_tx_filled_stone__baixapen %>%
  mutate(
    limite_stone_melhor = if_else(limite_stone > desembolso_scr, 1, 0),
    prazo_stone_melhor = if_else(label_prazo_stone > label_prazo, 1, 0),
    taxa_stone_melhor = if_else(grupo_tx_stone < grupo_tx_scr, 1, 0),
    qtd_stone_melhor = limite_stone_melhor + prazo_stone_melhor + taxa_stone_melhor
  )

In [None]:
# Total desembolso_scr and row count where exactly one condition favors Stone.
base_tx_filled_stone__baixapen_ %>%
  filter(qtd_stone_melhor == 1) %>%
  summarise(
    total_desembolso_scr = sum(desembolso_scr),
    n = n()
  )

In [None]:
# Total desembolso_scr and row count where exactly two conditions favor Stone.
base_tx_filled_stone__baixapen_ %>%
  filter(qtd_stone_melhor == 2) %>%
  summarise(
    total_desembolso_scr = sum(desembolso_scr),
    n = n()
  )

In [None]:
# Total desembolso_scr and row count where all three conditions favor Stone.
base_tx_filled_stone__baixapen_ %>%
  filter(qtd_stone_melhor == 3) %>%
  summarise(
    total_desembolso_scr = sum(desembolso_scr),
    n = n()
  )

In [None]:
# Total desembolso_scr and row count where no condition favors Stone.
base_tx_filled_stone__baixapen_ %>%
  filter(qtd_stone_melhor == 0) %>%
  summarise(
    total_desembolso_scr = sum(desembolso_scr),
    n = n()
  )

In [23]:
# Flag, per row of the flag_filled == 0 subset, whether the Stone offer beats
# the SCR operation on limit, term and rate (1 = better, 0 = not), then count
# how many of the three conditions hold.
# NOTE(review): the rate flag here compares the raw rates (taxa_stone < taxa_scr),
# whereas the other qtd_stone_melhor definitions in this notebook compare the
# rate groups (grupo_tx_stone < grupo_tx_scr). Confirm the difference is
# intentional; the recorded outputs below depend on it.
base_orig_baixapen <- base_orig_baixapen %>%
  mutate(
    limite_stone_melhor = if_else(limite_stone > desembolso_scr, 1, 0),
    prazo_stone_melhor = if_else(label_prazo_stone > label_prazo, 1, 0),
    taxa_stone_melhor = if_else(taxa_stone < taxa_scr, 1, 0),
    qtd_stone_melhor = limite_stone_melhor + prazo_stone_melhor + taxa_stone_melhor
  )

In [24]:
# Total desembolso_scr and row count where exactly one condition favors Stone.
base_orig_baixapen %>%
  filter(qtd_stone_melhor == 1) %>%
  summarise(
    total_desembolso_scr = sum(desembolso_scr),
    n = n()
  )

total_desembolso_scr,n
<dbl>,<int>
23773735,337


In [25]:
# Total desembolso_scr and row count where exactly two conditions favor Stone.
base_orig_baixapen %>%
  filter(qtd_stone_melhor == 2) %>%
  summarise(
    total_desembolso_scr = sum(desembolso_scr),
    n = n()
  )

total_desembolso_scr,n
<dbl>,<int>
8685907,205


In [26]:
# Total desembolso_scr and row count where all three conditions favor Stone.
base_orig_baixapen %>%
  filter(qtd_stone_melhor == 3) %>%
  summarise(
    total_desembolso_scr = sum(desembolso_scr),
    n = n()
  )

total_desembolso_scr,n
<dbl>,<int>
1054252,69


In [27]:
# Total desembolso_scr and row count where no condition favors Stone.
base_orig_baixapen %>%
  filter(qtd_stone_melhor == 0) %>%
  summarise(
    total_desembolso_scr = sum(desembolso_scr),
    n = n()
  )

total_desembolso_scr,n
<dbl>,<int>
41748389,296


In [None]:
# Share of the calibrated total for the "1 better condition" bucket.
# Second addend is the base_orig_baixapen total for qtd_stone_melhor == 1
# (23773735); the first is presumably the matching filled-base total.
# Denominator is the calibrated total computed in the last cell of this group.
(323105877 + 23773735) / 1205683575.6

In [None]:
# Share of the calibrated total for the "2 better conditions" bucket.
# Second addend is the base_orig_baixapen total for qtd_stone_melhor == 2 (8685907).
(11359033 + 8685907) / 1205683575.6

In [None]:
# Share of the calibrated total for the "3 better conditions" bucket.
# Second addend is the base_orig_baixapen total for qtd_stone_melhor == 3
# (1054252). The previous version reused the == 2 total (8685907) here by
# mistake, which also inflated the denominator used in every share.
(592478.6 + 1054252) / 1205683575.6

In [None]:
# Share of the calibrated total for the "0 better conditions" bucket.
# Second addend is the base_orig_baixapen total for qtd_stone_melhor == 0 (41748389).
(795363904 + 41748389) / 1205683575.6

In [None]:
# Calibrated total: sum of the four buckets (1, 2, 3 and 0 better conditions).
# Evaluates to 1205683575.6, the denominator used in the share cells.
(323105877 + 23773735) + (11359033 + 8685907) + (592478.6 + 1054252) + (795363904 + 41748389)

In [None]:
# Show the rows of base_tx_filled_stone_ where no condition favors Stone.
base_tx_filled_stone_ %>%
  filter(qtd_stone_melhor == 0)

In [None]:
# Total desembolso_scr over rows where no condition favors Stone.
base_tx_filled_stone_ %>%
  filter(qtd_stone_melhor == 0) %>%
  summarise(soma_des = sum(desembolso_scr))

In [None]:
# Total desembolso_scr over rows where no condition favors Stone and the
# absolute limit difference is at most 10% of desembolso_scr.
base_tx_filled_stone_ %>%
  filter(
    qtd_stone_melhor == 0,
    abs(diferenca_limite) / desembolso_scr <= 0.1
  ) %>%
  summarise(soma_des = sum(desembolso_scr))

In [None]:
# Total desembolso_scr over rows where no condition favors Stone and the
# absolute limit difference is at most 10% of desembolso_scr.
# NOTE(review): this cell repeats the preceding cell verbatim — confirm or remove.
base_tx_filled_stone_ %>%
  filter(
    qtd_stone_melhor == 0,
    abs(diferenca_limite) / desembolso_scr <= 0.1
  ) %>%
  summarise(soma_des = sum(desembolso_scr))

In [None]:
# Total desembolso_scr over rows where qtd_stone_melhor == 0 and Stone's
# term label is at least the SCR term label.
base_tx_filled_stone_ %>%
  filter(
    qtd_stone_melhor == 0,
    label_prazo_stone >= label_prazo
  ) %>%
  summarise(soma_des = sum(desembolso_scr))

In [None]:
# Total desembolso_scr and row count, across the low-penetration base, where no
# condition favors Stone and the absolute limit difference is at most 10% of
# desembolso_scr.
# base_tx_filled_stone_baixapen itself has no qtd_stone_melhor column (it is
# only added to the two flag_filled subsets), so filtering it directly fails.
# Stack the two subsets that carry the column instead. Requires the cells that
# build base_tx_filled_stone__baixapen_ and add the flags to base_orig_baixapen
# to have been run first.
bind_rows(base_tx_filled_stone__baixapen_, base_orig_baixapen) %>%
  filter(
    qtd_stone_melhor == 0,
    abs(diferenca_limite) / desembolso_scr <= 0.1
  ) %>%
  summarise(
    soma_des = sum(desembolso_scr),
    n = n()
  )

In [None]:
# Per-IF total of desembolso_scr for rows where no condition favors Stone,
# largest total first.
base_tx_filled_stone_ %>%
  filter(qtd_stone_melhor == 0) %>%
  group_by(IF_adj) %>%
  summarise(soma_des = sum(desembolso_scr)) %>%
  arrange(desc(soma_des))

In [None]:
# Per-IF total of desembolso_scr in the low-penetration base, largest total first.
base_tx_filled_stone_baixapen %>%
  group_by(IF_adj) %>%
  summarise(soma_des = sum(desembolso_scr)) %>%
  arrange(desc(soma_des))