## PARCEIROS 360

###  Pagamento - Utilização Factoring & Bloqueios

- <b>Analista(s)</b>: Daniel Rezende
- <b>Projeto</b>: Parceiros 360 - Um modelo de análise de risco para auxiliar no controle das tomadas de decisões através de análises preditivas
- <b>Data</b>: 24/10/2022
- <b>Descrição</b>:

In [1]:
!python --version

Python 3.8.16


### BIBLIOTECAS:

In [2]:
## data
import pandas as pd
import numpy as np
import datetime as dt
from dateutil.relativedelta import relativedelta
from unicodedata import normalize

## vis
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

## modelo
from sklearn.cluster import KMeans
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier

from treeinterpreter import treeinterpreter as ti

## alertas
import warnings
warnings.filterwarnings("ignore")

### LAYOUT:

In [3]:
%matplotlib inline
matplotlib.rcParams['figure.figsize'] = (12, 6)

pd.set_option('display.max_columns', 200)
pd.set_option('display.width', 1000)

rc={'font.size': 16, 'axes.labelsize': 16, 'legend.fontsize': 16,
    'axes.titlesize': 18, 'xtick.labelsize': 16, 'ytick.labelsize': 16}

sns.set(rc=rc)
sns.set_style("darkgrid")
sns.set_palette("pastel")

### CONFIGS:

In [4]:
#pd.set_option('max_columns', 200)
pd.options.display.max_columns = 200 

seed = 42
np.random.seed(seed)

### DADOS:

In [5]:
PATH = "datasets/"

****
### BASE DE PAGAMENTO:

In [6]:
FILE = "02_PAGTO.csv"
pagto = pd.read_csv(PATH + FILE, sep = ';', encoding = 'ISO-8859-1', engine ='python')

pagto['COMPETENCIA'] = pd.to_datetime(pagto['COMPETENCIA'])

pagto['FORNECEDOR'] = pagto['FORNECEDOR'].apply(lambda x: str(x).zfill(8))
pagto = pagto[['FORNECEDOR', 'MONT_EM_MI', 'TP_PAGTO_BANCO', 'TP_PAGTO_FACTORING', 'TP_PAGTO_FACTORING_MONT_EM_MI', \
               'BLOQUEIO_NAO', 'BLOQUEIO_NAO_MONT_EM_MI', 'BLOQUEIO_SIM', 'BLOQUEIO_SIM_MONT_EM_MI']]

for i in pagto.columns:
    if pagto[i].dtypes=='object':
        pagto[i] = pagto[i].str.upper()
        
pagto = pagto.drop_duplicates().reset_index(drop=True)

pagto

Unnamed: 0,FORNECEDOR,MONT_EM_MI,TP_PAGTO_BANCO,TP_PAGTO_FACTORING,TP_PAGTO_FACTORING_MONT_EM_MI,BLOQUEIO_NAO,BLOQUEIO_NAO_MONT_EM_MI,BLOQUEIO_SIM,BLOQUEIO_SIM_MONT_EM_MI
0,10000001,-43120.91,1,0,0.0,1,-43120.91,0,0.0
1,10000004,-15571.65,4,0,0.0,4,-15571.65,0,0.0
2,10000004,-4938.05,1,0,0.0,1,-4938.05,0,0.0
3,10000004,-6123.58,3,0,0.0,3,-6123.58,0,0.0
4,10000004,-6156.44,2,0,0.0,2,-6156.44,0,0.0
...,...,...,...,...,...,...,...,...,...
186825,DF3501SP,-10082.08,28,0,0.0,28,-10082.08,0,0.0
186826,DF3501SP,-14749.67,28,0,0.0,28,-14749.67,0,0.0
186827,DF7029EX,-35641.13,1,0,0.0,1,-35641.13,0,0.0
186828,DF7029EX,-6036.65,2,0,0.0,2,-6036.65,0,0.0


In [7]:
# agrupa os valores dos fornecedores
pagto_sum = pagto.groupby(['FORNECEDOR']).sum().reset_index()

pagto_sum

Unnamed: 0,FORNECEDOR,MONT_EM_MI,TP_PAGTO_BANCO,TP_PAGTO_FACTORING,TP_PAGTO_FACTORING_MONT_EM_MI,BLOQUEIO_NAO,BLOQUEIO_NAO_MONT_EM_MI,BLOQUEIO_SIM,BLOQUEIO_SIM_MONT_EM_MI
0,00008687,-88.78,1,0,0.0,0,0.00,1,-88.78
1,00122607,-23.88,1,0,0.0,0,0.00,1,-23.88
2,01070492,2.25,1,0,0.0,1,2.25,0,0.00
3,04000005,-5116745.03,17558,0,0.0,16366,-4849578.50,1192,-267166.53
4,04000007,-27601170.03,2697,0,0.0,2548,-27475074.54,149,-126095.49
...,...,...,...,...,...,...,...,...,...
26740,DF0ST1SP,-5193308.35,1,0,0.0,1,-5193308.35,0,0.00
26741,DF0ST1TO,-392147.77,1,0,0.0,1,-392147.77,0,0.00
26742,DF0SV3RJ,-1316045.82,3,0,0.0,3,-1316045.82,0,0.00
26743,DF3501SP,-192219.12,362,0,0.0,362,-192219.12,0,0.00


In [8]:
len(pagto_sum['FORNECEDOR'].unique())

26745

***
### BASE DE CADASTRO DE NOME DOS FORNECEDORES:

In [9]:
FILE = "nome_fornecedores.xlsx"
nome_fornecedores = pd.read_excel(PATH + FILE)
nome_fornecedores = nome_fornecedores[['RAIZ_CNPJ', 'NOME_FORNECEDOR', 'FORNECEDOR', 'AREA_ATUACAO']]

nome_fornecedores['RAIZ_CNPJ'] = nome_fornecedores['RAIZ_CNPJ'].apply(lambda x: str(x).zfill(8))
nome_fornecedores = nome_fornecedores[nome_fornecedores["RAIZ_CNPJ"].str.contains("NAN")== False]
nome_fornecedores = nome_fornecedores[nome_fornecedores["RAIZ_CNPJ"].str.contains("-")== False]

for i in nome_fornecedores.columns:
    if nome_fornecedores[i].dtypes=='object':
        nome_fornecedores[i] = nome_fornecedores[i].str.upper()
        
nome_fornecedores = nome_fornecedores.drop_duplicates().reset_index(drop=True)

nome_fornecedores

Unnamed: 0,RAIZ_CNPJ,NOME_FORNECEDOR,FORNECEDOR,AREA_ATUACAO
0,02714643,333 COMERCIO E COMUNICACOES LTDA,7700312,COMÃRCIO
1,02714643,333 COMERCIO E COMUNICACOES LTDA,7700251,COMÃRCIO
2,02714643,333 COMERCIO E COMUNICACOES LTDA,7716773,COMÃRCIO
3,02714643,333 COMERCIO E COMUNICACOES LTDA,7721785,COMÃRCIO
4,02714643,333 COMERCIO E COMUNICACOES LTDA,7729957,COMÃRCIO
...,...,...,...,...
4572,08747596,ZOTTY CELULARES LTDA - ME,7729259,COMÃRCIO
4573,08747596,ZOTTY CELULARES LTDA - ME,7729332,COMÃRCIO
4574,05216804,ZTE DO BRASIL INDÃSTRIA COMÃRCIO,14017703,TELECOMUNICAÃÃES
4575,05216804,ZTE DO BRASIL INDÃSTRIA COMÃRCIO,14025498,TELECOMUNICAÃÃES


In [10]:
len(nome_fornecedores['RAIZ_CNPJ'].unique())

1319

***
### BASE PAGAMENTO PARA PEGAR SCORE DOS KPIs

In [11]:
FILE_Score = "01DF_FULL.csv"
score = pd.read_csv(PATH + FILE_Score, sep = ';', encoding = 'ISO-8859-1', engine ='python')
score = score[['RAIZ_CNPJ', 'COMPETENCIA', 'KPI_PGTO']]

score['COMPETENCIA'] = pd.to_datetime(score['COMPETENCIA'])

score['RAIZ_CNPJ'] = score['RAIZ_CNPJ'].apply(lambda x: str(x).zfill(8))
score = score[score["RAIZ_CNPJ"].str.contains("NAN")== False]
score = score[score["RAIZ_CNPJ"].str.contains("-")== False]

for i in score.columns:
    if score[i].dtypes=='object':
        score[i] = score[i].str.upper()
        
score = score.drop_duplicates().reset_index(drop=True)

score

Unnamed: 0,RAIZ_CNPJ,COMPETENCIA,KPI_PGTO
0,00028986,2021-01-01,BOM
1,00059799,2021-01-01,RUIM
2,00179280,2021-01-01,BOM
3,00196526,2021-01-01,BOM
4,00244116,2021-01-01,BOM
...,...,...,...
6978,88315379,2022-07-01,BOM
6979,91088328,2022-07-01,BOM
6980,92771286,2022-07-01,RUIM
6981,96418264,2022-07-01,BOM


In [12]:
len(score['RAIZ_CNPJ'].unique())

479

***
### JOINS:

### JOIN fornecedores COM NOME_FORNECEDORES

In [13]:
fornecedores_w_name = pagto_sum.join(nome_fornecedores.set_index('FORNECEDOR'),
                                   how='inner',
                                   on=['FORNECEDOR'])\
                               .drop_duplicates().reset_index(drop=True)\
                              # .drop('FORNECEDOR', axis=1)

fornecedores_w_name

Unnamed: 0,FORNECEDOR,MONT_EM_MI,TP_PAGTO_BANCO,TP_PAGTO_FACTORING,TP_PAGTO_FACTORING_MONT_EM_MI,BLOQUEIO_NAO,BLOQUEIO_NAO_MONT_EM_MI,BLOQUEIO_SIM,BLOQUEIO_SIM_MONT_EM_MI,RAIZ_CNPJ,NOME_FORNECEDOR,AREA_ATUACAO
0,10000705,-2.146988e+05,33,0,0.0,33,-2.146988e+05,0,0.0,02641663,FUNDAÃÃO CPQD - CENTRO DE PESQUISA,CONSULTORIA
1,10003085,-2.494560e+08,31525,0,0.0,31525,-2.494560e+08,0,0.0,04052108,AMERICAN TOWER DO BRASIL CESSÃO DE,TELECOMUNICAÃÃES
2,10003998,-1.003034e+07,1690,0,0.0,1690,-1.003034e+07,0,0.0,04052108,AMERICAN TOWER DO BRASIL CESSÃO DE,TELECOMUNICAÃÃES
3,10004490,-3.532824e+05,15,0,0.0,15,-3.532824e+05,0,0.0,47508411,COMPANHIA BRASILEIRA DE DISTRIBUICA,COMÃRCIO
4,10006926,-5.335883e+05,95,0,0.0,95,-5.335883e+05,0,0.0,06862627,CLAUDINO SA LOJAS DE DEPARTAMENTOS,COMÃRCIO
...,...,...,...,...,...,...,...,...,...,...,...,...
831,AL200025,-1.551760e+05,2,0,0.0,2,-1.551760e+05,0,0.0,91088328,TERRA NETWORKS BRASIL S/A,INFORMATICA
832,AL200114,-1.451567e+06,5,0,0.0,5,-1.451567e+06,0,0.0,19290938,TELEFONICA CIBERSEGURANCA E TECNOLO,CONSULTORIA
833,DF0007SP,-1.783616e+05,4,0,0.0,4,-1.783616e+05,0,0.0,74544297,SAP BRASIL LTDA,INFORMATICA
834,DF0711SP,-5.221003e+06,4,0,0.0,4,-5.221003e+06,0,0.0,66970229,NEXTEL TELECOMUNICAÃÃES LTDA,TELECOMUNICAÃÃES


In [14]:
len(fornecedores_w_name['RAIZ_CNPJ'].unique())

472

#### JOIN fornecedores E pagto com KPI

In [15]:
for_pgm_kpi = fornecedores_w_name.join(score.set_index('RAIZ_CNPJ'),
                                          how='inner',
                                          on=['RAIZ_CNPJ'])\
                                 .drop_duplicates().reset_index(drop=True)

for_pgm_kpi

Unnamed: 0,FORNECEDOR,MONT_EM_MI,TP_PAGTO_BANCO,TP_PAGTO_FACTORING,TP_PAGTO_FACTORING_MONT_EM_MI,BLOQUEIO_NAO,BLOQUEIO_NAO_MONT_EM_MI,BLOQUEIO_SIM,BLOQUEIO_SIM_MONT_EM_MI,RAIZ_CNPJ,NOME_FORNECEDOR,AREA_ATUACAO,COMPETENCIA,KPI_PGTO
0,10000705,-214698.81,33,0,0.00,33,-214698.81,0,0.00,02641663,FUNDAÃÃO CPQD - CENTRO DE PESQUISA,CONSULTORIA,2021-01-01,BOM
1,10000705,-214698.81,33,0,0.00,33,-214698.81,0,0.00,02641663,FUNDAÃÃO CPQD - CENTRO DE PESQUISA,CONSULTORIA,2021-02-01,BOM
2,10000705,-214698.81,33,0,0.00,33,-214698.81,0,0.00,02641663,FUNDAÃÃO CPQD - CENTRO DE PESQUISA,CONSULTORIA,2021-03-01,BOM
3,10000705,-214698.81,33,0,0.00,33,-214698.81,0,0.00,02641663,FUNDAÃÃO CPQD - CENTRO DE PESQUISA,CONSULTORIA,2021-04-01,BOM
4,10000705,-214698.81,33,0,0.00,33,-214698.81,0,0.00,02641663,FUNDAÃÃO CPQD - CENTRO DE PESQUISA,CONSULTORIA,2021-05-01,BOM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8388,14066856,-919685.02,0,85,-919685.02,0,0.00,85,-919685.02,29090742,M2 SERVICOS DE MENSAGERIA,TRANSPORTES,2022-03-01,RUIM
8389,14066856,-919685.02,0,85,-919685.02,0,0.00,85,-919685.02,29090742,M2 SERVICOS DE MENSAGERIA,TRANSPORTES,2022-04-01,RUIM
8390,14066856,-919685.02,0,85,-919685.02,0,0.00,85,-919685.02,29090742,M2 SERVICOS DE MENSAGERIA,TRANSPORTES,2022-05-01,RUIM
8391,14066856,-919685.02,0,85,-919685.02,0,0.00,85,-919685.02,29090742,M2 SERVICOS DE MENSAGERIA,TRANSPORTES,2022-06-01,RUIM


In [16]:
len(for_pgm_kpi['RAIZ_CNPJ'].unique())

302

In [17]:
for_pgm_kpi.to_excel(r'intermediate/etl_previsao_pagamentos.xlsx')