In [27]:
import json
import warnings

import numpy as np
import pandas as pd

# SettingWithCopyWarning is resolved defensively because its import path
# depends on the installed pandas release: the public location is
# pandas.errors, while older releases only exposed it through the private
# pandas.core.common module. If neither location provides it, a placeholder
# Warning subclass is defined so that the warning filter configured later in
# the notebook still receives a valid category and simply matches nothing.
try:
    from pandas.errors import SettingWithCopyWarning
except ImportError:
    try:
        from pandas.core.common import SettingWithCopyWarning
    except ImportError:
        class SettingWithCopyWarning(Warning):
            """Stand-in used when the installed pandas does not define this warning."""

In [28]:
# Configure how records are shown on screen: allow pandas to render up to
# 999 rows and 999 columns, and silence SettingWithCopyWarning from here on.

pd.set_option("display.max_rows", 999)
pd.set_option("display.max_columns", 999)
warnings.simplefilter("ignore", category=SettingWithCopyWarning)

In [16]:
# Basic import of an Excel worksheet: read only the first column of the
# 'Não encontrados' sheet, name it 'CodDocVenda' and load it as text.

arquivo_Alex_NE = 'C:\\Temp\\Frete_Rodrigo\\INDICADORES_FRETE_2021_COMPLETO_zerados.xlsx'

dfANE = pd.read_excel(
    arquivo_Alex_NE,
    sheet_name='Não encontrados',
    usecols=[0],
    names=['CodDocVenda'],
    dtype={'CodDocVenda': 'str'},
)

In [11]:
# Basic import of a CSV file: read four columns (positions 0, 10, 39 and 40)
# of the comma-separated file, rename them, and fix their dtypes up front
# (text for the period and document code, float for the two value columns).

arquivoDatabricks = 'C:\\Temp\\Frete_Rodrigo\\base_databricks.csv'

dfADB = pd.read_csv(
    arquivoDatabricks,
    sep=',',
    header=0,  # first line of the file is a header row, replaced by `names`
    usecols=[0, 10, 39, 40],
    names=['AnoMes', 'CodDocVenda', 'VlrPesoLiqItem', 'VlrLiqItemOrdem'],
    dtype={
        'AnoMes': 'str',
        'CodDocVenda': 'str',
        'VlrPesoLiqItem': 'float',
        'VlrLiqItemOrdem': 'float',
    },
)




In [4]:
# Structure of a DataFrame: print the index range, every column with its
# non-null count and dtype, and the frame's memory usage.

dfADB.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1385060 entries, 0 to 1385059
Data columns (total 4 columns):
 #   Column           Non-Null Count    Dtype  
---  ------           --------------    -----  
 0   AnoMes           1385060 non-null  object 
 1   CodDocVenda      1385060 non-null  object 
 2   VlrPesoLiqItem   1385060 non-null  float64
 3   VlrLiqItemOrdem  1385060 non-null  float64
dtypes: float64(2), object(2)
memory usage: 42.3+ MB


In [5]:
# Select the top N records of a DataFrame (here the first 5 rows).

dfADB.head(5)

Unnamed: 0,AnoMes,CodDocVenda,VlrPesoLiqItem,VlrLiqItemOrdem
0,201901,4001005220,-10614.0,-841.8
1,201901,4001000729,-260.0,-59.2
2,202001,4001357831,1035.0,135.15
3,202001,4001357831,40.0,5.22
4,202001,4001357831,3510.0,458.34


In [12]:
# Group values inside a DataFrame: collapse dfADB to one row per
# (AnoMes, CodDocVenda) pair, summing both value columns. The grouping keys
# stay as regular columns (as_index=False) and the result replaces dfADB.

dfADB = (
    dfADB
    .groupby(by=['AnoMes', 'CodDocVenda'], as_index=False)
    .agg(
        VlrPesoLiqItem=pd.NamedAgg(column='VlrPesoLiqItem', aggfunc='sum'),
        VlrLiqItemOrdem=pd.NamedAgg(column='VlrLiqItemOrdem', aggfunc='sum'),
    )
)


dfADB

Unnamed: 0,AnoMes,CodDocVenda,VlrPesoLiqItem,VlrLiqItemOrdem
0,201601,4000053944,7714.0,138.32
1,201601,4000053984,13104.0,2933.28
2,201601,4000054402,2600.0,896.27
3,201601,4000054623,13538.0,1534.76
4,201601,4000054704,9048.0,1369.23
...,...,...,...,...
663184,202212,4002554296,27630.0,8306.68
663185,202212,4002554497,28300.0,9940.94
663186,202212,4002554696,28060.0,9856.64
663187,202212,4002554895,3250.0,630.00


In [14]:
# Sort a DataFrame: order the rows by AnoMes in descending order and rebind
# dfADB to the sorted frame (original index labels are kept).

dfADB = dfADB.sort_values(by='AnoMes', ascending=False)

dfADB

Unnamed: 0,AnoMes,CodDocVenda,VlrPesoLiqItem,VlrLiqItemOrdem
663188,202212,4002555095,3497.0,1455.29
663176,202212,4002553597,13052.0,1385.52
663175,202212,4002553596,28570.0,8589.29
661271,202212,4002524862,15912.0,2019.60
663186,202212,4002554696,28060.0,9856.64
...,...,...,...,...
6,201601,4000054814,6240.0,648.00
5,201601,4000054743,29450.0,3159.69
4,201601,4000054704,9048.0,1369.23
3,201601,4000054623,13538.0,1534.76


In [None]:
# Remove duplicates from a DataFrame: reload the 'Não encontrados' codes from
# the Excel file, keep only the last occurrence of each CodDocVenda, renumber
# the index from zero and show the resulting structure.

arquivo_Alex_NE = 'C:\\Temp\\Frete_Rodrigo\\INDICADORES_FRETE_2021_COMPLETO_zerados.xlsx'

dfANE = pd.read_excel(
    arquivo_Alex_NE,
    sheet_name='Não encontrados',
    usecols=[0],
    names=['CodDocVenda'],
    dtype={'CodDocVenda': 'str'},
)

dfANE = dfANE.drop_duplicates(subset=['CodDocVenda'], keep='last')
dfANE = dfANE.reset_index(drop=True)

dfANE.info()

In [17]:
# Relate two DataFrames: join dfANE (left) with dfADB (right) on the shared
# CodDocVenda column. No `how` is given, so pandas' default join type applies.

resultAmostra = dfANE.merge(dfADB, on='CodDocVenda')

In [24]:
# Handle null values: replace every missing value in resultAmostra with the
# string 'Nulo', modifying the frame in place.
# NOTE(review): the fill is not limited to text columns, so a missing value
# in a numeric column would also receive the string 'Nulo' — confirm that
# this is intended before relying on those columns for arithmetic.

resultAmostra.fillna('Nulo',inplace=True)

In [23]:
# Apply the equivalent of SQL Server's CASE: label each row in a new
# 'Comparacao' column by comparing AnoMes (as text) against '202201', then
# count how many rows received each label.

resultAmostra.loc[resultAmostra['AnoMes'].ge('202201'), 'Comparacao'] = 'REGISTRO NOVO'
resultAmostra.loc[resultAmostra['AnoMes'].lt('202201'), 'Comparacao'] = 'REGISTRO VELHO'

resultAmostra.groupby('Comparacao')['Comparacao'].count().to_frame('Qtd')


Unnamed: 0_level_0,Qtd
Comparacao,Unnamed: 1_level_1
REGISTRO NOVO,99
REGISTRO VELHO,60501


In [26]:
# Pad with zeros on the left: convert CodDocVenda to text and left-pad each
# value with '0' up to a width of 18 characters.

resultAmostra['CodDocVenda'] = resultAmostra['CodDocVenda'].astype(str).str.zfill(18)

resultAmostra

Unnamed: 0,CodDocVenda,AnoMes,VlrPesoLiqItem,VlrLiqItemOrdem,Comparacao
0,000000004000240586,202111,3666.0,611.94,REGISTRO VELHO
1,000000004000240586,201609,858.0,143.22,REGISTRO VELHO
2,000000004000261953,202111,2908.0,485.40,REGISTRO VELHO
3,000000004000261953,201609,1495.0,249.55,REGISTRO VELHO
4,000000004000650069,202112,30500.0,3876.68,REGISTRO VELHO
...,...,...,...,...,...
60595,000000004002160602,202112,15022.0,663.04,REGISTRO VELHO
60596,000000004002160996,202112,21690.0,812.29,REGISTRO VELHO
60597,000000004002161195,202112,11306.0,1213.66,REGISTRO VELHO
60598,000000004002161796,202112,14469.0,3172.05,REGISTRO VELHO


In [None]:
# Rename columns (RENAME): drop the '_x' suffix from the two value columns of
# result_ZERADO, modifying the frame in place.
# NOTE(review): result_ZERADO is not created in the visible cells; the '_x'
# suffix presumably comes from an earlier merge with overlapping column
# names — confirm against the cell that builds it.

result_ZERADO.rename(columns = {'VlrLiqItemOrdem_x':'VlrLiqItemOrdem'
                               ,'VlrPesoLiqItem_x':'VlrPesoLiqItem'}, inplace = True)

In [None]:
# Equivalent of SQL Server's LEFT and RIGHT: split the 'Chave' text column
# into its first 10 characters ('codrelatorio') and everything from the 11th
# character onwards ('codnumrelatorio').

result02['codrelatorio'] = result02['Chave'].str.slice(stop=10)
result02['codnumrelatorio'] = result02['Chave'].str.slice(start=10)
