# Formas de baixar os csvs

In [3]:
import requests
from zipfile import ZipFile

# fazendo o download do conteúdo do arquivo
url = "http://www.camara.leg.br/cotas/Ano-2018.csv.zip"
r = requests.get(url)

# como acessar o conteúdo do arquivo?
# r.content

# como verificar se a requisição deu certo?
# r.status (lista de status)

file = open("Ano-2018.csv.zip", "wb")
file.write(r.content)
file.close()

zip_file = ZipFile(f"Ano-2018.csv.zip", "r")
zip_file.extract(member=f"Ano-2018.csv", path=f"reembolso-2018")
zip_file.close()

## podemos ler os arquivos de outro jeito


precisamos ler e escrever como `bytes objects` sem nenhum `deecoding`

In [4]:
with open("Ano-2018-teste.csv.zip", "w") as file:
    file.write(r.content)

TypeError: write() argument must be str, not bytes

o `with` é legal porque:

* ele é um gerenciador de contexto (`context manager`) 
* ele gerencia sozinho toda fase de configuração para trabalhar com um arquivo (`setup`) e o encerramento dessa seção de trabalho (`teardown`)

In [5]:
with open("Ano-2018-teste.csv.zip", "wb") as file:
    file.write(r.content)

In [6]:
with ZipFile(f"Ano-2018.csv.zip", "r") as zip_file:
    zip_file.extract(member=f"Ano-2018.csv", path=f"reembolso-2018")

Utilizando o pandas diretamente

In [11]:
import pandas as pd
pd.set_option("display.max_columns", None)

df = pd.read_csv(
    "http://www.camara.leg.br/cotas/Ano-2018.csv.zip",
    sep=";",
    compression="zip"
)

df.head()

Unnamed: 0,txNomeParlamentar,cpf,ideCadastro,nuCarteiraParlamentar,nuLegislatura,sgUF,sgPartido,codLegislatura,numSubCota,txtDescricao,numEspecificacaoSubCota,txtDescricaoEspecificacao,txtFornecedor,txtCNPJCPF,txtNumero,indTipoDocumento,datEmissao,vlrDocumento,vlrGlosa,vlrLiquido,numMes,numAno,numParcela,txtPassageiro,txtTrecho,numLote,numRessarcimento,vlrRestituicao,nuDeputadoId,ideDocumento,urlDocumento
0,LID.GOV-CD,,,,2015,,,55,13,FORNECIMENTO DE ALIMENTAÇÃO DO PARLAMENTAR,0,,BISCOITOS CASEIROS HOMONNAI LTDA,046.448.200/0017-7,10473,4,2018-12-20T00:00:00,454.41,0.0,454.41,12,2018,0,,,1550817,,,2812,6728796,https://www.camara.leg.br/cota-parlamentar/not...
1,LID.GOV-CD,,,,2015,,,55,13,FORNECIMENTO DE ALIMENTAÇÃO DO PARLAMENTAR,0,,BONNA COMERCIAL DE ALIMENTOS EIRELI,106.385.970/0015-8,105,4,2018-02-21T00:00:00,242.9,0.0,242.9,2,2018,0,,,1467117,,,2812,6508293,https://www.camara.leg.br/cota-parlamentar/not...
2,LID.GOV-CD,,,,2015,,,55,13,FORNECIMENTO DE ALIMENTAÇÃO DO PARLAMENTAR,0,,CAFE DO FLAVIO EIRELLI-ME,248.825.670/0010-5,40792,4,2018-09-05T00:00:00,79.0,0.0,79.0,9,2018,0,,,1527969,,,2812,6666881,https://www.camara.leg.br/cota-parlamentar/not...
3,LID.GOV-CD,,,,2015,,,55,13,FORNECIMENTO DE ALIMENTAÇÃO DO PARLAMENTAR,0,,CAFE DO FLAVIO EIRELLI-ME,248.825.670/0010-5,42181,4,2018-10-16T00:00:00,207.0,0.0,207.0,10,2018,0,,,1532624,,,2812,6681167,https://www.camara.leg.br/cota-parlamentar/not...
4,LID.GOV-CD,,,,2015,,,55,13,FORNECIMENTO DE ALIMENTAÇÃO DO PARLAMENTAR,0,,CRISTIANE FERREIRA EPP,037.139.030/0010-8,409,4,2018-02-05T00:00:00,98.75,0.0,98.75,2,2018,0,,,1460984,,,2812,6491959,https://www.camara.leg.br/cota-parlamentar/not...


# Qual a importância de passar o tipo das colunas na hora de leitura do csv?

O pandas pode errar quando tenta determinar o tipo das variáveis (ele lê todos os dados antes), para facilitar isso, passamos o tipo das colunas.

In [25]:
df.dtypes

txNomeParlamentar             object
cpf                          float64
ideCadastro                  float64
nuCarteiraParlamentar        float64
nuLegislatura                  int64
sgUF                          object
sgPartido                     object
codLegislatura                 int64
numSubCota                     int64
txtDescricao                  object
numEspecificacaoSubCota        int64
txtDescricaoEspecificacao     object
txtFornecedor                 object
txtCNPJCPF                    object
txtNumero                     object
indTipoDocumento               int64
datEmissao                    object
vlrDocumento                 float64
vlrGlosa                     float64
vlrLiquido                   float64
numMes                         int64
numAno                         int64
numParcela                     int64
txtPassageiro                 object
txtTrecho                     object
numLote                        int64
numRessarcimento             float64
v

In [81]:
DTYPE = {
    "txNomeParlamentar": str,
    "cpf": str,
    "ideCadastro": str,
    "nuCarteiraParlamentar": str,
    "nuLegislatura": str,
    "sgUF": str,
    "sgPartido": str,
    "codLegislatura": str,
    "numSubCota": str,
    "txtDescricao": str,
    "numEspecificacaoSubCota": str,
    "txtDescricaoEspecificacao": str,
    "txtFornecedor": str,
    "txtCNPJCPF": str,
    "txtNumero": str,
    "indTipoDocumento": str,
    "datEmissao": str,
    "vlrDocumento": float,
    "vlrGlosa": str,
    "vlrLiquido": float,
    "numMes": str,
    "numAno": str,
    "numParcela": str,
    "txtPassageiro": str,
    "txtTrecho": str,
    "numLote": str,
    "numRessarcimento": str,
    "nuDeputadoId": str,
    "ideDocumento": str,
}

df_reembolso = pd.read_csv(
    "http://www.camara.leg.br/cotas/Ano-2018.csv.zip",
    sep=";",
    compression="zip",
    dtype=DTYPE
)

In [33]:
df_reembolso.dtypes

txNomeParlamentar             object
cpf                           object
ideCadastro                   object
nuCarteiraParlamentar         object
nuLegislatura                 object
sgUF                          object
sgPartido                     object
codLegislatura                object
numSubCota                    object
txtDescricao                  object
numEspecificacaoSubCota       object
txtDescricaoEspecificacao     object
txtFornecedor                 object
txtCNPJCPF                    object
txtNumero                     object
indTipoDocumento              object
datEmissao                    object
vlrDocumento                 float64
vlrGlosa                      object
vlrLiquido                   float64
numMes                        object
numAno                        object
numParcela                    object
txtPassageiro                 object
txtTrecho                     object
numLote                       object
numRessarcimento              object
v

# Funções úteis

## `astype()`

In [34]:
df_reembolso.astype("object").dtypes

txNomeParlamentar            object
cpf                          object
ideCadastro                  object
nuCarteiraParlamentar        object
nuLegislatura                object
sgUF                         object
sgPartido                    object
codLegislatura               object
numSubCota                   object
txtDescricao                 object
numEspecificacaoSubCota      object
txtDescricaoEspecificacao    object
txtFornecedor                object
txtCNPJCPF                   object
txtNumero                    object
indTipoDocumento             object
datEmissao                   object
vlrDocumento                 object
vlrGlosa                     object
vlrLiquido                   object
numMes                       object
numAno                       object
numParcela                   object
txtPassageiro                object
txtTrecho                    object
numLote                      object
numRessarcimento             object
vlrRestituicao              

In [36]:
df_reembolso.astype({"vlrRestituicao": "object"}).dtypes

txNomeParlamentar             object
cpf                           object
ideCadastro                   object
nuCarteiraParlamentar         object
nuLegislatura                 object
sgUF                          object
sgPartido                     object
codLegislatura                object
numSubCota                    object
txtDescricao                  object
numEspecificacaoSubCota       object
txtDescricaoEspecificacao     object
txtFornecedor                 object
txtCNPJCPF                    object
txtNumero                     object
indTipoDocumento              object
datEmissao                    object
vlrDocumento                 float64
vlrGlosa                      object
vlrLiquido                   float64
numMes                        object
numAno                        object
numParcela                    object
txtPassageiro                 object
txtTrecho                     object
numLote                       object
numRessarcimento              object
v

## `to_datetime()`

In [39]:
df_reembolso.datEmissao.dtypes

dtype('O')

In [40]:
df_reembolso.datEmissao

0         2018-12-20T00:00:00
1         2018-02-21T00:00:00
2         2018-09-05T00:00:00
3         2018-10-16T00:00:00
4         2018-02-05T00:00:00
                 ...         
292749                    NaN
292750    2018-12-20T00:00:00
292751    2018-12-20T00:00:00
292752    2019-01-08T00:00:00
292753    2018-12-28T00:00:00
Name: datEmissao, Length: 292754, dtype: object

In [41]:
pd.to_datetime(df_reembolso.datEmissao, format="%Y-%m-%d")

0        2018-12-20
1        2018-02-21
2        2018-09-05
3        2018-10-16
4        2018-02-05
            ...    
292749          NaT
292750   2018-12-20
292751   2018-12-20
292752   2019-01-08
292753   2018-12-28
Name: datEmissao, Length: 292754, dtype: datetime64[ns]

In [82]:
df_reembolso_2 = pd.read_csv(
    "http://www.camara.leg.br/cotas/Ano-2019.csv.zip",
    sep=";",
    compression="zip",
    dtype=DTYPE,
    parse_dates=["datEmissao"]
)

In [43]:
df_reembolso_2.datEmissao

0        2018-12-20
1        2018-02-21
2        2018-09-05
3        2018-10-16
4        2018-02-05
            ...    
292749          NaT
292750   2018-12-20
292751   2018-12-20
292752   2019-01-08
292753   2018-12-28
Name: datEmissao, Length: 292754, dtype: datetime64[ns]

### coisinhas legais com datas

In [51]:
df_reembolso_2.datEmissao.dt

<pandas.core.indexes.accessors.DatetimeProperties object at 0x7f5632e6ba60>

In [52]:
df_reembolso_2.datEmissao.dt.day_name()

0          Thursday
1         Wednesday
2         Wednesday
3           Tuesday
4            Monday
            ...    
292749          NaN
292750     Thursday
292751     Thursday
292752      Tuesday
292753       Friday
Name: datEmissao, Length: 292754, dtype: object

In [54]:
df_reembolso_2.datEmissao.dt.dayofweek

0         3.0
1         2.0
2         2.0
3         1.0
4         0.0
         ... 
292749    NaN
292750    3.0
292751    3.0
292752    1.0
292753    4.0
Name: datEmissao, Length: 292754, dtype: float64

In [55]:
df_reembolso_2.datEmissao.dt.year

0         2018.0
1         2018.0
2         2018.0
3         2018.0
4         2018.0
           ...  
292749       NaN
292750    2018.0
292751    2018.0
292752    2019.0
292753    2018.0
Name: datEmissao, Length: 292754, dtype: float64

In [56]:
df_reembolso_2.datEmissao.dt.month

0         12.0
1          2.0
2          9.0
3         10.0
4          2.0
          ... 
292749     NaN
292750    12.0
292751    12.0
292752     1.0
292753    12.0
Name: datEmissao, Length: 292754, dtype: float64

### range de datas

`.loc` -> Access a group of rows and columns by label(s) or a boolean array.

.loc[] is primarily label based, but may also be used with a boolean array.

In [61]:
df_reembolso_2.loc[
    (df_reembolso_2.datEmissao >= "2018-01-01")
    & (df_reembolso_2.datEmissao <= "2018-04-30")
]

Unnamed: 0,txNomeParlamentar,cpf,ideCadastro,nuCarteiraParlamentar,nuLegislatura,sgUF,sgPartido,codLegislatura,numSubCota,txtDescricao,numEspecificacaoSubCota,txtDescricaoEspecificacao,txtFornecedor,txtCNPJCPF,txtNumero,indTipoDocumento,datEmissao,vlrDocumento,vlrGlosa,vlrLiquido,numMes,numAno,numParcela,txtPassageiro,txtTrecho,numLote,numRessarcimento,vlrRestituicao,nuDeputadoId,ideDocumento,urlDocumento
1,LID.GOV-CD,,,,2015,,,55,13,FORNECIMENTO DE ALIMENTAÇÃO DO PARLAMENTAR,0,,BONNA COMERCIAL DE ALIMENTOS EIRELI,106.385.970/0015-8,105,4,2018-02-21,242.90,0,242.90,2,2018,0,,,1467117,,,2812,6508293,https://www.camara.leg.br/cota-parlamentar/not...
4,LID.GOV-CD,,,,2015,,,55,13,FORNECIMENTO DE ALIMENTAÇÃO DO PARLAMENTAR,0,,CRISTIANE FERREIRA EPP,037.139.030/0010-8,409,4,2018-02-05,98.75,0,98.75,2,2018,0,,,1460984,,,2812,6491959,https://www.camara.leg.br/cota-parlamentar/not...
5,LID.GOV-CD,,,,2015,,,55,13,FORNECIMENTO DE ALIMENTAÇÃO DO PARLAMENTAR,0,,CRISTIANE FERREIRA EPP,037.139.030/0010-8,414,4,2018-03-13,127.83,0,127.83,3,2018,0,,,1473970,,,2812,6525989,https://www.camara.leg.br/cota-parlamentar/not...
6,LID.GOV-CD,,,,2015,,,55,13,FORNECIMENTO DE ALIMENTAÇÃO DO PARLAMENTAR,0,,CRISTIANE FERREIRA EPP,037.139.030/0010-8,420,4,2018-04-03,118.15,0,118.15,4,2018,0,,,1480990,,,2812,6544380,https://www.camara.leg.br/cota-parlamentar/not...
7,LID.GOV-CD,,,,2015,,,55,13,FORNECIMENTO DE ALIMENTAÇÃO DO PARLAMENTAR,0,,CRISTIANE FERREIRA EPP,037.139.030/0010-8,440,4,2018-04-24,123.17,0,123.17,4,2018,0,,,1487485,,,2812,6561122,https://www.camara.leg.br/cota-parlamentar/not...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292296,PROF. GEDEÃO AMORIM,01196820287,200153,609,2015,AM,MDB,55,999,PASSAGEM AÉREA - RPA,0,,Cia Aérea - TAM,020.128.620/0016-0,Bilhete: 957-2164.438917,0,2018-04-26,1904.17,0,1904.17,4,2018,0,PROF. GEDEÃO AMORIM,MAO/BSB,0,0,,3177,1601020,
292311,PROF. GEDEÃO AMORIM,01196820287,200153,609,2015,AM,MDB,55,999,PASSAGEM AÉREA - RPA,0,,Cia Aérea - TAM,020.128.620/0016-0,Comp-957-2161.584478,0,2018-02-08,-1169.17,0,-1169.17,2,2018,0,PROF. GEDEÃO AMORIM,MAO/BSB,0,0,,3177,1616800,
292312,PROF. GEDEÃO AMORIM,01196820287,200153,609,2015,AM,MDB,55,999,PASSAGEM AÉREA - RPA,0,,Cia Aérea - TAM,020.128.620/0016-0,Comp-957-2161.584479,0,2018-02-08,-475.93,0,-475.93,2,2018,0,PROF. GEDEÃO AMORIM,BSB/MAO,0,0,,3177,1616803,
292313,PROF. GEDEÃO AMORIM,01196820287,200153,609,2015,AM,MDB,55,999,PASSAGEM AÉREA - RPA,0,,Cia Aérea - TAM,020.128.620/0016-0,Comp-957-2161.584481,0,2018-02-08,-442.93,0,-442.93,2,2018,0,PROF. GEDEÃO AMORIM,BSB/MAO,0,0,,3177,1619090,


In [59]:
df_reembolso_2.set_index("datEmissao").loc["2018-01-01":"2018-04-30"]

Unnamed: 0_level_0,txNomeParlamentar,cpf,ideCadastro,nuCarteiraParlamentar,nuLegislatura,sgUF,sgPartido,codLegislatura,numSubCota,txtDescricao,numEspecificacaoSubCota,txtDescricaoEspecificacao,txtFornecedor,txtCNPJCPF,txtNumero,indTipoDocumento,vlrDocumento,vlrGlosa,vlrLiquido,numMes,numAno,numParcela,txtPassageiro,txtTrecho,numLote,numRessarcimento,vlrRestituicao,nuDeputadoId,ideDocumento,urlDocumento
datEmissao,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
2018-02-21,LID.GOV-CD,,,,2015,,,55,13,FORNECIMENTO DE ALIMENTAÇÃO DO PARLAMENTAR,0,,BONNA COMERCIAL DE ALIMENTOS EIRELI,106.385.970/0015-8,105,4,242.90,0,242.90,2,2018,0,,,1467117,,,2812,6508293,https://www.camara.leg.br/cota-parlamentar/not...
2018-02-05,LID.GOV-CD,,,,2015,,,55,13,FORNECIMENTO DE ALIMENTAÇÃO DO PARLAMENTAR,0,,CRISTIANE FERREIRA EPP,037.139.030/0010-8,409,4,98.75,0,98.75,2,2018,0,,,1460984,,,2812,6491959,https://www.camara.leg.br/cota-parlamentar/not...
2018-03-13,LID.GOV-CD,,,,2015,,,55,13,FORNECIMENTO DE ALIMENTAÇÃO DO PARLAMENTAR,0,,CRISTIANE FERREIRA EPP,037.139.030/0010-8,414,4,127.83,0,127.83,3,2018,0,,,1473970,,,2812,6525989,https://www.camara.leg.br/cota-parlamentar/not...
2018-04-03,LID.GOV-CD,,,,2015,,,55,13,FORNECIMENTO DE ALIMENTAÇÃO DO PARLAMENTAR,0,,CRISTIANE FERREIRA EPP,037.139.030/0010-8,420,4,118.15,0,118.15,4,2018,0,,,1480990,,,2812,6544380,https://www.camara.leg.br/cota-parlamentar/not...
2018-04-24,LID.GOV-CD,,,,2015,,,55,13,FORNECIMENTO DE ALIMENTAÇÃO DO PARLAMENTAR,0,,CRISTIANE FERREIRA EPP,037.139.030/0010-8,440,4,123.17,0,123.17,4,2018,0,,,1487485,,,2812,6561122,https://www.camara.leg.br/cota-parlamentar/not...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-04-26,PROF. GEDEÃO AMORIM,01196820287,200153,609,2015,AM,MDB,55,999,PASSAGEM AÉREA - RPA,0,,Cia Aérea - TAM,020.128.620/0016-0,Bilhete: 957-2164.438917,0,1904.17,0,1904.17,4,2018,0,PROF. GEDEÃO AMORIM,MAO/BSB,0,0,,3177,1601020,
2018-02-08,PROF. GEDEÃO AMORIM,01196820287,200153,609,2015,AM,MDB,55,999,PASSAGEM AÉREA - RPA,0,,Cia Aérea - TAM,020.128.620/0016-0,Comp-957-2161.584478,0,-1169.17,0,-1169.17,2,2018,0,PROF. GEDEÃO AMORIM,MAO/BSB,0,0,,3177,1616800,
2018-02-08,PROF. GEDEÃO AMORIM,01196820287,200153,609,2015,AM,MDB,55,999,PASSAGEM AÉREA - RPA,0,,Cia Aérea - TAM,020.128.620/0016-0,Comp-957-2161.584479,0,-475.93,0,-475.93,2,2018,0,PROF. GEDEÃO AMORIM,BSB/MAO,0,0,,3177,1616803,
2018-02-08,PROF. GEDEÃO AMORIM,01196820287,200153,609,2015,AM,MDB,55,999,PASSAGEM AÉREA - RPA,0,,Cia Aérea - TAM,020.128.620/0016-0,Comp-957-2161.584481,0,-442.93,0,-442.93,2,2018,0,PROF. GEDEÃO AMORIM,BSB/MAO,0,0,,3177,1619090,


# O caso do `vlrLiquido`

Aqui nós podemos ver que temos valores negativos. Esse valores são referentes a bilhetes de compensação, bilhete de compensação é o bilhete aéreo não voado, que foi reembolsado à Câmara dos Deputados. Portanto, o valor relativo aos bilhetes compensados (não voados) são reembolsados/devolvidos à Casa, deduzindo-se as taxas de cancelamento e de no-show, de acordo com as regras tarifárias da companhia aérea respectiva.

Esse crédito pode voltar para a conta corrente do parlamentar mantida na companhia aérea para ser utilizado com nova emissão de passagem aérea ou poderá voltar para a CEAP do respectivo parlamentar, ficando disponível para utilização com as demais despesas.

In [63]:
df_reembolso.vlrLiquido.min()

-4906.0

In [65]:
df_reembolso.vlrLiquido.describe()

count    292754.000000
mean        735.865371
std        2481.009069
min       -4906.000000
25%          50.000000
50%         192.170000
75%         671.170000
max      130000.000000
Name: vlrLiquido, dtype: float64

In [83]:
negative_documents = df_reembolso_2[df_reembolso_2["vlrLiquido"] < 0]

len(negative_documents)

10739

In [84]:
negative_documents['ideDocumento']

920       1640498
1759         2203
1763         2275
1766         1660
1772          313
           ...   
289164    1708904
289176    1723048
289177    1724703
289178    1723049
289318    1724810
Name: ideDocumento, Length: 10739, dtype: object

In [85]:
positive_documents = df_reembolso_2[
    (df_reembolso_2['ideDocumento'].isin(negative_documents['ideDocumento']))
    & 
    (df_reembolso_2["vlrLiquido"] > 0)
]
positive_documents.head()

Unnamed: 0,txNomeParlamentar,cpf,ideCadastro,nuCarteiraParlamentar,nuLegislatura,sgUF,sgPartido,codLegislatura,numSubCota,txtDescricao,numEspecificacaoSubCota,txtDescricaoEspecificacao,txtFornecedor,txtCNPJCPF,txtNumero,indTipoDocumento,datEmissao,vlrDocumento,vlrGlosa,vlrLiquido,numMes,numAno,numParcela,txtPassageiro,txtTrecho,numLote,numRessarcimento,vlrRestituicao,nuDeputadoId,ideDocumento,urlDocumento
1758,Fausto Pinato,28022995819,66828,355,2019,SP,PP,56,998,PASSAGEM AÉREA - SIGEPA,0,,GOL,,GLLN8G,0,2019-11-20 12:00:00,1386.5,0,1386.5,11,2019,0,Fausto Ruy Pinato,BSB/CGH,0,0,,2917,2203,
1762,Fausto Pinato,28022995819,66828,355,2019,SP,PP,56,998,PASSAGEM AÉREA - SIGEPA,0,,GOL,,KSKZ8B,0,2019-11-21 12:00:00,1791.57,0,1791.57,11,2019,0,Fausto Ruy Pinato,BSB/CGH,0,0,,2917,2275,
1765,Fausto Pinato,28022995819,66828,355,2019,SP,PP,56,998,PASSAGEM AÉREA - SIGEPA,0,,GOL,,LM8MGG,0,2019-10-21 12:00:00,1277.57,0,1277.57,10,2019,0,Roberto Holanda Craveiro,BSB/GRU,0,0,,2917,1660,
1771,Fausto Pinato,28022995819,66828,355,2019,SP,PP,56,998,PASSAGEM AÉREA - SIGEPA,0,,GOL,,VRF7MH,0,2019-07-17 12:00:00,1432.88,0,1432.88,7,2019,0,Fausto Ruy Pinato,SDU/BSB,0,0,,2917,313,
3113,Bacelar,10626409500,69871,184,2019,BA,PV,56,998,PASSAGEM AÉREA - SIGEPA,0,,GOL,,CR6F4R,0,2019-10-10 12:00:00,1046.13,0,1046.13,10,2019,0,JOAO CARLOS BACELAR BATISTA,SSA/BSB,0,0,,2985,1503,


In [86]:
len(positive_documents)

602

In [88]:
positive_documents.iloc[0].to_frame()

Unnamed: 0,1758
txNomeParlamentar,Fausto Pinato
cpf,28022995819
ideCadastro,66828
nuCarteiraParlamentar,355
nuLegislatura,2019
sgUF,SP
sgPartido,PP
codLegislatura,56
numSubCota,998
txtDescricao,PASSAGEM AÉREA - SIGEPA


In [89]:
sample = negative_documents[negative_documents.duplicated(subset = 'ideDocumento')].iloc[1] #, keep = {‘last’, ‘first’, ‘false’)

In [91]:
df_reembolso_2[df_reembolso_2['ideDocumento'] == sample['ideDocumento']]

Unnamed: 0,txNomeParlamentar,cpf,ideCadastro,nuCarteiraParlamentar,nuLegislatura,sgUF,sgPartido,codLegislatura,numSubCota,txtDescricao,numEspecificacaoSubCota,txtDescricaoEspecificacao,txtFornecedor,txtCNPJCPF,txtNumero,indTipoDocumento,datEmissao,vlrDocumento,vlrGlosa,vlrLiquido,numMes,numAno,numParcela,txtPassageiro,txtTrecho,numLote,numRessarcimento,vlrRestituicao,nuDeputadoId,ideDocumento,urlDocumento
3134,Bacelar,10626409500,69871,184,2019,BA,PV,56,998,PASSAGEM AÉREA - SIGEPA,0,,GOL,,RMQ2VW,0,2019-09-26 12:00:00,1423.46,0,1423.46,9,2019,0,JOAO CARLOS BACELAR BATISTA,SSA/BSB,0,0,,2985,1205,
3135,Bacelar,10626409500,69871,184,2019,BA,PV,56,998,PASSAGEM AÉREA - SIGEPA,0,,GOL,,RMQ2VW,0,2019-10-01 12:00:00,-25.0,0,-25.0,10,2019,0,JOAO CARLOS BACELAR BATISTA,SSA/BSB,0,0,,2985,1205,
3136,Bacelar,10626409500,69871,184,2019,BA,PV,56,998,PASSAGEM AÉREA - SIGEPA,0,,GOL,,RMQ2VW,0,2019-11-28 12:00:00,-1330.14,0,-1330.14,11,2019,0,JOAO CARLOS BACELAR BATISTA,BSB/BSB,0,0,,2985,1205,
