<br>

# TJSP

In [1]:
import os
import tabula
import requests
import pandas as pd
from datetime import datetime

In [2]:
dia = datetime.today()
dia.strftime('%Y.%M.%d')

'2021.21.20'

In [3]:
# Create Directory
os.makedirs('data', exist_ok=True)

# URL to Download
url = 'https://www.tjsp.jus.br/Download/Tabelas/TabelaDebitosJudiciais.pdf'

# Requests
r = requests.get(url, allow_redirects=True)
open(os.path.join('data', 'tab_tjsp.pdf'), 'wb').write(r.content)

130505

In [9]:
# Read pdf
dfs = tabula.read_pdf(os.path.join('data', 'tab_tjsp.pdf'), pages='all')

# Loop
df_concat = pd.DataFrame()
for n in range(len(dfs)):
    df = dfs[n]
    df.rename(columns={'Unnamed: 0': 'mes'}, inplace=True, errors='ignore')
    df.set_index('mes', inplace=True)
    df.drop('Unnamed: 1', axis=1, inplace=True, errors='ignore')
    df_concat = pd.concat([df_concat, df], axis=1)

# Flat Dataframe
df = df_concat
df = df.stack()
df = pd.DataFrame(df)
df = df.reset_index()

# Rename Columns
df.rename(columns={'level_1': 'ano', 0: 'taxa'}, inplace=True, errors='ignore')

# Rename Values
dict_mes = {
    'JAN': 1,
    'FEV': 2, 
    'MAR': 3,
    'ABR': 4,
    'MAI': 5,
    'JUN': 6,
    'JUL': 7,
    'AGO': 8,
    'SET': 9,
    'OUT': 10,
    'NOV': 11,
    'DEZ': 12,
}

# Ajusta Mês
df = df.replace({'mes': dict_mes})
df['mes'] = df['mes'].astype(int)

# Ajusta Ano
df['ano'] = df['ano'].str.replace(' ', '')
df['ano'] = df['ano'].astype(int)

# Ajusta Taxa
df['taxa_string'] = df['taxa']
df['taxa'] = df['taxa'].str.replace('-', '', regex=True)
df['taxa'] = df['taxa'].str.replace('.', '', regex=True)
df['taxa'] = df['taxa'].str.replace(',', '.', regex=True)
df = df[df['taxa'] != '']
df['taxa'] = df['taxa'].astype(float).copy()

# Ajusta Datas
df['year'] = df['ano']
df['month'] = df['mes']
df['day'] = 1

df['data'] = pd.to_datetime(df[['year', 'month', 'day']])
df['data_ref'] = df['data'].dt.strftime('%Y-%m')

# Drop
df.drop(['year', 'month', 'day'], axis=1, inplace=True, errors='ignore')

# Sortear
df.sort_values('data', inplace=True)
df = df.reindex(['data', 'data_ref', 'ano', 'mes', 'taxa_string', 'taxa'], axis=1)
df.reset_index(inplace=True)

# Drop
df.drop('index', axis=1, inplace=True, errors='ignore')

# Save
df.to_csv(
    os.path.join('data', 'tab_tjsp.csv'),
    index=False,
    decimal=',',
)

# Results
print(df.dtypes)
display(df)

data           datetime64[ns]
data_ref               object
ano                     int64
mes                     int64
taxa_string            object
taxa                  float64
dtype: object


Unnamed: 0,data,data_ref,ano,mes,taxa_string,taxa
0,1964-10-01,1964-10,1964,10,"10.000,00",10000.000000
1,1964-11-01,1964-11,1964,11,"10.000,00",10000.000000
2,1964-12-01,1964-12,1964,12,"10.000,00",10000.000000
3,1965-01-01,1965-01,1965,1,"11.300,00",11300.000000
4,1965-02-01,1965-02,1965,2,"11.300,00",11300.000000
...,...,...,...,...,...,...
677,2021-03-01,2021-03,2021,3,77826226,77.826226
678,2021-04-01,2021-04,2021,4,78495531,78.495531
679,2021-05-01,2021-05,2021,5,78793814,78.793814
680,2021-06-01,2021-06,2021,6,79550234,79.550234
