<a href="https://colab.research.google.com/github/gabrielmacedoanac/ANAC/blob/main/regulmanentos_anac.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pipeline de dados: publicações oficiais da ANAC (regulamentos, normas, orientações, decisões...)

## Importar, tratar e organizar os dados de origem

In [44]:
import pandas as pd
import requests

# url do arquivo com a lista de links a serem baixados em formato .json
url = 'https://raw.githubusercontent.com/gabrielmacedoanac/flat-data-anac/main/regulamentos-url-json.csv' 

# transformar o arquivo em formato de lista
urls = requests.get(url).text.split() 

# cria dataframe vazio
df = pd.DataFrame() 

# percorre a lista de url dos arquivos e cria um dataframe com o conteúdo de todos os arquivos ignorando os índices individuais
for item in urls:
  df = pd.read_json(item).append(df, ignore_index=True)

# Criar uma cópia do dataframe
df_o = df.copy()

# Remover linhas vazias e duplicadas do dataframe criado, mantendo a última ocorrência das linhas duplicadas
df.dropna(how='all')
df.drop_duplicates(keep='last', inplace=True)

# Remover espaços vazios e quebras de linhas antes e depois dos textos nas colunas
df['ementa'] = df['ementa'].str.strip()
df['norma'] = df['norma'].str.strip()
df['tornada_sem_efeito'] = df['tornada_sem_efeito'].str.strip()
df['alterada'] = df['alterada'].str.strip()
df['data'] = df['data'].str.strip()
df['outros'] = df['outros'].str.strip()
df['tipo_normatico'] = df['tipo_normatico'].str.strip()
df['publicacao'] = df['publicacao'].str.strip()
df['revogada'] = df['revogada'].str.strip()
df['em_vigor'] = df['em_vigor'].str.strip()
df['anexos'] = df['anexos'].str.strip()

# Corrigir os dados incorretos publicados pela ANAC. Série de regras de substituição de caracteres
# corrige links errados na coluna anexos
df['anexos']=df['anexos'].str.replace('portalhomolog2', 'www')
df['anexos']=df['anexos'].str.replace('@@download/', '/@@display-file/')

## Extrair e criar metadados (tags)

In [45]:
# Extrair e criar metadados

# Importar biblioteca de expressões regulares
import re

# Criar colunas de dados_nao_estruturados para gerar as tags
df['dados_nao_estruturados']=df[['ementa','norma']].agg(' | '.join, axis=1)

# Gerar tags ignorando capitalização (maiúsculas e minúsculas) a partir da
df['tags']=df['dados_nao_estruturados'].str.lower().str.findall('([0-9]{1,13}\.[0-9]{1,15}|rbac|rbha|lei|decret\w+|decis\w+|crm|cnpj|cpf|portari\w+|isen\w+|cumpriment\w+ alternativ\w+|n[i|í]v\w+ equivalent\w+ d\w+ seguran\w+|diretri\w+ d\w+ aeronavegabilidade|certificad\w+ suplementar\w+ de tip\w+|embraer|alter\w+|modific\w+)', flags=re.IGNORECASE)

# Gerar valores únicos para as tags e unir com "|" 
df['tags']=df['tags'].apply(set).str.join("|")

# Classificar os valores em ordem alfabética
df['tags']=df['tags'].map(lambda x: '|'.join(sorted(x.split('|')))) 

# Substituir valores (texto) das tags para valores desejados
df['tags']=df['tags'].str.replace(r'certificad\w+ suplementar\w+ de tip\w+', 'cst', regex=True)

# Remover coluna dados_nao_estruturados
df.drop(columns=["dados_nao_estruturados"],inplace=True)

# Converter valores das tags para uma lista
df['tags'] = df['tags'].str.replace("|", ", ", regex=False).str.split(', ').tolist()

# Ordenar a lista de forma alfabética
df['tags'] = df['tags'].apply(sorted)


## Salvar os arquivos

In [47]:
# Salvar arquivos no Google Drive
# importar biblioteca do google drive para salvar arquivos
from google.colab import drive

# montar google drive e aceitar permissão
drive.mount('/content/drive', force_remount=True)

# CSV - indicar local onde o arquivo será salvo 
path = '/content/drive/MyDrive/ANAC/regulamentos-anac-tags.csv'
# salvar arquivo em formato .csv com codificação utf-8
with open(path, 'w', encoding = 'utf-8-sig') as f:
  df.to_csv(f, index=False)

# TSV - indicar local onde o arquivo será salvo 
path = '/content/drive/MyDrive/ANAC/regulamentos-anac-tags.tsv'
# salvar arquivo em formato .csv com codificação utf-8
with open(path, 'w', encoding = 'utf-8-sig') as f:
  df.to_csv(f, sep="\t", index=False)

# JSON - indicar local onde o arquivo será salvo
path = '/content/drive/MyDrive/ANAC/regulamentos-anac-tags.json'
# salvar arquivo em formato .json
with open(path, 'w') as f:
  df.to_json(f, orient="records")

Mounted at /content/drive


In [49]:
# Fazer download dos arquivos do Google Drive
# Importar biblioteca para fazer download dos arquivos
from google.colab import files

# CSV - download do arquivo
files.download('/content/drive/MyDrive/ANAC/regulamentos-anac-tags.csv')
# TSV - download do arquivo
files.download('/content/drive/MyDrive/ANAC/regulamentos-anac-tags.tsv')
# JSON - download do arquivo
files.download('/content/drive/MyDrive/ANAC/regulamentos-anac-tags.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Rascunhos

In [None]:
## Teste gerando nova coluna com valores encontrados com padrão REGEX
import re

#import numpy as np
# df['tags']=df['ementa'].str.findall('altera', flags=re.IGNORECASE)

####### df['dados_nao_estruturados']=df['ementa']+df['norma'] # Unir colunas de dados para gerar tags

df['dados_nao_estruturados']=df[['ementa','norma']].agg(' | '.join, axis=1)

# df['tags']=df['dados_nao_estruturados'].str.findall('([0-9]{1,15}\.[0-9]{1,15})').str.join('|') # Gerar tags numéricas

df['tags']=df['dados_nao_estruturados'].str.lower().str.findall('([0-9]{1,13}\.[0-9]{1,15}|rbac|rbha|lei|decret\w+|decis\w+|crm|cnpj|cpf|portari\w+|isen\w+|cumpriment\w+ alternativ\w+|n[i|í]v\w+ equivalent\w+ d\w+ seguran\w+|diretri\w+ d\w+ aeronavegabilidade|certificad\w+ suplementar\w+ de tip\w+|embraer|alter\w+|modific\w+)', flags=re.IGNORECASE)

# corrige links errados na coluna anexos
# df['tags'] = df['tags'].replace('certificados suplementares de tipo', 'certificado suplementar de tipo')

df['tags']=df['tags'].apply(set).str.join("|") # Gera valores únicos para as tags e une com "|" 

df['tags']=df['tags'].map(lambda x: '|'.join(sorted(x.split('|')))) # mapeia a string para classificar os valores em ordem alfabética

# Substituir valores das strings

df['tags']=df['tags'].str.replace(r'certificad\w+ suplementar\w+ de tip\w+', 'cst', regex=True)

# Remover coluna dados_nao_estruturados
df.drop(columns=["dados_nao_estruturados"],inplace=True)

# Converter coluna tags em lista
df['tags'] = df['tags'].str.replace("|", ", ", regex=False).str.split(', ').tolist()

# Ordenar em ordem alfabética a coluna tags
df['tags'] = df['tags'].apply(sorted)

# df['tags'] = df['tags'].fillna(value='ausente')

#df['tags']=df['tags'].str[1:-1].str.split(',').tolist()

#df['tags']=list(map(str.lower,df['dados_nao_estruturados'].str.findall('([0-9]{1,13}\.[0-9]{1,15}|rbac|rbac-e|lei|decreto|crm|cnpj|cpf|portaria|isenção|cumprimento alternativo|altera)', flags=re.IGNORECASE).str.join("|")))

#np.unique(df[['Courses', 'Duration']].values)


#df['tags']=pd.unique(df['tags'].values.ravel())

##PARA KUMU - .str.join('|')
# df['tags']=df['dados_nao_estruturados'].str.findall('([0-9]{1,13}\.[0-9]{1,15}|rbac|rbac-e|lei|decreto|crm|cnpj|cpf|portaria|isenção|cumprimento alternativo|altera)', flags=re.IGNORECASE).str.join('|')  

# astype(str) e str.join('|')

### KUMU: https://docs.kumu.io/guides/import.html


#df['tags']=df['ementa'].str.extract(r'(^[0-9]{1,9}\.[0-9]{1,9})')
#df['tags']=df['ementa'].str.extract(r'(^w{5})')
df

In [10]:
df['tags']=df['tags'].str.replace(r'certificad\w+ suplementar\w+ de tip\w+', 'cst', regex=True)
df

AttributeError: ignored

Alguns recursos interessantes da exibição da tabela de dados:

- Clicando no botão `filter` no canto superior direito permite pesquisar termos ou valores em qualquer coluna específica.
- Clicar em qualquer título de coluna permite classificar os resultados de acordo com o valor dessa coluna.
- A tabela exibe apenas um subconjunto dos dados por vez. Você pode navegar pelas páginas de dados usando os controles no canto inferior direito.

## Ajustar tipos de dados (data, número, texto..)

In [None]:
from datetime import datetime

# Converter formato do campo 'data' para datetime
# df[['data']] = df[['data']].apply(pd.to_datetime)

df['data'] = pd.to_datetime(df['data'])

# df['data'] = pd.to_datetime(df['data']).dt.strftime("%Y-%m-%d")

# Converter todo o dataframe para string
#df = df.astype(str)

# Converter formato de dados em Dtype
#data_types_dicionario = {'ementa': str, 'norma': str, 'tornada_sem_efeito': str,	'alterada': str,	'data': str,	'outros'	tipo_normatico	publicacao	revogada	em_vigor	anexos	dados_nao_estruturados	
#df = df.astype(data_types_dicionario)

df.info()
df.describe(include='all')
df

## Salvar e exportar a base de dados com metadados (tags)

In [None]:
# importar biblioteca para salvar no google drive
from google.colab import drive

# montar google drive e indicar local onde o arquivo será salvo
drive.mount('/content/drive', force_remount=True)
path = '/content/drive/MyDrive/ANAC/regulamentos-anac-tags.csv'

# salvar arquivo em formato .csv com codificação utf-8
with open(path, 'w', encoding = 'utf-8-sig') as f:
  df.to_csv(f, index=False)

Mounted at /content/drive


In [None]:
# Regulamentos em formato TSV por conta de erro nos arquivos da ANAC, áspas duplas, etc.

# montar google drive e indicar local onde o arquivo será salvo
drive.mount('/content/drive', force_remount=True)
path = '/content/drive/MyDrive/ANAC/regulamentos-anac-tags.tsv'

# salvar arquivo em formato .csv com codificação utf-8
with open(path, 'w', encoding = 'utf-8-sig') as f:
  df.to_csv(f, sep="\t", index=False)

Mounted at /content/drive


In [None]:
# importar bilbioteca para fazer download do resultado
from google.colab import files

# converter arquivo .csv e fazer o download
files.download('/content/drive/MyDrive/ANAC/regulamentos-anac-tags.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# importar bilbioteca para fazer download do resultado
from google.colab import files

# converter arquivo .tsv e fazer o download
files.download('/content/drive/MyDrive/ANAC/regulamentos-anac-tags.tsv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# importar biblioteca para salvar no google drive
from google.colab import drive

# montar google drive e indicar local onde o arquivo será salvo
drive.mount('/content/drive', force_remount=True)
path = '/content/drive/MyDrive/ANAC/regulamentos-anac-tags.json'

# salvar arquivo em formato .json
with open(path, 'w') as f:
  df.to_json(f, orient="records")

Mounted at /content/drive


In [None]:
# importar bilbioteca para fazer download do resultado
from google.colab import files

# converter arquivo .json e fazer o download
files.download('/content/drive/MyDrive/ANAC/regulamentos-anac-tags.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Conteúdos para testar

## Conteúdos para testar

In [None]:
!pip install pandas-profiling
!pip install --upgrade pandas-profiling


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pandas-profiling
  Downloading pandas_profiling-3.3.0-py2.py3-none-any.whl (268 kB)
[K     |████████████████████████████████| 268 kB 5.0 MB/s 
Collecting visions[type_image_path]==0.7.5
  Downloading visions-0.7.5-py3-none-any.whl (102 kB)
[K     |████████████████████████████████| 102 kB 55.3 MB/s 
Collecting statsmodels<0.14,>=0.13.2
  Downloading statsmodels-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.8 MB)
[K     |████████████████████████████████| 9.8 MB 65.0 MB/s 
[?25hCollecting requests<2.29,>=2.24.0
  Downloading requests-2.28.1-py3-none-any.whl (62 kB)
[K     |████████████████████████████████| 62 kB 1.4 MB/s 
Collecting htmlmin==0.1.12
  Downloading htmlmin-0.1.12.tar.gz (19 kB)
Collecting multimethod<1.9,>=1.4
  Downloading multimethod-1.8-py3

In [None]:
from pandas_profiling import ProfileReport

In [None]:
df['tipo_normatico'] = df['tipo_normatico'].astype('category',copy=False) #transformar a coluna 'tipo_normatico' em uma categoria (dado discreto)
df['data'] = pd.to_datetime(df['data']) # transformar data em tipo de dado de data

profile = ProfileReport(df)
profile

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]



In [None]:
# importar biblioteca para salvar no google drive
from google.colab import drive

# montar google drive e indicar local onde o arquivo será salvo
drive.mount('/content/drive', force_remount=True)
path = '/content/drive/MyDrive/ANAC/regulamentos-anac-tags-report.html'

# salvar arquivo em formato .html com codificação utf-8
with open(path, 'w', encoding = 'utf-8-sig') as f:
  profile.to_file(output_file=path)


Mounted at /content/drive


Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# importar bilbioteca para fazer download do resultado
from google.colab import files

# fazer o download do report .html
files.download('/content/drive/MyDrive/ANAC/regulamentos-anac-tags-report.html')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Testes ainda não concluídos


In [None]:
## Exportar dataframe para HTML com estilos

# importing the modules
from tabulate import tabulate
import pandas as pd
 
# displaying the DataFrame
print(tabulate(df, headers = 'keys', tablefmt = 'html'))


### Below are all the styles that you can use:

# “plain”
# “simple”
# “github”
# “grid”
# “fancy_grid”
# “pipe”
# “orgtbl”
# “jira”
# “presto”
# “pretty”
# “psql”
# “rst”
# “mediawiki”
# “moinmoin”
# “youtrack”
# “html”
# “latex”
# “latex_raw”
# “latex_booktabs”
# “textile”


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [None]:
## TESTES


# https://acervolima.com/divida-uma-string-em-colunas-usando-regex-no-pandas-dataframe/

'''
Método # 2:
Para quebrar a string, usaremos a Series.str.extract(pat, flags=0, expand=True)função. Aqui, pat se refere ao padrão que queremos pesquisar.

import pandas as pd
  
dict = {'movie_data':['The Godfather 1972 9.2',
                    'Bird Box 2018 6.8',
                    'Fight Club 1999 8.8'] }
  
# Convert the dictionary to a dataframe
df = pd.DataFrame(dict)
  
# Extract name from the string 
df['Name'] = df['movie_data'].str.extract('(\w*\s\w*)', expand=True)
   
# Extract year from the string 
df['Year'] = df['movie_data'].str.extract('(\d\d\d\d)', expand=True)
  
# Extract rating from the string 
df['Rating'] = df['movie_data'].str.extract('(\d\.\d)', expand=True)
'''

"\nMétodo # 2:\nPara quebrar a string, usaremos a Series.str.extract(pat, flags=0, expand=True)função. Aqui, pat se refere ao padrão que queremos pesquisar.\n\nimport pandas as pd\n  \ndict = {'movie_data':['The Godfather 1972 9.2',\n                    'Bird Box 2018 6.8',\n                    'Fight Club 1999 8.8'] }\n  \n# Convert the dictionary to a dataframe\ndf = pd.DataFrame(dict)\n  \n# Extract name from the string \ndf['Name'] = df['movie_data'].str.extract('(\\w*\\s\\w*)', expand=True)\n   \n# Extract year from the string \ndf['Year'] = df['movie_data'].str.extract('(\\d\\d\\d\\d)', expand=True)\n  \n# Extract rating from the string \ndf['Rating'] = df['movie_data'].str.extract('(\\d\\.\\d)', expand=True)\n"

In [None]:
# PANDOC

!pip install pandoc
import pandoc


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pandoc
  Downloading pandoc-2.2.tar.gz (29 kB)
Collecting plumbum
  Downloading plumbum-1.7.2-py2.py3-none-any.whl (117 kB)
[K     |████████████████████████████████| 117 kB 3.3 MB/s 
[?25hCollecting ply
  Downloading ply-3.11-py2.py3-none-any.whl (49 kB)
[K     |████████████████████████████████| 49 kB 5.8 MB/s 
[?25hBuilding wheels for collected packages: pandoc
  Building wheel for pandoc (setup.py) ... [?25l[?25hdone
  Created wheel for pandoc: filename=pandoc-2.2-py3-none-any.whl size=29558 sha256=d1a949fda169caacecb82d776d40a82b8b3ea9e34648d8dbd749f502d1590998
  Stored in directory: /root/.cache/pip/wheels/e7/e3/1e/31f9b28058a3fc0b63be4076a4e9f6ac79c332bb7e1d6c4ec9
Successfully built pandoc
Installing collected packages: ply, plumbum, pandoc
Successfully installed pandoc-2.2 plumbum-1.7.2 ply-3.11


In [None]:
import requests

response = requests.get('https://ipfs.io/ipfs/QmZW9yEfDjNb9tJ9pCBDzbHc7Q1EXXexqcyjvQtZ2HQC4q/1568.json').json()

print(response)
print(response['description'])

JSONDecodeError: ignored

In [None]:
# Streamlit

https://www.youtube.com/watch?v=Sb0A9i6d320

Turn An Excel Sheet Into An Interactive Dashboard Using Python (Streamlit)

In [None]:
# https://www.w3schools.com/python/python_howto_remove_duplicates.asp

def unicos(x):
  return list(dict.fromkeys(x))
mylist = my_function(["a", "b", "a", "c", "c"])

In [None]:
!pip install sweetviz
import sweetviz as sv



Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sweetviz
  Downloading sweetviz-2.1.4-py3-none-any.whl (15.1 MB)
[K     |████████████████████████████████| 15.1 MB 4.2 MB/s 
Installing collected packages: sweetviz
Successfully installed sweetviz-2.1.4


                                             |          | [  0%]   00:00 -> (? left)

KeyError: ignored

In [None]:
#You could specify which variable in your dataset is the target for your model creation. We can specify it using the target_feat parameter.
my_report = sv.analyze(df)
my_report.show_html()

                                             |          | [  0%]   00:00 -> (? left)

Report SWEETVIZ_REPORT.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.


In [None]:
!pip install lux-api


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting lux-api
  Downloading lux-api-0.5.1.tar.gz (127 kB)
[K     |████████████████████████████████| 127 kB 4.1 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting sh
  Downloading sh-1.14.3.tar.gz (62 kB)
[K     |████████████████████████████████| 62 kB 1.1 MB/s 
Collecting psutil>=5.9.0
  Downloading psutil-5.9.2-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (281 kB)
[K     |████████████████████████████████| 281 kB 42.1 MB/s 
Collecting lux-widget>=0.1.4
  Downloading lux-widget-0.1.11.tar.gz (2.5 MB)
[K     |████████████████████████████████| 2.5 MB 50.3 MB/s 
Collecting iso3166
  Downloading iso3166-2.1.1-py3-none-any.whl (9.8 kB)
Collecting autopep8>=1.5
  Downloading autopep8-1.7.0-py2.py3-no

In [None]:
import lux
df

NameError: ignored

In [None]:
!pip install pandasgui
from pandasgui import show
gui = show(df)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pandasgui
  Downloading pandasgui-0.2.13.tar.gz (215 kB)
[K     |████████████████████████████████| 215 kB 4.1 MB/s 
Collecting PyQt5
  Downloading PyQt5-5.15.7-cp37-abi3-manylinux1_x86_64.whl (8.4 MB)
[K     |████████████████████████████████| 8.4 MB 1.7 MB/s 
[?25hCollecting PyQt5-sip
  Downloading PyQt5_sip-12.11.0-cp37-cp37m-manylinux1_x86_64.whl (344 kB)
[K     |████████████████████████████████| 344 kB 57.4 MB/s 
[?25hCollecting PyQtWebEngine
  Downloading PyQtWebEngine-5.15.6-cp37-abi3-manylinux1_x86_64.whl (230 kB)
[K     |████████████████████████████████| 230 kB 55.2 MB/s 
Collecting pynput
  Downloading pynput-1.7.6-py2.py3-none-any.whl (89 kB)
[K     |████████████████████████████████| 89 kB 6.8 MB/s 
Collecting qtstylish>=0.1.2
  Downloading qtstylish-0.1.5.tar.gz (983 kB)
[K     |████████████████████████████████| 983 kB 36.6 MB/s 
Collecting jedi>=0.10
  Downlo

ImportError: ignored

## Streamlit app

In [None]:
# https://github.com/PablocFonseca/streamlit-aggrid

!pip install streamlit-aggrid

from st_aggrid import AgGrid
import pandas as pd

AgGrid(df)



Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting streamlit-aggrid
  Downloading streamlit_aggrid-0.3.2-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 5.2 MB/s 
[?25hCollecting python-decouple<4.0,>=3.6
  Downloading python_decouple-3.6-py3-none-any.whl (9.9 kB)
Collecting streamlit>=0.87.0
  Downloading streamlit-1.13.0-py2.py3-none-any.whl (9.2 MB)
[K     |████████████████████████████████| 9.2 MB 40.9 MB/s 
Collecting pympler>=0.9
  Downloading Pympler-1.0.1-py3-none-any.whl (164 kB)
[K     |████████████████████████████████| 164 kB 57.4 MB/s 
Collecting semver
  Downloading semver-2.13.0-py2.py3-none-any.whl (12 kB)
Collecting blinker>=1.0.0
  Downloading blinker-1.5-py2.py3-none-any.whl (12 kB)
Collecting watchdog
  Downloading watchdog-2.1.9-py3-none-manylinux2014_x86_64.whl (78 kB)
[K     |████████████████████████████████| 78 kB 6.4 MB/s 
Collecting validators>=0.2
  Downloading validators

  command:

    streamlit run /usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py [ARGUMENTS]
2022-10-11 13:01:08.714 
  command:

    streamlit run /usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py [ARGUMENTS]


AgGridReturn(data=                                                  ementa  \
0      Altera a Lei Nº 6.009, de 26 de dezembro de 19...   
1      Altera dispositivos das Leis nos 7.920, de 12 ...   
2      Conversão da MP Nº 527, de 18 de maio de 2011,...   
3      Altera a lei Nº 8.112 , de 11 de dezembro de 1...   
4      Altera a lei Nº 10.683, de 28 de maio de 2003,...   
...                                                  ...   
24952  Autoriza a operação de sociedade empresária de...   
24953  Defere pedido de isenção temporária de cumprim...   
24954  Defere pedido de isenção temporária de cumprim...   
24955  Autoriza a operação de sociedade empresária de...   
24956  Defere pedido de extensão do prazo de isenção ...   

                                    norma tornada_sem_efeito alterada  \
0      Decreto-Lei nº 2.060 de 13/09/1983                               
1             Lei nº 12.648 de 17/05/2012                               
2             Lei nº 12.462 de 05/08/2011 

In [None]:
## USASNDO NRGOK para implantar app - https://medium.com/analytics-vidhya/deploy-flask-streamlit-apps-from-jupyter-notebook-and-google-colab-5c3729dc3647

#install required libraries
!pip install streamlit -q  

#ngrok for deployment
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip  
!unzip -qq ngrok-stable-linux-amd64.zip

[31mERROR: Operation cancelled by user[0m
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/pip/_vendor/pkg_resources/__init__.py", line 3021, in _dep_map
    return self.__dep_map
  File "/usr/local/lib/python3.7/dist-packages/pip/_vendor/pkg_resources/__init__.py", line 2815, in __getattr__
    raise AttributeError(attr)
AttributeError: _DistInfoDistribution__dep_map

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/pip/_internal/cli/base_command.py", line 180, in _main
    status = self.run(options, args)
  File "/usr/local/lib/python3.7/dist-packages/pip/_internal/cli/req_command.py", line 199, in wrapper
    return func(self, options, args)
  File "/usr/local/lib/python3.7/dist-packages/pip/_internal/commands/install.py", line 319, in run
    reqs, check_supported_wheels=not options.target_dir
  File "/usr/local/lib/python3.7/dist-packages/pip/_in

In [None]:

#you may have to run this cell again if it throws an erro
get_ipython().system_raw('./ngrok http 8501 &')
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"
    
# this gives you the link on which your web app will be deployed

http://4b4c-34-73-251-2.ngrok.io


In [None]:
## Autenticar usuário no NGROK

!ngrok config add-authtoken 2FzYdYNBZqB2tXWlBVE35SVX2Li_uZopGZGo7YmnUgBAgEM5

/bin/bash: ngrok: command not found


In [None]:
#creates a file named app.py 

%%writefile app.py
import streamlit as st

from st_aggrid import AgGrid
import pandas as pd

AgGrid(df)

Overwriting app.py


In [None]:
#run the streamlit app
!streamlit run app.py

2022-10-11 14:02:34.302 INFO    numexpr.utils: NumExpr defaulting to 2 threads.

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.2:8501[0m
[34m  External URL: [0m[1mhttp://34.73.251.2:8501[0m
[0m


In [None]:
import requests
import os

# Set the URL and the directory to save the files
url = "https://sistemas.anac.gov.br/dadosabertos/normas-homol/regulamentacao/Normas%20Publicadas/"
save_dir = "csv_files"

# Make the save directory if it doesn't already exist
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# Send a request to the URL and get the response
response = requests.get(url)

# If the response status code is 200 (OK), proceed
if response.status_code == 200:
    # Get the list of all files on the page
    file_list = response.text.split("\n")

    # Iterate through the list of files
    for file_name in file_list:
        # Check if the file is a CSV file
        if file_name.endswith(".csv"):
            # Construct the full URL of the file
            file_url = url + file_name

            # Send a request to the file URL and get the response
            file_response = requests.get(file_url)

            # If the response status code is 200 (OK), save the file
            if file_response.status_code == 200:
                with open(os.path.join(save_dir, file_name), "wb") as f:
                    f.write(file_response.content)

ConnectionError: ignored