<br>

# Introdução


In [None]:
#!pip3 install PyPDF2


In [None]:
import os
import re
import shutil
import time
import urllib.parse
import urllib.request
from pathlib import Path

import numpy as np
import pandas as pd
import requests
from dotenv import dotenv_values, find_dotenv
from more_itertools import one
from my_driver import Driver
from paths import adds_path, data_path, driver_path, log_path, temp_path
from PyPDF2 import PdfFileMerger, PdfFileReader, PdfMerger, PdfReader
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait


In [None]:
# Credentials: read key/value pairs from the .env file located by find_dotenv
# (search starts at the current working directory because usecwd=True).
config = dotenv_values(find_dotenv(usecwd=True))

# Login variables; indexing raises KeyError if the .env file lacks either key.
CPF = config['CPF']
PASSWORD = config['PASSWORD']


<br>

## Create Driver


In [None]:
# Instantiate the project's browser wrapper (my_driver.Driver) with the driver,
# log and download locations imported from paths.py.
# NOTE(review): verify_ssl=False presumably disables certificate verification — confirm this is intended.
driver = Driver(
    my_driver_path=driver_path,
    my_logs_path=log_path,
    my_download_path=temp_path,
    verify_ssl=False,
)
# NOTE(review): presumably installs browser extensions found under adds_path — confirm in my_driver.
driver.add_extension_xpath(adds_path)


<br>

## Faz Login


In [None]:
def fecha_popups():
    """
    Close the portal's guided-tour popup when it is on screen.

    Best effort: if the popup (or its close button) is not found, or the click
    fails, the function returns silently. Uses the module-level ``driver``.
    """
    try:
        # Locate the tour bubble through the parent of its actions bar
        tour = driver.find_element(
            By.XPATH,
            '//*[@class="hopscotch-bubble animated tour-portalTour"]//*[@class="hopscotch-actions"]//..',
        )
        # Click the close button inside that bubble
        tour.find_element(
            By.XPATH, './/*[@class="hopscotch-bubble-close hopscotch-close"]'
        ).click()
    except Exception:
        # The tour is optional. `Exception` (instead of a bare `except`) keeps
        # KeyboardInterrupt/SystemExit able to stop the notebook.
        pass


In [None]:
def get_login(cpf, password):
    """
    Open the portal login page and sign in.

    Steps: load the login URL, click the "Entrar" link when one is present,
    type the credentials into the ``identificacao`` and ``senha`` fields and
    click the submit button. Nothing is raised: a missing "Entrar" link is
    ignored and any error while filling or submitting the form is printed.
    Uses the module-level ``driver``.

    :param cpf: Value typed into the ``identificacao`` field.
    :type cpf: str
    :param password: Value typed into the ``senha`` field.
    :type password: str
    """

    def wait_for(xpath):
        # Block (up to 20 s) until the element is present in the DOM
        return WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, xpath))
        )

    driver.get('https://e.ambiente.sp.gov.br/atendimento/login')
    time.sleep(1)

    try:
        # Click "Entrar" to reach the credentials form
        wait_for('//a[contains(@aria-label, "Entrar")]').click()
    except Exception:
        # The link may be absent; carry on and try the form directly.
        # `Exception` rather than a bare `except` so Ctrl+C still interrupts.
        pass

    try:
        # Fill in the credentials
        wait_for('//*[@id="identificacao"]').send_keys(cpf)
        wait_for('//*[@id="senha"]').send_keys(password)

        # Submit the form
        wait_for(
            '//button[contains(@class, "sds-btn sds-btn--raised sds-btn--full-width")]'
        ).click()

    except Exception as e:
        print(e)


In [None]:
# Log in with the credentials read from the .env file
get_login(CPF, PASSWORD)


<br>

## Go To


In [None]:
def open_sidebar():
    """
    Keep the side menu open.

    Clicks the first ``<a>`` whose ``aria-expanded`` attribute contains
    "false" (if any), then looks up an ``<a>`` with ``aria-expanded``
    containing "true" and finally pauses for one second. Lookup or click
    failures are ignored. Uses the module-level ``driver``.
    """
    try:
        # A single lookup is enough: find the collapsed toggle and click it
        driver.find_element(
            By.XPATH, '//a[contains(@aria-expanded, "false")]'
        ).click()
        print('Abrindo menu lateral...')
    except Exception:
        # No collapsed toggle (menu already open) or click failed: nothing to do.
        # `Exception` rather than a bare `except` so Ctrl+C still interrupts.
        pass

    try:
        # NOTE(review): the result is unused; this lookup only has an effect as a
        # wait for the expanded state if the driver has an implicit wait configured — confirm.
        driver.find_element(By.XPATH, '//a[contains(@aria-expanded, "true")]')
    except Exception:
        pass
    time.sleep(1)
    

In [None]:
# Make sure the side menu is expanded
open_sidebar()


In [None]:
def go_to(my_option):
    """
    Navigate to one of the portal's side-menu pages.

    The side menu is opened first. If the current page title already contains
    the option name (case-insensitive) no click is made; otherwise the element
    whose ``aria-label`` equals the option is clicked. The function then polls
    ``document.readyState`` until it is ``'complete'`` and sleeps 3 seconds.

    :param my_option: Menu entry to open; one of 'Início',
        'Consulta de Processos', 'Meus Processos', 'Processos Liberados',
        'Meus Dados'.
    :type my_option: str
    :raises Exception: If ``my_option`` is not one of the known entries.
    """
    # Open Sidebar
    open_sidebar()

    options = [
        'Início',
        'Consulta de Processos',
        'Meus Processos',
        'Processos Liberados',
        'Meus Dados',
    ]

    # Guard clause: reject unknown menu entries
    if my_option not in options:
        raise Exception(f'Precisa estar entre: {options}')

    # Click the entry unless the page title says we are already there
    already_there = my_option.lower() in driver.title.lower()
    if not already_there:
        locator = (By.XPATH, f'//*[@aria-label="{my_option}"]')
        menu_entry = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located(locator)
        )
        menu_entry.click()

    # Poll until the document reports it has finished loading
    while True:
        if driver.execute_script('return document.readyState') == 'complete':
            break
        print(driver.execute_script('return document.readyState'))
        time.sleep(1)
    time.sleep(3)


In [None]:
# Navigation check: visit three of the menu entries in sequence
go_to('Meus Processos')
go_to('Processos Liberados')
go_to('Meus Dados')


<br>

## Meus Processos


In [None]:
def meus_processos():
    """
    Scrape the cards listed under "Meus Processos" in the in-progress group.

    Opens the side menu, navigates to "Meus Processos" and reads, for every
    card inside the ``solicitacoes-EM_ANDAMENTO`` container, the text of its
    first ``<h2>``, the ``href`` of its first ``<a>`` and the text of its
    first ``<p>``.

    :return: One row per card with columns ``tipo``, ``url`` and ``processo``.
    :rtype: pandas.DataFrame
    """
    # Open Sidebar
    open_sidebar()

    # Go Page
    go_to('Meus Processos')

    # Drill down: section -> in-progress container -> cards
    section = driver.find_element(
        By.XPATH, '//section[@aria-label="Processos"]'
    )
    in_progress = section.find_element(
        By.XPATH, './/div[contains(@class, "solicitacoes-EM_ANDAMENTO")]'
    )
    cards = in_progress.find_elements(
        By.XPATH, './/div[contains(@class, "solicitacao-card")]'
    )

    # One record per card (dict entries are evaluated in h2, a, p order)
    records = [
        {
            'tipo': card.find_element(By.XPATH, './/h2').text,
            'url': card.find_element(By.XPATH, './/a').get_attribute('href'),
            'processo': card.find_element(By.XPATH, './/p').text,
        }
        for card in cards
    ]

    # Dataframe
    return pd.DataFrame(records)


In [None]:
# Display the table of in-progress processes
meus_processos()


<br>

## Processos Liberados


In [None]:
def processos_liberados():
    """
    Build a table of the processes listed on the "Processos Liberados" page.

    Opens the side menu, navigates to the page and reads, for every
    ``released-processes-card-container`` card, the text of its first ``<p>``
    and the ``href`` of its first ``<a>``. The ``<p>`` text is stored under
    both ``tipo`` and ``processo`` (they are read from the same element).

    :return: One row per card with columns ``tipo``, ``url`` and ``processo``.
    :rtype: pandas.DataFrame
    """
    # Open Sidebar
    open_sidebar()

    # Go Page
    go_to('Processos Liberados')

    # Collect the cards of released processes
    processos_xpath = driver.find_element(
        By.XPATH, '//section[@aria-label="Processos"]'
    )
    list_cards = processos_xpath.find_elements(
        By.XPATH,
        './/section[contains(@class, "released-processes-card-container")]',
    )

    list_dicts = []
    for card in list_cards:
        # Read the <p> once; the previous version looked it up twice and also
        # ran an unused, document-wide '//div[...info-user-content...]' lookup
        # that could raise without contributing to the result.
        title = card.find_element(By.XPATH, './/p').text
        url = card.find_element(By.XPATH, './/a').get_attribute('href')
        list_dicts.append({'tipo': title, 'url': url, 'processo': title})

    # Dataframe
    return pd.DataFrame(list_dicts)


In [None]:
# Released processes: scrape the table and preview its first rows
df_processosliberados = processos_liberados()
df_processosliberados.head()


<br>

## Select Process


In [None]:
def select_processo(df_processosliberados):
    """
    Choose which released process to work on.

    With a single row the choice is automatic. Otherwise the user is prompted
    for an index label until a valid one is typed; non-numeric answers are
    treated as invalid instead of crashing.

    :param df_processosliberados: Table of released processes; must have a
        ``processo`` column. Its index labels are the valid choices.
    :type df_processosliberados: pandas.DataFrame
    :raises ValueError: If the table has no rows.
    :return: ``(index label, text of the 'processo' column for that row)``.
    :rtype: tuple
    """
    list_index = list(df_processosliberados.index)
    if not list_index:
        # Previously an empty table made the prompt loop forever
        raise ValueError('Nenhum processo liberado disponível para seleção.')

    if len(list_index) == 1:
        processo = list_index[0]
        msg = 'O único com acesso ativo.'

    else:
        valores = f'{list_index}'
        prompt = f'Insira o número do índice do processo. O número deve estar entre {valores}: '
        # Set before the loop: it used to be assigned only after an invalid
        # answer, so a valid first answer hit an unbound `msg` below.
        msg = f'Escolhido entre os {len(list_index)} processos com acesso ativo.'

        while True:
            try:
                processo = int(input(prompt))
            except ValueError:
                # Not a number: handle like any other invalid choice
                processo = None
            if processo in list_index:
                break
            print(
                'Número não consta no índice. Por favor, insira um número válido!'
            )

    # Result
    processo_text = df_processosliberados.loc[processo, 'processo']
    print(f'O processo escolhido foi "{processo_text}".\n{msg}')
    return processo, processo_text


In [None]:
# Select the process: keep both its index label and its text
n_processo, processo_text = select_processo(df_processosliberados)


<br>

## get_file_list


In [None]:
def get_file_list(processo):
    """
    List the documents attached to one released process.

    Opens "Processos Liberados", collapses every expanded process entry,
    expands the entry at position ``processo`` and reads the name and link of
    each item in its ``secao-documentos<processo>`` list.

    ``url_1`` is the link as shown in the list (per the original note, it
    changes once opened).

    :param processo: Zero-based position of the process in the page's list;
        also used as the suffix of the ``secao-documentos`` element id.
    :type processo: int
    :return: Columns ``n_doc`` (1-based order), ``nome`` and ``url_1``.
    :rtype: pandas.DataFrame
    """
    # Open Sidebar
    open_sidebar()

    # Go Page
    go_to('Processos Liberados')

    # List of process entries
    tag = '//*[@class="sds-collapse-list sds-card liberacao-collapse-container"]//*[@class="sds-collapse-list__item"]'
    WebDriverWait(driver, 3).until(
        EC.presence_of_element_located((By.XPATH, tag))
    )
    list_process = driver.find_elements(By.XPATH, tag)

    # Collapse every entry that is currently expanded
    for process in list_process:
        try:
            tag = './/button[@class="sds-btn sds-btn--icon button-rotate"]'
            WebDriverWait(process, 1).until(
                EC.presence_of_element_located((By.XPATH, tag))
            ).click()

        except Exception:
            # Entry has no "rotated" button (already collapsed): skip it.
            # `Exception` rather than a bare `except` so Ctrl+C still interrupts.
            pass

    # Expand the selected process
    WebDriverWait(list_process[processo], 1).until(
        EC.presence_of_element_located(
            (By.XPATH, './/button[@class="sds-btn sds-btn--icon"]')
        )
    ).click()

    # All document items of the selected process
    tag = '//*[@id="secao-documentos{}"]//*[@class="sds-data-list"]//*[@class="sds-data-list__item"]'.format(
        processo
    )
    WebDriverWait(list_process[processo], 3).until(
        EC.presence_of_element_located((By.XPATH, tag))
    )
    files = driver.find_elements(By.XPATH, tag)

    # One (name, url) record per document
    records = []
    for file in files:
        name = file.find_element(By.XPATH, './/span').text
        url = file.find_element(
            By.XPATH, './/*[@class="sds-link"]'
        ).get_attribute('href')
        records.append((name, url))

    # Dataframe (explicit columns keep the schema even when there are no rows)
    df = pd.DataFrame(records, columns=['nome', 'url_1'])
    df['n_doc'] = df.index + 1
    return df[['n_doc', 'nome', 'url_1']]


In [None]:
# Collect the released-process cards from the page currently shown by the driver
processos_xpath = driver.find_element(
    By.XPATH, '//section[@aria-label="Processos"]'
)
list_cards = processos_xpath.find_elements(
    By.XPATH,
    './/section[contains(@class, "released-processes-card-container")]',
)

# Walk the cards until one whose <p> text equals the selected process text.
# NOTE(review): if no card matches, `card` is left pointing at the last card
# (and is undefined when list_cards is empty) — the code below does not check.
for card in list_cards:
    title = card.find_element(By.XPATH, './/p').text
    url = card.find_element(By.XPATH, './/a').get_attribute('href')
    # This XPath starts with '//' (no leading dot), so it is evaluated from the
    # document root rather than relative to `card`; `a` is not used in this cell.
    a = card.find_element(
        By.XPATH, '//div[contains(@class, "info-user-content")]'
    ).text
    # Same element as `title`; note this rebinds the global name `processo`.
    processo = card.find_element(By.XPATH, './/p').text
    print(title)
    # Loop-invariant: text of the process chosen via select_processo()
    processo_text22 = df_processosliberados.loc[n_processo, 'processo']
    if title == processo_text22:
        break

# Click the documents container of the chosen card and collect its entries
anexos_xpath = card.find_element(
    By.XPATH, './/div[@class="released-processes-docs-container"]'
)
anexos_xpath.click()
files = anexos_xpath.find_elements(
    By.XPATH, './/div[@class="released-processes-docs-content"]'
)
# For each document entry keep its visible text and the href of its first link
list_name = []
list_url = []
for file in files:
    name = file.text
    list_name.append(name)

    url = file.find_element(By.XPATH, './/a').get_attribute('href')
    list_url.append(url)
# Build the table: n_doc is the 1-based position of the document in the list
df = pd.DataFrame(list(zip(list_name, list_url)), columns=['nome', 'url_1'])
df['n_doc'] = df.index + 1
df = df[['n_doc', 'nome', 'url_1']]
df


<br>

## get url redirects


In [None]:
def get_url_redirects(url):
    """
    Open a document link and resolve the URLs it leads to.

    The returned values map onto the table columns as follows:

    - ``url_2``: the browser URL after loading ``url`` (per the original note,
      the page with a bar on top);
    - ``url_3``: the ``src`` of the ``<iframe id="content">`` on that page;
    - ``url_download``: direct file URL derived from the inner PDF viewer
      frame, or ``None`` when no viewer is found;
    - ``pdf``: whether the inner PDF viewer was found.

    Note: this leaves the driver focused inside the ``content`` iframe.

    :param url: Document link (the ``url_1`` column).
    :type url: str
    :return: ``[url_2, url_3, url_download, pdf]``.
    :rtype: list
    """
    # Load the page and wait until the document has finished loading
    driver.get(url)
    time.sleep(2)

    while driver.execute_script('return document.readyState') != 'complete':
        time.sleep(2)

    # URL 2: address after any redirect
    url_2 = driver.current_url

    # URL 3: source of the outer content iframe
    driver.switch_to.default_content()
    content_frame = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//iframe[@id="content"]'))
    )
    url_3 = content_frame.get_attribute('src')
    driver.switch_to.frame(content_frame)

    # Download URL: only available when the PDF viewer frame is present
    try:
        # NOTE(review): the class name "pdjJsFrameViewer" may be a typo for
        # "pdfJsFrameViewer" — confirm against the page before changing it.
        viewer_frame = WebDriverWait(driver, 2).until(
            EC.presence_of_element_located(
                (By.XPATH, '//*[@class="pdjJsFrameViewer"]')
            )
        )
        viewer_src = urllib.parse.unquote(
            viewer_frame.get_attribute('src'), encoding='utf-8', errors='replace'
        )
        # Strip the viewer prefix so only the file location remains
        url_download = viewer_src.replace(
            'go-docs-file-viewer/pdfjs/web/viewer.html?file=/', ''
        )
        pdf = True
    except Exception:
        # No viewer within the timeout: treated as "not a PDF".
        # `Exception` rather than a bare `except` so Ctrl+C still interrupts.
        url_download = None
        pdf = False

    # Results
    return [url_2, url_3, url_download, pdf]


In [None]:
# Folder-safe version of the process text: '.' becomes '_' and '/' becomes '-'
processo_text_rename = processo_text.replace('.', '_').replace('/', '-')
processo_text_rename


In [None]:
# Create the per-process output folder under data_path (no error if it already exists;
# parent folders are not created because parents= is left at its default)
processo_path = data_path / processo_text_rename
processo_path.mkdir(exist_ok=True)


In [None]:
# Read the file table from the process folder.
# NOTE(review): in this notebook the CSV is only written by the "Escreve Tabela" cell
# further down, so on a fresh run this read presumably relies on a file left by an
# earlier session — confirm. It also replaces the `df` built in the cell above.
df = pd.read_csv(processo_path / 'tab_arquivos.csv')
df.info()
df.head()


In [None]:
# Resolve the redirect URLs for every row (slow: get_url_redirects loads each
# url_1 in the browser); the returned 4-item list is expanded into four columns.
df[['url_2', 'url_3', 'url_download', 'pdf']] = df.apply(
    lambda x: get_url_redirects(x['url_1']), axis=1, result_type='expand'
)


In [None]:
# Preview the table with the new URL columns
df.head()


In [None]:
# Write the table to the process folder (without the index column)
df.to_csv(processo_path / 'tab_arquivos.csv', index=False)


<br>

## Merge PDFs


In [None]:
def merge_files(processo_path):
    """
    Merge the numbered PDFs of a process folder into one bookmarked PDF.

    Every ``*.pdf`` in the folder except the output file itself is taken as
    input. File names are expected to start with ``"<number> - "``; files are
    appended in ascending numeric order and each one gets an outline entry
    named after the file. The result is written to
    ``"Processo Unificado.pdf"`` in the same folder.

    :param processo_path: Folder holding the downloaded PDFs.
    :type processo_path: pathlib.Path
    :raises FileNotFoundError: If the folder has no PDF to merge.
    :raises ValueError: If a file-name prefix cannot be parsed as a number.
    :return: The status string ``'Doc Ok!'``.
    :rtype: str
    """
    # Output file (excluded from the inputs so a previous merge is not re-merged)
    output_file = processo_path / 'Processo Unificado.pdf'

    pdf_files = [f for f in processo_path.glob('*.pdf') if f != output_file]
    if not pdf_files:
        raise FileNotFoundError(f'Nenhum PDF para unificar em "{processo_path}"')

    # Order the files by the numeric prefix that precedes ' - ' in the name
    df = pd.DataFrame(
        {
            'id': [f.name.split(' - ', 1)[0] for f in pdf_files],
            'nomearquivo': pdf_files,
        }
    )
    df['id'] = pd.to_numeric(df['id'])
    df = df.sort_values(by=['id']).reset_index(drop=True)

    merged_object = PdfMerger()
    try:
        for pdf_file in df['nomearquivo']:
            # glob() already yields the complete path, so it is used as is.
            # The old call also passed 'rb' as PdfReader's second positional
            # argument, which is `strict`, unintentionally enabling strict parsing.
            merged_object.append(PdfReader(str(pdf_file)), pdf_file.name)

        # Write all the files into the single output file
        merged_object.write(str(output_file))
    finally:
        # Release the file handles held by the merger
        merged_object.close()
    return 'Doc Ok!'


<br>

## Download


In [None]:
def is_download_finished(temp_folder):
    """
    Tell whether the download folder holds exactly one finished file.

    Based on
    https://stackoverflow.com/questions/34338897/python-selenium-find-out-when-a-download-has-completed

    :param temp_folder: Folder the browser downloads into.
    :type temp_folder: str or pathlib.Path
    :return: ``True`` when the folder has no ``*.part`` (Firefox) and no
        ``*.crdownload`` (Chrome) entries and exactly one entry matching
        ``*.*``; ``False`` otherwise.
    :rtype: bool
    """
    folder = Path(temp_folder)

    # Any browser temp file means a download is still running
    pending = [*folder.glob('*.part'), *folder.glob('*.crdownload')]
    if pending:
        return False

    return len(list(folder.glob('*.*'))) == 1


In [None]:
def download_selenium(
    path_search, path_move, out_filename, max_attempts=10, poll_interval=2
):
    """
    Wait for a browser download to finish, then move and rename the file.

    The download folder is re-listed on every attempt (the old version listed
    it once and never saw files that appeared later). A download counts as
    finished when the folder holds exactly one regular file matching ``*.*``
    and no browser temp file (``*.part`` / ``*.crdownload``). One listing
    drives both the decision and the move.

    :param path_search: Folder the browser downloads into.
    :type path_search: pathlib.Path
    :param path_move: Destination folder.
    :type path_move: pathlib.Path
    :param out_filename: Name given to the file inside ``path_move``.
    :type out_filename: str
    :param max_attempts: How many times the folder is checked.
    :type max_attempts: int
    :param poll_interval: Seconds slept between checks.
    :type poll_interval: float
    :raises TimeoutError: If a temp file is still present after the last check.
    :raises Exception: If more than one finished file is found.
    """
    path_search = Path(path_search)
    path_move = Path(path_move)
    temp_suffixes = ('.part', '.crdownload')

    in_progress, finished = [], []
    for attempt in range(max_attempts):
        entries = [p for p in path_search.glob('*.*') if p.is_file()]
        in_progress = [p for p in entries if p.name.endswith(temp_suffixes)]
        finished = [p for p in entries if not p.name.endswith(temp_suffixes)]
        if not in_progress and len(finished) == 1:
            break
        # No sleep after the last check
        if attempt + 1 < max_attempts:
            time.sleep(poll_interval)

    if in_progress:
        # Never move a partial file as if it were the finished download
        raise TimeoutError(
            f'Download ainda em andamento no diretório "{path_search}"'
        )

    if len(finished) > 1:
        print(f'Tem mais de um arquivo no diretório "{path_search}"\nErro!!!!')
        raise Exception('Ajustar função "rename file"')

    if not finished:
        # Same outcome as before (nothing moved, no exception), but now visible
        print(f'> Nenhum arquivo encontrado em "{path_search}"')
        return

    shutil.move(str(finished[0]), str(path_move / f'{out_filename}'))
    print(f'> Arquivo renomeado para "{out_filename}"')


<br>

# Run All


<br>

## Parte 1


In [None]:
# test2 = is_download_finished(temp_path)
# print(test2)
# tentativa = 0
# while not test2 and tentativa < 10:
#     test2 = is_download_finished(temp_path)
#     print(test2)
#     time.sleep(2)
#     tentativa += 1


In [None]:
# Display the driver object created earlier in the notebook
driver


In [None]:
# Cria Driver
# create_driver(temp_path)


In [None]:
# Log in with the credentials read from the .env file
get_login(CPF, PASSWORD)


In [None]:
# Dismiss the guided-tour popup if it appears after login
fecha_popups()


In [None]:
# Meus Processos
# df_meusprocessos = get_meusprocessos()
# df_meusprocessos.head()


In [None]:
# Released processes: scrape the table and preview its first rows
df_processosliberados = processos_liberados()
df_processosliberados.head()


In [None]:
# Select Process
# processo, processo_text = select_processo(df_processosliberados)


In [None]:
# Get File List
# Pass the index returned by select_processo() (`n_processo`). The name
# `processo` is not set by that call; get_file_list() uses its argument as a
# list position, so it needs the integer index.
df = get_file_list(n_processo)
df.head()


In [None]:
# Resolve the redirect URLs for every row (slow: get_url_redirects loads each
# url_1 in the browser); the returned 4-item list is expanded into four columns.
df[['url_2', 'url_3', 'url_download', 'pdf']] = df.apply(
    lambda x: get_url_redirects(x['url_1']), axis=1, result_type='expand'
)


In [None]:
# Create Directory
# Use the folder-safe form of the process text ('.' -> '_', '/' -> '-'): with
# the raw text a '/' would be read as a path separator and mkdir would fail
# or create a nested folder.
processo_path = data_path / processo_text.replace('.', '_').replace('/', '-')
processo_path.mkdir(parents=True, exist_ok=True)


In [None]:
# Write the table to the process folder (without the index column)
df.to_csv(processo_path / 'tab_arquivos.csv', index=False)


<br>

## Parte 2


In [None]:
# Read the file table back from the process folder and inspect it
df = pd.read_csv(processo_path / 'tab_arquivos.csv')
df.info()
df.head()


In [None]:
# Read the file table again.
# NOTE(review): same statements as the cell right before it; re-running it resets `df`
# to the full table, which presumably is why it is kept before the filter — confirm or remove.
df = pd.read_csv(processo_path / 'tab_arquivos.csv')
df.info()
df.head()

In [None]:
# Filter: keep only rows flagged as PDF whose n_doc is between 29 and 50 (inclusive).
# NOTE(review): the 29-50 range is hard-coded — presumably a one-off batch selection; confirm before reuse.
# `df` is overwritten, so re-running this cell filters the already-filtered table.
mask = (df['n_doc'] >= 29) & (df['n_doc'] <= 50) & (df['pdf'] == True)
df = df[mask].copy()

# Results
df.head()


In [None]:
# https://stackoverflow.com/questions/29563335/how-do-i-load-session-and-cookies-from-selenium-browser-to-requests-library-in-p
# Copy the browser's cookies so plain HTTP requests reuse the logged-in session
cookies = driver.get_cookies()

# Create a requests session carrying those cookies (only name and value are copied)
s = requests.Session()
for cookie in cookies:
    s.cookies.set(cookie['name'], cookie['value'])


In [None]:
# Loop through the table and download each document
for k, v in df.iterrows():
    # Row parameters
    n_doc = v['n_doc']
    url_download = v['url_download']
    url_2 = v['url_2']
    nome = v['nome']
    pdf = v['pdf']

    # PDF: fetch it directly with the requests session
    if pdf:
        print(f'Download do PDF {n_doc} - {nome}\n{url_download}\n')

        # Fetch the file; if the body decodes as UTF-8 it is printed and compared
        # below against the portal's error message, otherwise it is taken to be
        # binary PDF content.
        r = s.get(url_download, stream=True)
        try:
            text = r.content.decode('utf-8')
            print(text)
        except:
            text = 'É PDF!'

        # Anything other than the portal's "could not retrieve" message is saved
        if (
            text
            != 'Não foi possível recuperar a liberação externa para este documento'
        ):
            # Pause before saving the file
            time.sleep(5)

            # Clean the name: drop an embedded '.pdf ' and remove / : " * ? < > |
            # (the '\\/' in the pattern is an escaped slash, so backslashes are kept)
            nome = nome.replace('.pdf ', ' ')
            nome = re.sub('[\\/:"*?<>|]+', '', nome)

            filename = processo_path / f'{n_doc} - {nome}.pdf'
            # Only write the file on HTTP 200; other statuses are skipped silently
            if r.status_code == 200:
                with open(filename, 'wb') as f:
                    for chunk in r:
                        f.write(chunk)

        elif (
            text
            == 'Não foi possível recuperar a liberação externa para este documento'
        ):
            # TODO: the portal refused this document; it is not downloaded
            print('Arrumar esta merda!')

    # Not a PDF: download through the browser's download button
    elif not pdf:
        print(f'Download do arquivo {n_doc} - {nome}\n{url_2}\n')

        # Open the document page; note implicitly_wait(20) is set on the driver
        # here and is not reset afterwards
        driver.get(url_2)
        driver.implicitly_wait(20)

        # Return to the main document (outside any iframe)
        driver.switch_to.default_content()

        # Switch focus to the content iframe
        iframe = WebDriverWait(driver, 3).until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="content"]'))
        )
        driver.switch_to.frame(iframe)
        time.sleep(1)

        # Click Download, then wait a fixed 20 s for the browser to fetch the file
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="btnDownload"]'))
        ).click()
        time.sleep(20)

        # Move the downloaded file from temp_path into the process folder as a .zip
        nome = nome.replace('.zip ', ' ').replace('.ZIP ', ' ')
        download_selenium(temp_path, processo_path, f'{n_doc} - {nome}.zip')


<br>

## Parte 3


In [None]:
# Merge the downloaded PDFs of the process folder into a single file
merge_files(processo_path)


In [None]:
# Quit the driver
driver.quit()


In [None]:
# Remove the temporary download folder.
# NOTE(review): assuming temp_path is a pathlib.Path, rmdir() only succeeds on an
# empty directory and raises OSError otherwise — confirm this cell is still wanted.
temp_path.rmdir()


In [None]:
# Remove the temporary download folder and anything left in it. ignore_errors
# keeps this cell from failing when the folder is already gone (for example
# after the rmdir() cell succeeded).
shutil.rmtree(temp_path, ignore_errors=True)
