# Modules used

In [None]:
import win32com.client as client
import datetime as datetime
import pandas as pd
import pdfplumber
import re
import os

from pathlib import Path

# Global variables

In [None]:
## Folder where the automation is located.
## `__file__` is not defined when the code is executed from an interactive
## session / notebook cell, so fall back to the current working directory.
## NOTE(review): `dir` shadows the builtin of the same name; it is kept because
## the functions below read it as a module-level global.
try:
    dir = Path(__file__).parent.resolve()
except NameError:
    dir = Path.cwd()

## Reference month (MM/YYYY): used to pick the invoice e-mail and to label the report
date = datetime.date.today().strftime("%m/%Y")

## Outlook connection + parameters
outlook = client.Dispatch("Outlook.Application")
## `GetNamespace` is the documented spelling; other casings only resolve under
## case-insensitive late binding.
namespace = outlook.GetNamespace('MAPI')
account = 'company@email.com'
folder = 'folder'
sender = 'partner@email.com'

# Save extract

In [None]:
def get_extract(email, folder, sender):
    """Save this month's invoice extract from Outlook and return its file name.

    Scans the Outlook folder ``folder`` of account ``email`` for items sent by
    ``sender`` whose received month (``MM/YYYY``) equals the module-level
    ``date``. The first attachment whose file name ends in ``_ext.pdf`` is
    saved into the module-level ``dir`` folder.

    Args:
        email: Name of the Outlook account (top-level MAPI folder) to open.
        folder: Name of the folder inside that account to search.
        sender: Sender address the item must have.

    Returns:
        The file name of the saved attachment, or ``None`` when no matching
        item/attachment was found.
    """
    ## Selects the right account and folder
    mailbox = namespace.Folders[email]
    inbox = mailbox.Folders[folder]

    ## Items are checked lazily so the scan stops at the first saved attachment
    for message in inbox.Items:
        ## Items that expose no sender / received time (presumably non-mail
        ## items such as reports) are skipped instead of aborting the scan
        if getattr(message, "SenderEmailAddress", None) != sender:
            continue
        received = getattr(message, "ReceivedTime", None)
        if received is None:
            continue
        if pd.to_datetime(received, utc=True).strftime("%m/%Y") != date:
            continue

        ## Recovers only the attachments (win32com)
        for pdf in message.Attachments:
            ## Selects the correct attachment
            if str(pdf.FileName).endswith("_ext.pdf"):
                ## COM expects a plain string path, not a pathlib.Path
                pdf.SaveAsFile(str(dir / str(pdf.FileName)))
                return pdf.FileName

    ## Nothing matched this month
    return None

## Save this month's extract; get_extract yields None when nothing matched.
## NOTE(review): the message below is printed even when bhs_pdf is None.
bhs_pdf = get_extract(account, folder, sender)
print("Invoice obtained")

# Read extract

In [None]:
def read_extract(extract):
    """Parse the license lines of the invoice PDF into a DataFrame.

    Opens ``extract`` (a file name inside the module-level ``dir`` folder)
    with pdfplumber, keeps the text lines accepted by the license filter and
    pulls e-mail, plan, status and cost out of each one.

    Args:
        extract: File name of the PDF to read.

    Returns:
        A DataFrame with the columns ``Email``, ``Plano``, ``Status`` and
        ``Valor``, one row per license line (empty when none was found).
        Fields that could not be located in a line are left empty.
    """
    ## Patterns are compiled once instead of once per page.
    ## https://regex101.com/
    ## NOTE(review): the filter is applied with .match(), which anchors every
    ## alternative at the start of the line, so " Exchange ", " Microsoft "
    ## and " Flow" only accept lines that begin with a space — confirm intent.
    license_filter = re.compile(r'^Power|Plano | Exchange | Microsoft | Flow')
    status_pattern = re.compile(r'\w+í\w+')
    email_pattern = re.compile(r'\S+\w+@')          ## https://regex101.com/r/xoITe4/1
    ## NOTE(review): \w also matches letters and stops at ".", so a value like
    ## "1.234,56" would be read as 234.56 — verify against the invoice layout.
    value_pattern = re.compile(r'\w+,\w+')

    ## Rows are collected in a list and the DataFrame is built once at the end
    rows = []

    with pdfplumber.open(dir / str(extract)) as pdf:
        for page in pdf.pages:
            ## A page without extractable text may yield None
            content = page.extract_text() or ""

            ## Keep only the license lines
            for line in content.split('\n'):
                if not license_filter.match(line):
                    continue

                # Plan: everything before the first "|"
                plano = str(line.split('|')[0])

                # Status
                status = status_pattern.findall(line)
                status = str(status[0]) if status else None

                # Email: the captured local part (up to "@") plus the fixed domain
                email = email_pattern.findall(line)
                email = str(email[0] + "manchesterinvest.com.br") if email else None

                # Cost: decimal comma converted to a decimal point
                valor = value_pattern.findall(line)
                valor = float(valor[0].replace(",", ".")) if valor else None

                rows.append([email, plano, status, valor])

    return pd.DataFrame(rows, columns=["Email", "Plano", "Status", "Valor"])
    
## Parse the saved PDF into the licenses table (Email, Plano, Status, Valor)
licenses = read_extract(bhs_pdf)
print("Licenses identified")

# Read employee data

In [None]:
def get_employees():
    """Load the employee list from ``Colaboradores.xlsx``.

    The workbook is read from the module-level ``dir`` folder. Its first data
    row holds the real column names, so that row is promoted to header and
    dropped from the data.

    Returns:
        A DataFrame with the columns ``Cód. Interno``, ``e-Mail`` and
        ``Dt. Desligamento`` and a fresh 0-based index.
    """
    ## TODO: fetch the users from this report with selenium instead of a local file:
    ## https://finadvisors.warpapp.com.br/Integracao/ExportacaoAgenteDetalhe?id_ProcessoExportacaoSaida=47&ds_ProcessoExportacaoSaida=Agentes%20-%20Lista%20Detalhada&ds_DescricaoProcesso=Lista%20com%20dados%20de%20Agentes

    doc = 'Colaboradores.xlsx'                  ## Replace with the data source's output once available
    raw = pd.read_excel(dir / doc)              ## Read the workbook once (it was previously parsed twice)
    header = raw.iloc[0]                        ## Row at index 0 carries the real column names
    data = raw[1:]                              ## Everything except index 0
    data.columns = header.values                ## Promote index 0 to header (.values keeps only the labels)

    ## Keep only these three columns and renumber the rows from 0
    user_data = data.loc[:, ['Cód. Interno', 'e-Mail', 'Dt. Desligamento']].reset_index(drop=True)

    return user_data

## Load the employee table (internal code, e-mail, dismissal date)
employees = get_employees()
print("Employees data obtained")

# Analysis

In [None]:
def do_analysis(invoice, users):
    """Cross the invoice with the employee list.

    Args:
        invoice: Licenses table with at least ``Email``, ``Plano``,
            ``Status`` and ``Valor``.
        users: Employees table with at least ``Cód. Interno``, ``e-Mail``
            and ``Dt. Desligamento``.

    Returns:
        A 4-tuple:
        licenses billed to employees that have a dismissal date,
        the ``Valor`` total of those licenses (Series indexed by ``Valor``),
        licenses whose status is ``Disponível``,
        the ``Valor`` total of those licenses (Series indexed by ``Valor``).
    """
    staff = pd.DataFrame(users)
    billed = pd.DataFrame(invoice)
    report_columns = ['Cód. Interno', 'Dt. Desligamento', 'Email', 'Plano', 'Status', 'Valor']

    ## Employees with a dismissal date filled in
    left_company = staff.loc[staff['Dt. Desligamento'].notna()]

    ## Match them to invoice lines by e-mail, then discard rows with no invoice e-mail
    matched = left_company.merge(billed, how='inner', left_on='e-Mail', right_on='Email')
    matched = matched.loc[matched['Email'].notna()]

    ## The total is taken before narrowing to the report columns
    matched_total = matched[['Valor']].sum()
    matched_report = matched[report_columns]

    ## Licenses flagged as available (unassigned) on the invoice
    unassigned = billed.loc[billed['Status'] == "Disponível"]
    unassigned_total = unassigned[['Valor']].sum()

    return matched_report, matched_total, unassigned, unassigned_total

## results is the 4-tuple returned by do_analysis:
## (dismissed licenses, their total, available licenses, their total)
results = do_analysis(licenses, employees)
print("Analysis done")

# Send report

In [None]:
def send_report(analysis, invoice, recipient='other.company@email.com'):
    """E-mail the analysis as an HTML report with the invoice PDF attached.

    Uses the module-level ``outlook`` application to create and send the
    message, ``dir`` to locate the attachment and ``date`` for the subject
    and body.

    Args:
        analysis: The 4-item result of ``do_analysis``: dismissed-licenses
            table, its ``Valor`` total, available-licenses table, its
            ``Valor`` total.
        invoice: File name of the invoice PDF inside ``dir``.
        recipient: Address the report is sent to.

    Returns:
        None.
    """
    ## Tables are rendered as HTML; totals are read from the 'Valor' entry of each sum
    desligados = analysis[0].to_html(border = 1, index=False, justify="left")
    total_desligados = analysis[1]['Valor']
    disponiveis = analysis[2].to_html(border = 1, index=False, justify="left")
    total_disponiveis = analysis[3]['Valor']

    mail = outlook.CreateItem(0)        ## 0 = olMailItem
    ## The attachment path is handed to COM as a plain string
    attachment = str(dir / invoice)

    texto_HTML = """<body>
        <div>
            <p>
                Hello,
                <br/><br/>
                Within the {ref} invoice, the following licenses should have been disabled:
                <br/><br/>
                {desligados_html}
                <br/>
                Adding up: R$ {sum_desligados:.2f}
            </p>
        </div>
        <br/>
        <div>
            <p>
                These licenses are unassigned:
    		    <br/><br/>
                {disponiveis_html}
    		    <br/>
                Adding up: R$ {sum_disponiveis:.2f}
            </p>
        </div>
    	<br/>
        <div>
            <p>
                Best regards,
                <br/>
                500000 Automation, Caio Pavesi
            </p>
        </div>
    </body>"""

    mail.Subject = f"Microsoft assigned licenses analysis {date}"
    mail.Attachments.Add(attachment)
    ## 2 = olFormatHTML (1 is olFormatPlain, which does not match an HTML body)
    mail.BodyFormat = 2
    mail.HTMLBody = texto_HTML.format(
        desligados_html = desligados,
        sum_desligados = total_desligados,
        disponiveis_html = disponiveis,
        sum_disponiveis = total_disponiveis,
        ref = date,
    )
    mail.To = recipient
    mail.Send()

    return None

## E-mail the report with the invoice PDF attached
send_report(results, bhs_pdf)
print("Report sent")

# Folder cleanup

In [None]:
def organize_folder(users, invoice):
    """Delete the downloaded invoice file from the automation folder.

    Args:
        users: Not used by this function.
        invoice: File name of the invoice inside the module-level ``dir``.

    Returns:
        None.
    """
    leftover = dir / invoice
    ## Path.unlink removes the file just like os.remove
    leftover.unlink()

## Remove the downloaded invoice; the message printed is Portuguese for "5S done"
organize_folder(employees, bhs_pdf)
print("5S feito")