## Imports

In [1]:
import pandas as pd
import numpy as np
import pm4py

from datetime import datetime, timedelta

## Loads

In [2]:
# Location of the CSV event log, relative to the notebook's working directory.
log_file_path = "./log_2_geral_datas_distribuidas.csv"

# Read the CSV into a DataFrame and preview its first rows.
df = pd.read_csv(log_file_path, encoding = "utf-8")
df.head()

Unnamed: 0,tarefa,dt_inicio,demanda_id,cliente,tp_demanda,dt_fim
0,Estimar_Tamanho,2021-04-26 18:06:40,261,cliente1,evolutiva,2021-04-28 12:10:06
1,Validar_Requisitos,2021-04-26 18:11:42,261,cliente1,evolutiva,2021-04-28 01:56:57
2,Projetar_Arquitetura,2021-04-26 18:16:45,261,cliente1,evolutiva,2021-04-27 17:33:12
3,Projetar_Modelo_Dados,2021-04-26 18:21:46,261,cliente1,evolutiva,2021-04-30 13:58:17
4,Fisicalizar_Modelo_Dados,2021-04-26 18:26:47,261,cliente1,evolutiva,2021-04-30 14:33:04


In [3]:
# Tell pm4py which columns hold the case id, the activity name and the timestamp.
# NOTE: `df` is rebound here, so the raw frame from the load cell is no longer reachable.
df = pm4py.format_dataframe(df, case_id='demanda_id', 
                                activity_key='tarefa', 
                                timestamp_key='dt_inicio')

log = pm4py.convert_to_event_log(df) # Converting the formatted DataFrame into an event log

In [13]:
def simplify_log(log, level, by_top=True):
    """
    Simplifies a log by taking out unpopular variants.

    Parameters:
        log    (pm4py.objects.log) : Log
        level  (int or float)      : With by_top=True: number of top variants to
                                     keep -- 3, 5, 7, 9 or 12; 3 is "muito baixo"
                                     (very low), 12 is "muito alto" (very high).
                                     With by_top=False: the coverage percentage
                                     forwarded to pm4py (check the pm4py docs
                                     for the expected scale).
        by_top (bool)              : If True (default), filter by the top
                                     appearances; otherwise filter by a
                                     percentage cut.
    Returns:
        filtered_log  (pm4py.objects.log) : Simplified log
    """
    # Filtering by top-k appearances (default)
    if by_top:
        return pm4py.filter_variants_top_k(log, level)

    # Filtering by percentage: `level` carries the percentage in this mode.
    # (The previous code referenced an undefined `percentage_cut` name here,
    # which raised NameError whenever by_top=False was used.)
    return pm4py.filter_variants_by_coverage_percentage(log, level)


In [14]:
# Keep only the top 5 variants (by_top defaults to True).
simplify_log(log, 5)

[]

# Atividades

A função `listar_demanda` (definida abaixo) engloba os itens:
* 3.1.1
* 3.1.2
* 3.1.3

In [None]:
from datetime import timedelta
def listar_demanda(log):
    """
    Builds one summary dict per demand (trace) in the log.

    Parameters:
        log (iterable of traces) : each trace exposes an `attributes` mapping
            and iterates over events; each event is a mapping with the keys
            'tarefa', 'dt_inicio', 'dt_fim' (and 'cliente' when the trace
            attributes do not carry it).
    Returns:
        response (list of dict) : one dict per trace with the keys
            - 'cliente'    : trace attribute 'cliente' when present, otherwise
                             the 'cliente' of the first event (None for a
                             trace with no events)
            - 'case_id'    : trace attribute 'concept:name'
            - 'atividades' : dict keyed by activity name; each value holds
                - 'nome_atividade'      : activity name
                - 'primeira_ocorrencia' : earliest 'dt_inicio' of the activity
                - 'quantidade'          : number of occurrences
                - 'tempo'               : sum of (dt_fim - dt_inicio)
    """
    response = []
    for trace in log:
        case_id = trace.attributes['concept:name']

        # 'cliente' is an event-level column in the CSV loaded by this notebook;
        # presumably pm4py only promotes 'case:'-prefixed columns to trace
        # attributes (TODO confirm), so fall back to the events when it is absent.
        cliente = trace.attributes.get('cliente')

        eventos = {}
        for event in trace:
            if cliente is None:
                cliente = event['cliente']

            tarefa = event['tarefa']
            inicio = event['dt_inicio']

            resumo = eventos.get(tarefa)
            if resumo is None:
                resumo = eventos[tarefa] = {
                    'nome_atividade': tarefa,
                    'primeira_ocorrencia': inicio,
                    'quantidade': 0,
                    'tempo': timedelta(0)
                }
            elif inicio < resumo['primeira_ocorrencia']:
                # Keep the earliest start even if events are not time-ordered.
                resumo['primeira_ocorrencia'] = inicio

            resumo['quantidade'] += 1
            resumo['tempo'] += event['dt_fim'] - inicio

        response.append({
            'cliente': cliente,
            'case_id': case_id,
            'atividades': eventos,
        })
    return response


# Number of demands (traces) summarised; `log` is the event log built in the load section.
print(len(listar_demanda(log)))

A função `activity_info` (definida abaixo) engloba os itens:
- 3.3.1
- 3.3.5
- 3.3.6
- 3.3.7
- 3.3.8

In [79]:
def activity_info(demand : "pm4py.objects.log.obj.Trace", order : bool = False):
    """
    Returns each unique activity in a demand and its # of occurrences.

    Parameters:
        demand (pm4py.objects.log.obj.Trace) : demand to be analysed; an iterable
            of events, each a mapping with the keys 'tarefa', 'cliente',
            'dt_inicio' and 'dt_fim'
        order (bool) : if True, activities are ordered by # of occurrences,
            decreasing (ties keep their order of first appearance)
    Returns:
        activities (dict) : dict with activities as key and its information as values
            inside of this dict we have:
            key: activity
            value:
                dict (activity)
                    keys :
                        - freq
                        - name
                        - first_occurrence
                        - total_duration
                        - occurrences (list of dicts)
                            dict
                                - executor
                                - start
                                - finish
                                - duration
    Raises:
        KeyError  : if an event lacks one of the expected keys
        TypeError : if the timestamps of an activity cannot be compared with
                    each other (e.g. tz-aware mixed with tz-naive)
    """
    activities = {} # Keys are the unique activity names of the given demand
    for event in demand:
        name = event['tarefa'] # 'tarefa' holds the activity name (basically its id)
        start = event['dt_inicio']
        finish = event['dt_fim']

        # Information about this single occurrence
        occurrence = {
            'executor': event['cliente'],  # Who did it
            'start': start,
            'finish': finish,
            'duration': finish - start,
        }

        # Explicit membership check. A bare `except:` was used for this before,
        # which also swallowed errors raised by the timestamp comparison and
        # then silently reset everything already accumulated for the activity.
        info = activities.get(name)
        if info is None:
            info = activities[name] = {
                'freq': 0,
                'name': name,
                'first_occurrence': start,
                'total_duration': timedelta(days=0),
                'occurrences': [],
            }
        elif start < info['first_occurrence']:
            # Events are not assumed to be time-ordered: keep the earliest start
            info['first_occurrence'] = start

        info['freq'] += 1
        info['occurrences'].append(occurrence)
        info['total_duration'] = info['total_duration'] + occurrence['duration']

    if order:
        # sorted() is stable, so equally frequent activities keep insertion order
        activities = dict(sorted(activities.items(),
                                 key=lambda item: item[1]['freq'],
                                 reverse=True))

    return activities

In [80]:
# Per-activity summary of the first trace in the log, ordered by frequency (descending).
a = activity_info(log[0], order=True)

In [81]:
# Display the summary dict returned by activity_info.
a

{'Analise_Risco_Produto': {'freq': 42,
  'name': 'Analise_Risco_Produto',
  'first_occurrence': Timestamp('2021-05-31 19:41:45+0000', tz='UTC'),
  'total_duration': Timedelta('84 days 01:17:46'),
  'occurrences': [{'executor': 'cliente1',
    'start': Timestamp('2021-05-31 19:41:45+0000', tz='UTC'),
    'finish': Timestamp('2021-06-02 14:57:12+0000', tz='UTC'),
    'duration': Timedelta('1 days 19:15:27')},
   {'executor': 'cliente1',
    'start': Timestamp('2021-05-31 20:07:09+0000', tz='UTC'),
    'finish': Timestamp('2021-06-02 10:01:56+0000', tz='UTC'),
    'duration': Timedelta('1 days 13:54:47')},
   {'executor': 'cliente1',
    'start': Timestamp('2021-05-31 20:12:12+0000', tz='UTC'),
    'finish': Timestamp('2021-06-04 19:21:53+0000', tz='UTC'),
    'duration': Timedelta('3 days 23:09:41')},
   {'executor': 'cliente1',
    'start': Timestamp('2021-05-31 20:32:24+0000', tz='UTC'),
    'finish': Timestamp('2021-06-04 09:03:34+0000', tz='UTC'),
    'duration': Timedelta('3 days 12