In [11]:
import pandas

def import_csv(file_path):
    """Read a ';'-separated event log and print its event and case counts.

    Args:
        file_path: Path of the CSV file. The file must have a ``case_id``
            column.

    Returns:
        None. The two counts are printed to standard output.
    """
    frame = pandas.read_csv(file_path, sep=';')
    # Number of events: one row per event (the steps a person goes through).
    event_count = frame.shape[0]
    # Number of cases: one distinct case_id per case (the number of people).
    case_count = len(frame['case_id'].unique())
    summary = "Number of events: {}\nNumber of cases: {}"
    print(summary.format(event_count, case_count))


if __name__ == "__main__":
    # Print the event and case counts of the sample running-example log.
    # NOTE(review): hard-coded absolute Windows path on drive Y: — this cell
    # only runs on a machine where that location exists.
    import_csv("Y:\\git\\process ming\\grupo-2\\Bases_de_Dados\\running-example.csv")


Number of events: 42
Number of cases: 6


In [13]:
import pandas
import pm4py


def import_csv(file_path):
    """Load a ';'-separated CSV event log and print its start and end activities.

    Args:
        file_path: Path of the CSV file. The columns ``case_id``,
            ``activity`` and ``timestamp`` are handed to
            ``pm4py.format_dataframe`` as the case, activity and timestamp
            columns.

    Returns:
        None. The start and end activities are printed to standard output.
    """
    # Load the raw event log.
    raw_frame = pandas.read_csv(file_path, sep=';')
    # Per the original notes: format_dataframe turns the event table into a
    # format usable by any pm4py process-mining algorithm; it creates a copy
    # of the input log and renames the assigned columns to the standardized
    # column names used in pm4py.
    log_frame = pm4py.format_dataframe(
        raw_frame,
        case_id='case_id',
        activity_key='activity',
        timestamp_key='timestamp',
    )
    # Activities that occur first in the event log.
    first_activities = pm4py.get_start_activities(log_frame)
    # Activities that occur last in the event log.
    last_activities = pm4py.get_end_activities(log_frame)
    report = "Start activities: {}\nEnd activities: {}"
    print(report.format(first_activities, last_activities))

if __name__ == "__main__":
    # Print the start and end activities of the sample running-example log.
    # NOTE(review): hard-coded absolute Windows path on drive Y: — this cell
    # only runs on a machine where that location exists.
    import_csv("Y:\\git\\process ming\\grupo-2\\Bases_de_Dados\\running-example.csv")

Start activities: {'register request': 6}
End activities: {'reject request': 3, 'pay compensation': 3}


In [None]:
# exemplo de importação de arquivos .xes
import pm4py
def import_xes(file_path):
    """Load an XES event log and print its start and end activities.

    Args:
        file_path: Path of the .xes file handed to ``pm4py.read_xes``.

    Returns:
        None. The start and end activities are printed to standard output.
    """
    xes_log = pm4py.read_xes(file_path)
    # Activities that occur first in the event log.
    first_activities = pm4py.get_start_activities(xes_log)
    # Activities that occur last in the event log.
    last_activities = pm4py.get_end_activities(xes_log)
    report = "Start activities: {}\nEnd activities: {}"
    print(report.format(first_activities, last_activities))

if __name__ == "__main__":
    # Print the start and end activities of the sample XES log.
    # NOTE(review): hard-coded absolute path under C:/Users/demo — adjust to
    # wherever the sample file lives on your machine.
    import_xes("C:/Users/demo/Downloads/running-example.xes")

In [None]:
# armazenando um quadro de dados (log de eventos) do Pandas como um arquivo csv
import pandas as pd
import pm4py

if __name__ == "__main__":
    # Read the ';'-separated sample CSV into a pandas data frame and pass it
    # through pm4py.format_dataframe with its case, activity and timestamp columns.
    # NOTE(review): both paths below are hard-coded absolute paths under C:/Users/demo.
    event_log = pm4py.format_dataframe(pd.read_csv('C:/Users/demo/Downloads/running-example.csv', sep=';'), case_id='case_id',
    activity_key='activity', timestamp_key='timestamp') # reading a pandas data frame
    event_log.to_csv('C:/Users/demo/Desktop/running-example-exported.csv') # writing it out as a csv file

In [None]:
import pm4py

if __name__ == "__main__":
    # Convert the sample XES log to a CSV file by way of a data frame.
    # NOTE(review): hard-coded absolute paths under C:/Users/demo. The output
    # path is the same one the CSV-export cell above writes to, so running
    # both cells overwrites that file.
    event_log = pm4py.read_xes('C:/Users/demo/Downloads/running-example.xes') # XES file that will be exported
    df = pm4py.convert_to_dataframe(event_log) # converting the information read into a data frame
    df.to_csv('C:/Users/demo/Desktop/running-example-exported.csv') # CSV file that receives the exported data


In [22]:
# carregando um arquivo .xes
def import_xes(file_path):
    """Read an XES log via pm4py and print which activities open and close it.

    Args:
        file_path: Location of the .xes file passed to ``pm4py.read_xes``.

    Returns:
        None. The output is a two-line report on standard output.
    """
    parsed_log = pm4py.read_xes(file_path)
    # Tuple order matters: start activities are queried first, end activities second.
    boundaries = (
        pm4py.get_start_activities(parsed_log),
        pm4py.get_end_activities(parsed_log),
    )
    print("Start activities: {}\nEnd activities: {}".format(*boundaries))

if __name__ == "__main__":
    # Print the start and end activities of the sample XES log.
    # NOTE(review): hard-coded absolute Windows path on drive Y: — this cell
    # only runs on a machine where that location exists.
    import_xes("Y:\\git\\process ming\\grupo-2\\Bases_de_Dados\\running-example.xes")

parsing log, completed traces :: 100%|██████████| 6/6 [00:00<00:00, 5247.25it/s]

Start activities: {'register request': 6}
End activities: {'reject request': 3, 'pay compensation': 3}





In [26]:
# testando filtros, usando arquivos .xes
import pm4py
import datetime as dt

if __name__ == "__main__":
    # Load the sample XES log; every filter below is applied to this same log.
    # Each call rebinds `filtered`, and no result is printed or inspected, so
    # only the last filter's result is left in the variable afterwards.
    # NOTE(review): hard-coded absolute Windows path on drive Y:.
    log = pm4py.read_xes('Y:\\git\\process ming\\grupo-2\\Bases_de_Dados\\running-example.xes')

    # Filter on the activities that occur at the start of a trace.
    filtered = pm4py.filter_start_activities(log, {'register request'})

    # Same start-activity filter with an activity name carrying a ' TYPO!' suffix;
    # presumably a deliberate non-matching name for demonstration — TODO confirm intent.
    filtered = pm4py.filter_start_activities(log, {'register request TYPO!'})

    # Filter on the activities that occur at the end of a trace.
    filtered = pm4py.filter_end_activities(log, {'pay compensation'})

    # Filter on the 'org:resource' event attribute for the values 'Pete' and 'Mike'
    # (no `level` given, so pm4py's default level applies).
    filtered = pm4py.filter_event_attribute_values(log, 'org:resource', {'Pete', 'Mike'})

    # Same event-attribute filter, explicitly at level='event'.
    filtered = pm4py.filter_event_attribute_values(log, 'org:resource', {'Pete', 'Mike'}, level='event')

    # Filter on the 'concept:name' trace attribute for the values '3' and '4'.
    filtered = pm4py.filter_trace_attribute_values(log, 'concept:name', {'3', '4'})

    # Same trace-attribute filter with retain=False; presumably this drops the
    # matching traces instead of keeping them — confirm against the pm4py docs.
    filtered = pm4py.filter_trace_attribute_values(log, 'concept:name', {'3', '4'}, retain=False)

    # Filter on a single variant: the activity sequence ending in 'pay compensation'.
    filtered = pm4py.filter_variants(log, [
        ['register request', 'check ticket', 'examine casually', 'decide', 'pay compensation']])

    # Filter on a single variant: the same sequence ending in 'reject request'.
    filtered = pm4py.filter_variants(log, [
        ['register request', 'check ticket', 'examine casually', 'decide', 'reject request']])

    # Filter on the directly-follows pair ('check ticket', 'examine casually').
    filtered = pm4py.filter_directly_follows_relation(log, [('check ticket', 'examine casually')])

    # Filter on the eventually-follows pair ('examine casually', 'reject request').
    filtered = pm4py.filter_eventually_follows_relation(log, [('examine casually', 'reject request')])

    # Filter on the time range 2010-12-30 .. 2010-12-31 with mode='events'.
    filtered = pm4py.filter_time_range(log, dt.datetime(2010, 12, 30), dt.datetime(2010, 12, 31), mode='events')

    # Same time range with mode='traces_contained'.
    filtered = pm4py.filter_time_range(log, dt.datetime(2010, 12, 30), dt.datetime(2010, 12, 31),
                                       mode='traces_contained')

    # Same time range with mode='traces_intersecting'.
    filtered = pm4py.filter_time_range(log, dt.datetime(2010, 12, 30), dt.datetime(2010, 12, 31),mode='traces_intersecting')

parsing log, completed traces :: 100%|██████████| 6/6 [00:00<00:00, 4895.12it/s]




In [84]:
# filtrando a hora dos casos
import pandas
import pm4py
import datetime
import pytz

def format_clock_times(timestamps):
    """Turn timestamp strings into 'hour:minute:second' strings.

    Args:
        timestamps: Iterable of strings in the format
            ``"%Y-%m-%d %H:%M:%S%z"`` (date, time and UTC offset).

    Returns:
        A list with one string per input. The hour, minute and second are
        written as plain integers, so they are NOT zero-padded
        (e.g. ``'9:6:0'``), matching the output this cell has always produced.

    Raises:
        ValueError: If a timestamp does not match the expected format.
    """
    clock_times = []
    for raw in timestamps:
        parsed = datetime.datetime.strptime(raw, "%Y-%m-%d %H:%M:%S%z")
        clock_times.append(f'{parsed.hour}:{parsed.minute}:{parsed.second}')
    return clock_times


if __name__ == "__main__":
    # NOTE(review): hard-coded absolute Windows path on drive Y:.
    log = pandas.read_csv('Y:\\git\\process ming\\grupo-2\\Bases_de_Dados\\running-example_3.csv', sep = ';')
    listaPessoas = list(log.case_id)
    listaHoras = list(log.timestamp)
    # Convert `listaHoras`, the list built on the line above. The original
    # loop iterated over `listaHora`, a name this cell never defines, so it
    # only ran when that name was left over in the kernel from an earlier run
    # and raised NameError on a fresh kernel.
    listaHora2 = format_clock_times(listaHoras)
    print(listaPessoas)
    print(listaHora2)

[3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4]
['14:32:0', '15:6:0', '16:34:0', '9:18:0', '12:18:0', '13:6:0', '11:43:0', '9:55:0', '10:45:0', '11:32:0', '12:12:0', '14:16:0', '11:22:0', '12:5:0', '11:2:0', '10:6:0', '15:12:0', '11:18:0', '14:24:0', '15:2:0', '16:6:0', '16:22:0', '16:52:0', '11:47:0', '9:2:0', '10:16:0', '11:22:0', '13:28:0', '16:18:0', '14:33:0', '15:50:0', '11:18:0', '12:48:0', '9:6:0', '11:34:0', '13:12:0', '14:56:0', '15:2:0', '12:6:0', '14:43:0', '12:2:0', '15:44:0']
