In [None]:
# import das bibliotecas
import pyspark
from pyspark.sql.types import *
from pyspark.sql.functions import *
from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession
import numpy as np
import pandas as pd
import collections
import os
from os.path import isfile, isdir, join
import pm4py
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.visualization.dfg import visualizer as dfg_visualization

In [None]:
# Pass the PostgreSQL JDBC driver jar to the pyspark shell through --jars.
# NOTE(review): the jar location is an absolute, environment-specific path --
# confirm it exists wherever this notebook is executed.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--jars file:///home/jovyan/jdbc/postgresql-42.2.17.jar pyspark-shell'

In [None]:
# Database connection settings.
# Every value can be overridden through an environment variable so that
# credentials do not need to be stored in the notebook. The literals are kept
# only as backward-compatible defaults (they are the original hard-coded
# values).
db_name = os.environ.get("INOVACNJ_DB_NAME", "dbinovacnj")
db_url = os.environ.get("INOVACNJ_DB_URL", "jdbc:postgresql://postgres:5432/" + db_name)
db_user = os.environ.get("INOVACNJ_DB_USER", "inovacnj")
# SECURITY: prefer setting INOVACNJ_DB_PASS instead of relying on this default.
db_pass = os.environ.get("INOVACNJ_DB_PASS", "inovacnj@admin")
db_driver = "org.postgresql.Driver"

In [None]:
# Spark initialisation: local master with two threads plus memory / SQL tuning
conf = (
    SparkConf()
    .setMaster("local[2]")
    .setAppName("LendoDB")
    .set("spark.executor.memory", "4g")
    .set("spark.driver.memory", "4g")
    .set("spark.driver.maxResultSize", "2g")
    .set("spark.ui.enabled", "true")
    .set("spark.sql.shuffle.partitions", "800")
    .set("spark.sql.execution.arrow.pyspark.enabled", "false")
)

# Reuse an existing session when there is one, otherwise create it from `conf`
spark = SparkSession.builder.config(conf=conf).getOrCreate()

sc = spark.sparkContext

In [None]:
# Schema of the process JSON files. Every field is declared nullable.
# The nested structures are built bottom-up so the top-level layout stays readable.

# "orgaoJulgador" has the same layout under dadosBasicos and under each movimento
_orgao_julgador = StructType([
    StructField("codigoMunicipioIBGE", LongType(), True),
    StructField("codigoOrgao", StringType(), True),
    StructField("instancia", StringType(), True),
    StructField("nomeOrgao", StringType(), True),
])

# One element of dadosBasicos.assunto
_assunto = StructType([
    StructField("assuntoLocal", StructType([
        StructField("codigoAssunto", LongType(), True),
        StructField("codigoPaiNacional", LongType(), True),
        StructField("descricao", StringType(), True),
    ]), True),
    StructField("codigoNacional", LongType(), True),
    StructField("principal", BooleanType(), True),
])

_dados_basicos = StructType([
    StructField("assunto", ArrayType(_assunto), True),
    StructField("classeProcessual", LongType(), True),
    StructField("codigoLocalidade", StringType(), True),
    StructField("competencia", StringType(), True),
    StructField("dataAjuizamento", StringType(), True),
    StructField("dscSistema", StringType(), True),
    StructField("nivelSigilo", LongType(), True),
    StructField("numero", StringType(), True),
    StructField("orgaoJulgador", _orgao_julgador, True),
    StructField("procEl", LongType(), True),
    StructField("tamanhoProcesso", StringType(), True),
    StructField("totalAssuntos", LongType(), True),
    StructField("valorCausa", StringType(), True),
])

# One element of movimento[].complementoNacional
_complemento_nacional = StructType([
    StructField("codComplemento", LongType(), True),
    StructField("codComplementoTabelado", LongType(), True),
    StructField("descricaoComplemento", StringType(), True),
])

# One element of the top-level "movimento" array
_movimento = StructType([
    StructField("complementoNacional", ArrayType(_complemento_nacional), True),
    StructField("dataHora", StringType(), True),
    StructField("idDocumentoVinculado", ArrayType(StringType()), True),
    StructField("identificadorMovimento", StringType(), True),
    StructField("movimentoLocal", StructType([
        StructField("codigoMovimento", LongType(), True),
        StructField("codigoPaiNacional", LongType(), True),
    ]), True),
    StructField("movimentoNacional", StructType([
        StructField("codigoNacional", LongType(), True),
    ]), True),
    StructField("nivelSigilo", StringType(), True),
    StructField("orgaoJulgador", _orgao_julgador, True),
    StructField("tipoDecisao", StringType(), True),
    StructField("tipoResponsavelMovimento", StringType(), True),
])

schema = StructType([
    StructField("dadosBasicos", _dados_basicos, True),
    StructField("grau", StringType(), True),
    StructField("millisInsercao", LongType(), True),
    StructField("movimento", ArrayType(_movimento), True),
    StructField("siglaTribunal", StringType(), True),
])

In [None]:
# Load the classes lookup CSV (semicolon-delimited, header row, inferred types)
df_classes = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .option("delimiter", ";")
    .csv("./base/sgt_classes.csv")
)

# Register it so it can be queried through Spark SQL
df_classes.createOrReplaceTempView("classes")

# Keep only the columns needed by the target table, renaming codigo / cod_pai
df_qry_classes = spark.sql(
    "SELECT "
    "codigo AS cod,"
    "descricao,"
    "sigla,"
    "cod_pai AS codpai "
    "FROM classes "
)

# JDBC export is currently disabled (kept for reference); while it stays
# commented out the message printed below is informational only.
# df_qry_classes.write \
#     .mode("overwrite") \
#     .format("jdbc") \
#     .option("url", db_url).option("user", db_user).option("password", db_pass).option("driver", db_driver) \
#     .option("dbtable", "inovacnj.classe") \
#     .save()

print("tabela inovacnj.classe criada.")

In [None]:
# Load the subjects (assuntos) lookup CSV (semicolon-delimited, header row, inferred types)
df_assuntos = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .option("delimiter", ";")
    .csv("./base/sgt_assuntos.csv")
)

# Register it so it can be queried through Spark SQL
df_assuntos.createOrReplaceTempView("assuntos")

# Keep only the columns needed by the target table, renaming codigo / cod_pai
df_qry_assuntos = spark.sql(
    "SELECT "
    "codigo AS cod,"
    "descricao,"
    "cod_pai AS codpai "
    "FROM assuntos "
)

# JDBC export is currently disabled (kept for reference); while it stays
# commented out the message printed below is informational only.
# df_qry_assuntos.write \
#     .mode("overwrite") \
#     .format("jdbc") \
#     .option("url", db_url).option("user", db_user).option("password", db_pass).option("driver", db_driver) \
#     .option("dbtable", "inovacnj.assunto") \
#     .save()

print("tabela inovacnj.assunto criada.")

In [None]:
# Load the movements lookup CSV and expose it as the "movimentos" view
df_movimentos = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .option("delimiter", ";")
    .csv("./base/sgt_movimentos.csv")
)
df_movimentos.createOrReplaceTempView("movimentos")

# Load the national movements CSV (carries the project's own phase and nature classification)
df_movimentosNac = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .option("delimiter", ";")
    .csv("./base/MovimentosNacionais.csv")
)
df_movimentosNac.createOrReplaceTempView("movimentos_nac")

# Split MOVIMENTO into code / description and default the missing NATUREZA and FASE.
# Only rows with RELEVANCIA = 1 are kept.
# NOTE(review): substring_index(..., '-', -1) keeps only the text after the LAST
# hyphen, so a description that itself contains '-' would be cut -- confirm
# against the data.
df_qry_movimentosNac = spark.sql(
    "SELECT "
    "trim(substring_index(MOVIMENTO, '-', 1)) AS codmovimento, "
    "trim(substring_index(MOVIMENTO, '-', -1)) AS descmovimento, "
    "CASE WHEN NATUREZA IS NULL THEN 'GERAL' ELSE NATUREZA END AS natureza, "
    "CASE WHEN FASE IS NULL THEN 'F0 - NÃO CLASSIFICADO' ELSE FASE END AS fase "
    "FROM movimentos_nac "
    "WHERE RELEVANCIA = 1"
)

# Left join: every movement is kept, with natureza / fase filled in when a
# classified national movement has the same code
df_movimentos_join = df_movimentos.join(
    df_qry_movimentosNac,
    df_movimentos["codigo"] == df_qry_movimentosNac["codmovimento"],
    "left",
)
df_movimentos_join.createOrReplaceTempView("movimentos_com_fase")

# Final projection for the target table
df_qry_movimentos = spark.sql(
    "SELECT "
    "codigo AS cod,"
    "descricao,"
    "natureza, "
    "fase, "
    "cod_pai AS codpai "
    "FROM movimentos_com_fase "
)

# JDBC export is currently disabled (kept for reference); while it stays
# commented out the message printed below is informational only.
# df_qry_movimentos.write \
#     .mode("overwrite") \
#     .format("jdbc") \
#     .option("url", db_url).option("user", db_user).option("password", db_pass).option("driver", db_driver) \
#     .option("dbtable", "inovacnj.movimentocnj") \
#     .save()

print("tabela inovacnj.movimentocnj criada.")

In [None]:
# Load the court units (serventias) CSV (semicolon-delimited, header row, inferred types)
df_serventias = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .option("delimiter", ";")
    .csv("./base/mpm_serventias.csv")
)

# Register it so it can be queried through Spark SQL
df_serventias.createOrReplaceTempView("serventias")

# Rename the source columns to the names used by the target table
df_qry_serventias = spark.sql(
    "SELECT "
    "SEQ_ORGAO AS cod, "
    "DSC_ORGAO AS descricao, "
    "SEQ_ORGAO_PAI AS codpai, "
    "TIP_ORGAO AS sigla_tipoj, "
    "DSC_TIP_ORGAO AS tipo_oj, "
    "DSC_CIDADE AS cidade, "
    "SIG_UF AS uf, "
    "COD_IBGE AS codibge, "
    "TIP_ESFERA_JUSTICA AS esfera "
    "FROM serventias "
)

# JDBC export is currently disabled (kept for reference); while it stays
# commented out the message printed below is informational only.
# df_qry_serventias.write \
#     .mode("overwrite") \
#     .format("jdbc") \
#     .option("url", db_url).option("user", db_user).option("password", db_pass).option("driver", db_driver) \
#     .option("dbtable", "inovacnj.orgao_julgador") \
#     .save()

print("tabela inovacnj.orgao_julgador criada.")

In [None]:
# Load every tribunal's JSON files, flatten them to one row per
# (process, subject, movement), enrich with natureza / fase and export the
# result both to a CSV file and to PostgreSQL.
#
# Directory layout walked here: <basedir>/<justice branch>/<tribunal>/<json files>
#
# Changes relative to the previous version of this cell:
#   * the first CSV write truncates the file (it used to always append, so
#     re-running the cell duplicated rows and the header, while the JDBC table
#     was overwritten);
#   * the JDBC write is expressed once, with the mode chosen from `is_first`;
#   * the loop-invariant SQL text and column list are built once;
#   * all files of a tribunal are read in a single multi-path read;
#   * directory listings are sorted so runs are reproducible;
#   * the output directory is created when missing.
basedir = "./base"
output_csv = './output/movimentos_tribunais.csv'
jdbc_table = "inovacnj.fat_movimentos_te"


def _listar_subdiretorios(caminho):
    """Return the sorted paths of the directories directly under ``caminho``."""
    return sorted(join(caminho, f) for f in os.listdir(caminho) if isdir(join(caminho, f)))


def _listar_arquivos(caminho):
    """Return the sorted paths of the regular files directly under ``caminho``, skipping ``.DS_Store``."""
    return sorted(
        join(caminho, f)
        for f in os.listdir(caminho)
        if isfile(join(caminho, f)) and not f.endswith(".DS_Store")
    )


# Flattening query. It only keeps processes filed from 2000-01-01 on whose
# first movement code is 26 or 12474 and whose last movement code is 22 or 246,
# and drops movements with codes 581, 85, 12270 and 12271.
sql_proc_movimentos = (
    "SELECT DISTINCT "
    "siglaTribunal AS codtribunal, "
    "grau, "
    "millisinsercao, "

    "dadosBasicos.classeProcessual AS codclasse, "
    "dadosBasicos.codigoLocalidade AS codlocalidade, "
    "dadosBasicos.competencia, "
    "to_timestamp(dadosBasicos.dataAjuizamento, 'yyyyMMddHHmmss') AS dtajuizamento, "
    "dadosBasicos.dscSistema AS descsistema, "
    "dadosBasicos.nivelSigilo AS nivelsigilo, "
    "dadosBasicos.numero AS npu, "
    "dadosBasicos.orgaoJulgador.codigoMunicipioIBGE AS oj_codibge, "
    "dadosBasicos.orgaoJulgador.codigoOrgao AS oj_cod, "
    "dadosBasicos.orgaoJulgador.instancia AS oj_instancia, "
    "dadosBasicos.orgaoJulgador.nomeOrgao AS oj_descricao, "
    "dadosBasicos.procEl AS tramitacao, "
    "dadosBasicos.tamanhoProcesso AS tamanhoprocesso, "
    "dadosBasicos.valorCausa AS valorcausa, "

    "exp_assunto.assunto.codigoNacional AS ass_cod, "
    "exp_assunto.assunto.principal AS ass_principal, "
    "exp_assunto.assunto.assuntoLocal.codigoAssunto AS ass_codlocal, "
    "exp_assunto.assunto.assuntoLocal.codigoPaiNacional AS ass_codpainacional, "
    "exp_assunto.assunto.assuntoLocal.descricao AS ass_desclocal, "

    "to_timestamp(exp_movimento.movimento.dataHora, 'yyyyMMddHHmmss') AS mov_dtmov, "
    "exp_movimento.movimento.movimentoLocal.codigoMovimento AS mov_codlocal, "
    "exp_movimento.movimento.movimentoLocal.codigoPaiNacional AS mov_codpainacional, "
    "exp_movimento.movimento.movimentoNacional.codigoNacional AS mov_cod, "
    "exp_movimento.movimento.nivelSigilo AS mov_nivelsigilo, "

    "exp_movimento.movimento.orgaoJulgador.codigoMunicipioIBGE as mov_oj_codibge, "
    "exp_movimento.movimento.orgaoJulgador.codigoOrgao as mov_oj_cod, "
    "exp_movimento.movimento.orgaoJulgador.instancia as mov_oj_instancia, "
    "exp_movimento.movimento.orgaoJulgador.nomeOrgao as mov_oj_descricao, "

    "exp_movimento.movimento.tipoDecisao as mov_tpdecisao, "
    "exp_movimento.movimento.tipoResponsavelMovimento as mov_tprespmov "

    "FROM proc_movimentos "
    "LATERAL VIEW explode(dadosBasicos.assunto) exp_assunto as assunto "
    "LATERAL VIEW explode(movimento) exp_movimento as movimento "
    "WHERE to_timestamp(dadosBasicos.dataAjuizamento, 'yyyyMMddHHmmss') >= to_timestamp('20000101000000', 'yyyyMMddHHmmss') "
    "AND exp_movimento.movimento.movimentoNacional.codigoNacional NOT IN(581, 85, 12270, 12271) "
    "AND size(proc_movimentos.movimento) > 0 "
    "AND (proc_movimentos.movimento[0].movimentoNacional.codigoNacional IN (26, 12474) "
    "AND proc_movimentos.movimento[size(proc_movimentos.movimento) -1].movimentoNacional.codigoNacional IN (22, 246)) "
)

# Columns exported, in output order (the join key / description columns coming
# from df_qry_movimentosNac are intentionally left out)
colunas_saida = [
    "codtribunal", "grau", "millisinsercao", "codclasse",
    "codlocalidade", "competencia", "dtajuizamento", "descsistema",
    "nivelsigilo", "npu", "oj_codibge", "oj_cod",
    "oj_instancia", "oj_descricao", "tramitacao", "tamanhoprocesso",
    "valorcausa", "ass_cod", "ass_principal", "ass_codlocal",
    "ass_codpainacional", "ass_desclocal", "mov_dtmov", "mov_codlocal",
    "mov_codpainacional", "mov_cod", "mov_nivelsigilo", "mov_oj_codibge",
    "mov_oj_cod", "mov_oj_instancia", "mov_oj_descricao", "mov_tpdecisao",
    "mov_tprespmov", "natureza", "fase",
]

# pandas.to_csv does not create missing directories
os.makedirs(os.path.dirname(output_csv), exist_ok=True)

# True until the first tribunal has been written; drives header + overwrite/append
is_first = True

for dir_ramo_just in _listar_subdiretorios(basedir):
    print("Iniciando carregamento do ramo de justica: " + dir_ramo_just)

    for dir_trib in _listar_subdiretorios(dir_ramo_just):
        print("Iniciando carregamento do tribunal: " + dir_trib)

        arquivos = _listar_arquivos(dir_trib)
        for arq in arquivos:
            print("Carregando dataframe do arquivo: " + arq)

        if arquivos:
            # A single read over all paths replaces the former chain of unions
            df_union_tribunal = spark.read.schema(schema).json(arquivos)
        else:
            # No data files: keep an empty, correctly typed DataFrame
            df_union_tribunal = spark.createDataFrame(spark.sparkContext.emptyRDD(), schema)

        # Temporary view queried by sql_proc_movimentos
        df_union_tribunal.createOrReplaceTempView("proc_movimentos")
        df_query_distinct = spark.sql(sql_proc_movimentos)

        # Attach natureza / fase; movements without a classification keep nulls
        df_movimentos_join = (
            df_query_distinct
            .join(
                df_qry_movimentosNac,
                df_query_distinct["mov_cod"] == df_qry_movimentosNac["codmovimento"],
                "left",
            )
            .select(*colunas_saida)
        )

        # NOTE: toPandas() collects the whole tribunal on the driver
        df_query_distinctPd = df_movimentos_join.toPandas()
        df_query_distinctPd.to_csv(
            output_csv,
            mode='w' if is_first else 'a',
            header=is_first,
            sep=";",
            index=False,
            chunksize=1000,
        )

        # First tribunal recreates the table, the following ones append to it
        (
            df_movimentos_join.repartition(5).write
            .mode("overwrite" if is_first else "append")
            .format("jdbc")
            .option("url", db_url).option("user", db_user).option("password", db_pass).option("driver", db_driver)
            .option("dbtable", jdbc_table)
            .option("batchsize", "10000")
            .save()
        )
        is_first = False

        print("Finalizando carregamento do tribunal: " + dir_trib)

    print("Finalizando carregamento do ramo de justica: " + dir_ramo_just)

print("Carregamento dos arquivos finalizado.")


In [None]:
# Load the CSV holding all the process movements
# NOTE(review): this reads './output/movimentos.csv', while the loader cell of
# this notebook writes './output/movimentos_tribunais.csv' -- confirm which
# file is intended.
df_proc_movimentos = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .option("delimiter", ";")
    .csv("./output/movimentos.csv")
)

# Expose the DataFrame to Spark SQL, then show its schema and one untruncated row
df_proc_movimentos.createOrReplaceTempView("temp_proc_movimentos")
df_proc_movimentos.printSchema()
df_proc_movimentos.show(n=1, truncate=False)

In [None]:
def gerar_modelo_pm_from_df(df, codtribunal, grau, natureza, codclasse):
    """Build a performance DFG visualisation from a movements DataFrame.

    The event log uses ``npu`` as case id, ``fase`` as activity and
    ``mov_dtmov`` (parsed with the pattern ``yyyy-MM-dd HH:mm:ss``) as
    timestamp.

    Args:
        df: Spark DataFrame with at least the columns ``npu``, ``fase``,
            ``mov_dtmov`` and whichever filter columns are used.
        codtribunal: value required in column ``codtribunal``; ``None`` disables the filter.
        grau: value required in column ``grau``; ``None`` disables the filter.
        natureza: value required in column ``natureza``; ``None`` disables the filter.
        codclasse: value required in column ``codclasse``; ``None`` disables the filter.

    Returns:
        The object returned by ``dfg_visualization.apply`` for the
        PERFORMANCE variant.
    """
    # Side effect kept from the original implementation: the input stays
    # available to Spark SQL under this view name.
    df.createOrReplaceTempView("temp_df_modelo_pm")

    # Filters are applied as Column comparisons instead of being concatenated
    # into a SQL string, so values containing quotes cannot break or alter the
    # query.
    filtros = (
        ("codtribunal", codtribunal),
        ("grau", grau),
        ("natureza", natureza),
        ("codclasse", codclasse),
    )
    eventos = df
    for coluna, valor in filtros:
        if valor is not None:
            eventos = eventos.filter(col(coluna) == valor)

    df_logeventos = (
        eventos
        .select(
            "npu",
            "fase",
            to_timestamp(col("mov_dtmov"), 'yyyy-MM-dd HH:mm:ss').alias("dtmov"),
        )
        .orderBy("dtmov")
    )

    # pm4py works on pandas, so the filtered events are collected on the driver
    df_logeventos_pd = df_logeventos.toPandas()
    dataframe = pm4py.format_dataframe(df_logeventos_pd, case_id='npu', activity_key='fase', timestamp_key='dtmov')
    eventLog = pm4py.convert_to_event_log(dataframe)

    dfg = dfg_discovery.apply(eventLog, variant=dfg_discovery.Variants.PERFORMANCE)
    gviz = dfg_visualization.apply(dfg, log=eventLog, variant=dfg_visualization.Variants.PERFORMANCE)

    return gviz

In [None]:
def gerar_modelo_pm_from_db(codtribunal, grau, natureza, codclasse):
    """Build a performance DFG visualisation from the movements table in PostgreSQL.

    The query is executed by the database through the JDBC ``query`` option,
    using the notebook-level connection settings (``db_url``, ``db_user``,
    ``db_pass``, ``db_driver``). The event log uses ``npu`` as case id,
    ``fase`` as activity and ``mov_dtmov`` as timestamp.

    Args:
        codtribunal: value required in column ``codtribunal``; ``None`` disables the filter.
        grau: value required in column ``grau``; ``None`` disables the filter.
        natureza: value required in column ``natureza``; ``None`` disables the filter.
        codclasse: integer (or integer-like) value required in column
            ``codclasse``; ``None`` disables the filter.

    Returns:
        The object returned by ``dfg_visualization.apply`` for the
        PERFORMANCE variant.

    Raises:
        ValueError: if ``codclasse`` is given but cannot be converted to ``int``.
    """
    def _literal(valor):
        # Quote a value as a SQL string literal, doubling embedded single
        # quotes so the value cannot terminate the literal early.
        return "'" + str(valor).replace("'", "''") + "'"

    # NOTE(review): the loader cell of this notebook writes to
    # inovacnj.fat_movimentos_te, while this reads inovacnj.fat_movimentos_tjpe
    # -- confirm the table name.
    sql = "SELECT npu, fase, mov_dtmov "
    sql += "FROM inovacnj.fat_movimentos_tjpe "
    sql += "WHERE (1=1) "
    if codtribunal is not None:
        sql += "AND codtribunal = " + _literal(codtribunal) + " "
    if grau is not None:
        sql += "AND grau = " + _literal(grau) + " "
    if natureza is not None:
        sql += "AND natureza = " + _literal(natureza) + " "
    if codclasse is not None:
        # int() guarantees that only a number reaches the SQL text
        sql += "AND codclasse = " + str(int(codclasse)) + " "

    sql += "ORDER BY mov_dtmov ASC "

    # Use the shared connection settings instead of repeating the literals
    df_logeventos = (
        spark.read
        .format("jdbc")
        .option("url", db_url)
        .option("query", sql)
        .option("user", db_user)
        .option("password", db_pass)
        .option("driver", db_driver)
        .load()
    )

    # pm4py works on pandas, so the result is collected on the driver
    df_logeventos_pd = df_logeventos.toPandas()
    dataframe = pm4py.format_dataframe(df_logeventos_pd, case_id='npu', activity_key='fase', timestamp_key='mov_dtmov')
    eventLog = pm4py.convert_to_event_log(dataframe)

    dfg = dfg_discovery.apply(eventLog, variant=dfg_discovery.Variants.PERFORMANCE)
    gviz = dfg_visualization.apply(dfg, log=eventLog, variant=dfg_visualization.Variants.PERFORMANCE)

    return gviz

In [None]:
# Build and display the performance DFG read from the database for
# tribunal 'TJPE', grau 'G1', natureza 'GERAL', without a class filter
gviz = gerar_modelo_pm_from_db(codtribunal='TJPE', grau='G1', natureza='GERAL', codclasse=None)
dfg_visualization.view(gviz)

In [None]:
from flask import Flask
from flask import request
from flask import send_file

# Flask application object the HTTP routes of this cell are registered on
app = Flask(__name__)


@app.route('/download')
def downloadFile():
    """Send the file at the fixed path "/Examples.pdf" as an attachment."""
    # For windows you need to use drive name [ex: F:/Example.pdf]
    return send_file("/Examples.pdf", as_attachment=True)

@app.route("/testeflask")
def teste():
    """Echo the ``teste`` query-string parameter back to the caller.

    Returns:
        The text ``"Enviou param1 = "`` followed by the HTML-escaped value of
        the parameter, or by an empty string when the parameter is absent.
    """
    from html import escape

    # Default to '' so a request without ?teste=... no longer raises
    # TypeError (str + None), which surfaced as an HTTP 500.
    param1 = request.args.get('teste', '')

    # The value comes from the request and is returned as the response body:
    # escape it so markup in the parameter is not rendered by the browser.
    response = "Enviou param1 = " + escape(param1)

    return response

# Start the Flask development server, listening on host 0.0.0.0 and port 8080.
# NOTE(review): app.run() presumably blocks this cell until the server is
# stopped, so the cells after it would not execute under "Run All" -- confirm.
app.run(host='0.0.0.0', port='8080')

In [None]:
# Performance DFG over the classified movements whose natureza is 'GERAL'
df_logeventos = spark.sql(
    "SELECT npu, fase, to_timestamp(mov_dtmov, 'yyyy-MM-dd HH:mm:ss') as dtmov "
    "FROM temp_proc_movimentos "
    "WHERE fase IS NOT NULL "
    "AND natureza = 'GERAL' "
    "ORDER BY mov_dtmov ASC"
)

# pm4py works on pandas: collect, tag the case / activity / timestamp columns
# and convert to an event log
logMovimentosDfPD = df_logeventos.toPandas()
dataframe = pm4py.format_dataframe(
    logMovimentosDfPD,
    case_id='npu',
    activity_key='fase',
    timestamp_key='dtmov',
)
eventLog = pm4py.convert_to_event_log(dataframe)

# Discover the directly-follows graph (performance variant) and display it
dfg = dfg_discovery.apply(eventLog, variant=dfg_discovery.Variants.PERFORMANCE)
gviz = dfg_visualization.apply(dfg, log=eventLog, variant=dfg_visualization.Variants.PERFORMANCE)
dfg_visualization.view(gviz)


In [None]:
# Performance DFG over the classified movements whose natureza is 'CRIMINAL'
df_logeventos = spark.sql(
    "SELECT npu, fase, to_timestamp(mov_dtmov, 'yyyy-MM-dd HH:mm:ss') as dtmov "
    "FROM temp_proc_movimentos "
    "WHERE fase IS NOT NULL "
    "AND natureza = 'CRIMINAL' "
    "ORDER BY mov_dtmov ASC"
)

# pm4py works on pandas: collect, tag the case / activity / timestamp columns
# and convert to an event log
logMovimentosDfPD = df_logeventos.toPandas()
dataframe = pm4py.format_dataframe(
    logMovimentosDfPD,
    case_id='npu',
    activity_key='fase',
    timestamp_key='dtmov',
)
eventLog = pm4py.convert_to_event_log(dataframe)

# Discover the directly-follows graph (performance variant) and display it
dfg = dfg_discovery.apply(eventLog, variant=dfg_discovery.Variants.PERFORMANCE)
gviz = dfg_visualization.apply(dfg, log=eventLog, variant=dfg_visualization.Variants.PERFORMANCE)
dfg_visualization.view(gviz)


In [None]:
# Performance DFG over the classified movements whose natureza is 'CIVEL'
df_logeventos = spark.sql(
    "SELECT npu, fase, to_timestamp(mov_dtmov, 'yyyy-MM-dd HH:mm:ss') as dtmov "
    "FROM temp_proc_movimentos "
    "WHERE fase IS NOT NULL "
    "AND natureza = 'CIVEL' "
    "ORDER BY mov_dtmov ASC"
)

# pm4py works on pandas: collect, tag the case / activity / timestamp columns
# and convert to an event log
logMovimentosDfPD = df_logeventos.toPandas()
dataframe = pm4py.format_dataframe(
    logMovimentosDfPD,
    case_id='npu',
    activity_key='fase',
    timestamp_key='dtmov',
)
eventLog = pm4py.convert_to_event_log(dataframe)

# Discover the directly-follows graph (performance variant) and display it
dfg = dfg_discovery.apply(eventLog, variant=dfg_discovery.Variants.PERFORMANCE)
gviz = dfg_visualization.apply(dfg, log=eventLog, variant=dfg_visualization.Variants.PERFORMANCE)
dfg_visualization.view(gviz)


In [None]:
# List the distinct phases (fase) present for natureza 'CIVEL'.
# This query used to read from "proc_movimentos_join", a view that no cell of
# this notebook registers, so it failed on a fresh kernel. It now reads
# "temp_proc_movimentos", the registered view that the DFG cells query for
# fase / natureza.
# NOTE(review): confirm this is the intended source.
df_qry_test = spark.sql(
    "SELECT DISTINCT fase "
    "FROM temp_proc_movimentos "
    "WHERE fase IS NOT NULL "
    "AND natureza = 'CIVEL' "
)

df_qry_test.show()

In [None]:
# Try the schema on a single file and build a small CRIMINAL event log from it
df_test = spark.read.schema(schema).json("./base/justica_estadual/processos-tjap/processos-tjap_2.json")

# Temporary view for the DataFrame
df_test.createOrReplaceTempView("test")

# One row per (process, movement).
# dataAjuizamento is declared as a string and parsed elsewhere in this notebook
# with the pattern 'yyyyMMddHHmmss'; the previous predicate compared it with the
# epoch-millisecond constant 946684800000, which any 14-digit date string
# exceeds, so nothing was ever filtered. The predicate now matches the one used
# by the full loader (filed on or after 2000-01-01).
df_qry_test = spark.sql(
    "SELECT DISTINCT "

    "dadosBasicos.numero, "
    "dadosBasicos.classeProcessual, "
    "to_timestamp(exp_movimento.movimento.dataHora, 'yyyyMMddHHmmss') as movimento_dataHora, "
    "exp_movimento.movimento.movimentoNacional.codigoNacional as movimentoNacional_codigoNacional "

    "FROM test "
    "LATERAL VIEW explode(movimento) exp_movimento as movimento "
    "WHERE to_timestamp(dadosBasicos.dataAjuizamento, 'yyyyMMddHHmmss') >= to_timestamp('20000101000000', 'yyyyMMddHHmmss') AND "
    "exp_movimento.movimento.movimentoNacional.codigoNacional NOT IN(581, 85, 12270, 12271) "
    "AND size(test.movimento) > 0 AND "
    "(test.movimento[0].movimentoNacional.codigoNacional IN (26, 12474) "
    "AND test.movimento[size(test.movimento) -1].movimentoNacional.codigoNacional IN (22, 246)) "
    "ORDER BY movimento_dataHora ASC"
)

# Attach natureza / fase; the inner join drops movements without a matching code
df_qry_test = df_qry_test.join(
    df_qry_movimentos,
    df_qry_test["movimentoNacional_codigoNacional"] == df_qry_movimentos["cod"],
    "inner",
)

df_qry_test.createOrReplaceTempView("test_eventlog")

# Event-log columns for the classified CRIMINAL movements, oldest first
df_qry_test = spark.sql(
    "SELECT numero, fase, movimento_dataHora "
    "FROM test_eventlog "
    "WHERE fase IS NOT NULL "
    "AND natureza = 'CRIMINAL' "
    "ORDER BY movimento_dataHora ASC"
)

df_qry_test.show()

In [None]:
# Collect the test query on the driver and turn it into a pm4py event log
# (case id = numero, activity = fase, timestamp = movimento_dataHora)
logMovimentosDfPD = df_qry_test.toPandas()
dataframe = pm4py.format_dataframe(
    logMovimentosDfPD,
    case_id='numero',
    activity_key='fase',
    timestamp_key='movimento_dataHora',
)
eventLog = pm4py.convert_to_event_log(dataframe)

In [None]:
# Print every case of the event log followed by the activity of each of its events
for case_index, case in enumerate(eventLog):
    case_id = case.attributes["concept:name"]
    print("\n case index: %d  case id: %s" % (case_index, case_id))
    for event_index, event in enumerate(case):
        activity = event["concept:name"]
        print("event index: %d  event activity: %s" % (event_index, activity))

In [None]:
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.visualization.dfg import visualizer as dfg_visualization

# Discover the directly-follows graph (performance variant) of the current
# event log and display it
dfg = dfg_discovery.apply(
    eventLog,
    variant=dfg_discovery.Variants.PERFORMANCE,
)
gviz = dfg_visualization.apply(
    dfg,
    log=eventLog,
    variant=dfg_visualization.Variants.PERFORMANCE,
)
dfg_visualization.view(gviz)
