## EDA Logs
### Diners

* Release Date : 2025-04-10

In [1]:
# Libraries
import pandas as pd
import numpy as np
import csv
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timezone
import json

In [2]:
def read_and_clean_logs(csv_path):
    """
    Lee el archivo CSV 'muestra_logs.csv' con problemas de delimitación y comillas,
    y separa automáticamente en filas de tipo Request y Response.
    Devuelve dos DataFrames: df_requests, df_responses.
    """
    lines = []
    with open(csv_path, 'r', encoding='utf-8') as f:
        for line in f:

            cleaned_line = line.rstrip().rstrip(';').strip()
            if cleaned_line.startswith('"') and cleaned_line.endswith('"'):
                cleaned_line = cleaned_line[1:-1]
            cleaned_line = cleaned_line.replace('""','"')
            lines.append(cleaned_line)
    parsed_data = list(csv.reader(lines, delimiter=','))
    header = parsed_data[0]
    data_rows = parsed_data[1:]

    df_res_rows = []
    df_req_rows = []
    for row in data_rows:
        
        if len(row) == len(header):
            df_res_rows.append(row)
        else:
            df_req_rows.append(row)

    df_res = pd.DataFrame(df_res_rows, columns=header)
    df_req = pd.DataFrame(df_req_rows)

    return df_req, df_res


df_req, df_res = read_and_clean_logs('../muestra_logs.csv')



In [3]:
df_res['codigoEstadoHttp'] = pd.to_numeric(df_res['codigoEstadoHttp'], errors='coerce')
df_res['tiempoProceso'] = (
    df_res['tiempoProceso']
    .str.replace('ms','', regex=False)
    .str.strip()
)
df_res['tiempoProceso'] = pd.to_numeric(df_res['tiempoProceso'], errors='coerce')
df_res['timestamp_iso'] = pd.to_datetime(df_res['timestamp_iso'], errors='coerce')
df_res

Unnamed: 0,app,timestamp,ambiente,appname,clase,class,codigoEstadoHttp,date,dns_id,endPoint,...,thread,tiempoProceso,time,timestamp_iso,tipoLog,tipoMensaje,transaccionId,trazaExcepcion,ts,versionAplicacion
0,e929031e-e92e-432c-945b-fb35914cf7b6,1744144227,dev,msd-ops-isdvadmn-crdbraretrieve,ec.com.dinersclub.dddmodules.bian.rest.*******,,200.0,2025-04-08,905d8055-c913-43e0-8155-39a631e71fbb,/experience/operational-services/issued-device...,...,http-nio-8082-exec-29,349.0,2025-04-08T17:49:58.069830731Z,2025-04-08 20:30:27+00:00,AUDITORIA,Response,,,,1.3.27
1,8cf10c73-960a-4154-88cb-797182d04252,1744142790,dev,msd-ops-isdvadmn-crdbraretrieve,ec.com.dinersclub.dddmodules.bian.rest.*******,,200.0,2025-04-08,4872aad0-6ebb-49c5-97e0-e433f0acd420,/experience/operational-services/issued-device...,...,http-nio-8082-exec-10,391.0,2025-04-08T17:49:49.300689809Z,2025-04-08 20:06:30+00:00,AUDITORIA,Response,,,,1.3.27
2,0733c311-b155-4de9-9702-3cd291cda508,1744143808,dev,msd-ops-isdvadmn-crdbraretrieve,ec.com.dinersclub.dddmodules.bian.rest.*******,,200.0,2025-04-08,24c61fcd-8400-41c9-8941-5f02961e0e44,/experience/operational-services/issued-device...,...,http-nio-8082-exec-14,351.0,2025-04-08T17:49:55.695166421Z,2025-04-08 20:23:28+00:00,AUDITORIA,Response,,,,1.3.27
3,d72536c8-79ec-49ea-9dab-2dcd5c6a0ec7,1744139788,dev,msd-ops-isdvadmn-crdbraretrieve,ec.com.dinersclub.dddmodules.bian.rest.*******,,200.0,2025-04-08,d81abdaf-d9c2-49ef-979b-fcfcd6b7b2c0,/experience/operational-services/issued-device...,...,http-nio-8082-exec-7,385.0,2025-04-08T17:49:31.718503945Z,2025-04-08 19:16:28+00:00,AUDITORIA,Response,,,,1.3.27
4,6da8a82c-5094-421c-95ad-2989485c0417,1744145250,dev,msd-ops-isdvadmn-crdbraretrieve,ec.com.dinersclub.dddmodules.bian.rest.*******,,200.0,2025-04-08,ddeb3c3c-976e-4516-be0b-70b5c7163828,/experience/operational-services/issued-device...,...,http-nio-8082-exec-55,367.0,2025-04-08T17:50:03.930113345Z,2025-04-08 20:47:30+00:00,AUDITORIA,Response,,,,1.3.27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,fc25625b-3cf4-420a-a720-e54fcdff7f7a,1744139668,dev,msd-ops-isdvadmn-crdbraretrieve,ec.com.dinersclub.dddmodules.bian.rest.*******,,200.0,2025-04-08,69607763-da7e-41c4-8386-f06d6d8e6f43,/experience/operational-services/issued-device...,...,http-nio-8082-exec-51,334.0,2025-04-08T17:49:31.012539535Z,2025-04-08 19:14:28+00:00,AUDITORIA,Response,,,,1.3.27
151,8e1480b6-0018-4164-bba6-9411a09b0233,1744139249,dev,msd-ops-isdvadmn-crdbraretrieve,ec.com.dinersclub.dddmodules.bian.rest.*******,,200.0,2025-04-08,4206ad8f-c398-4feb-8600-5fd12e4451a2,/experience/operational-services/issued-device...,...,http-nio-8082-exec-33,400.0,2025-04-08T17:49:28.62217964Z,2025-04-08 19:07:29+00:00,AUDITORIA,Response,,,,1.3.27
152,19be6644-32c9-448d-9c08-777bdbb93873,1744141048,dev,msd-ops-isdvadmn-crdbraretrieve,ec.com.dinersclub.dddmodules.bian.rest.*******,,200.0,2025-04-08,862bcbf1-921c-4c12-a574-d271cd8d3422,/experience/operational-services/issued-device...,...,http-nio-8082-exec-48,409.0,2025-04-08T17:49:38.932651386Z,2025-04-08 19:37:28+00:00,AUDITORIA,Response,,,,1.3.27
153,638b3fca-6ba9-4e9d-a5b0-aefd830ff2fe,1744094847,dev,msd-crc-partauth-assessment,ec.com.dinersclub.dddmodules.application.rest....,,422.0,2025-04-08,81b0a8ce-10a1-4385-ab5f-9f3de3ef1fdd,/experience/cross-channel/party-authentication...,...,http-nio-8082-exec-7,186017.0,2025-04-08T06:46:39.161658Z,2025-04-08 06:47:27+00:00,ERROR,Response,,"""org.springframework.web.client.ResourceAccess...",,1.3.86


In [4]:
df_res['ambiente'] = df_res['ambiente'].replace('', np.nan, inplace=True)
df_res['appname'] = df_res['appname'].replace('', np.nan)
df_res['timestamp'] = pd.to_datetime(df_res['timestamp'], unit='s')
#df_res['fechaGuardado'] = pd.to_datetime(df_res['fechaGuardado'], unit='s')
df_res['file'] = df_res['file'].replace('', np.nan)
df_res['clase'] = df_res['clase'].replace('', np.nan)
df_res['class'] = df_res['class'].replace('', np.nan)
df_res['dns_id'] = df_res['dns_id'].replace('', np.nan)
df_res['endPoint'] = df_res['endPoint'].replace('', np.nan) 
df_res['id'] = df_res['id'].replace('', np.nan)
df_res['host'] = df_res['host'].replace('', np.nan)
df_res['kubernetes'] = df_res['kubernetes'].replace('', np.nan)
df_res['level']= df_res['level'].str.capitalize()
df_res['log'] = df_res['log'].replace('', np.nan)
df_res['logger'] = df_res['logger'].replace('', np.nan)
df_res['method'] = df_res['method'].replace('', np.nan)
df_res['metodo'] = df_res['metodo'].replace('', np.nan)
df_res['metodoHttp'] = df_res['metodoHttp'].replace('', np.nan)
df_res['msg'] = df_res['msg'].replace('', np.nan)
df_res['name'] = df_res['name'].replace('', np.nan)
df_res['namespace'] = df_res['namespace'].replace('', np.nan)
df_res['operacionId'] = df_res['operacionId'].replace('', np.nan)
df_res['pod'] = df_res['pod'].replace('', np.nan)
df_res['thread'] = df_res['thread'].replace('', np.nan)
df_res['tipoLog'] = df_res['tipoLog'].replace('', np.nan).str.capitalize()
df_res['tipoMensaje'] = df_res['tipoMensaje'].replace('', np.nan)
df_res['transaccionId'] = df_res['transaccionId'].replace('', np.nan)
df_res['trazaExcepcion'] = df_res['trazaExcepcion'].replace('', np.nan)
df_res['ts'] = df_res['ts'].replace('', np.nan)
df_res['versionAplicacion'] = df_res['versionAplicacion'].replace('', np.nan)
df_res.head()

Unnamed: 0,app,timestamp,ambiente,appname,clase,class,codigoEstadoHttp,date,dns_id,endPoint,...,thread,tiempoProceso,time,timestamp_iso,tipoLog,tipoMensaje,transaccionId,trazaExcepcion,ts,versionAplicacion
0,e929031e-e92e-432c-945b-fb35914cf7b6,2025-04-08 20:30:27,,msd-ops-isdvadmn-crdbraretrieve,ec.com.dinersclub.dddmodules.bian.rest.*******,,200.0,2025-04-08,905d8055-c913-43e0-8155-39a631e71fbb,/experience/operational-services/issued-device...,...,http-nio-8082-exec-29,349.0,2025-04-08T17:49:58.069830731Z,2025-04-08 20:30:27+00:00,Auditoria,Response,,,,1.3.27
1,8cf10c73-960a-4154-88cb-797182d04252,2025-04-08 20:06:30,,msd-ops-isdvadmn-crdbraretrieve,ec.com.dinersclub.dddmodules.bian.rest.*******,,200.0,2025-04-08,4872aad0-6ebb-49c5-97e0-e433f0acd420,/experience/operational-services/issued-device...,...,http-nio-8082-exec-10,391.0,2025-04-08T17:49:49.300689809Z,2025-04-08 20:06:30+00:00,Auditoria,Response,,,,1.3.27
2,0733c311-b155-4de9-9702-3cd291cda508,2025-04-08 20:23:28,,msd-ops-isdvadmn-crdbraretrieve,ec.com.dinersclub.dddmodules.bian.rest.*******,,200.0,2025-04-08,24c61fcd-8400-41c9-8941-5f02961e0e44,/experience/operational-services/issued-device...,...,http-nio-8082-exec-14,351.0,2025-04-08T17:49:55.695166421Z,2025-04-08 20:23:28+00:00,Auditoria,Response,,,,1.3.27
3,d72536c8-79ec-49ea-9dab-2dcd5c6a0ec7,2025-04-08 19:16:28,,msd-ops-isdvadmn-crdbraretrieve,ec.com.dinersclub.dddmodules.bian.rest.*******,,200.0,2025-04-08,d81abdaf-d9c2-49ef-979b-fcfcd6b7b2c0,/experience/operational-services/issued-device...,...,http-nio-8082-exec-7,385.0,2025-04-08T17:49:31.718503945Z,2025-04-08 19:16:28+00:00,Auditoria,Response,,,,1.3.27
4,6da8a82c-5094-421c-95ad-2989485c0417,2025-04-08 20:47:30,,msd-ops-isdvadmn-crdbraretrieve,ec.com.dinersclub.dddmodules.bian.rest.*******,,200.0,2025-04-08,ddeb3c3c-976e-4516-be0b-70b5c7163828,/experience/operational-services/issued-device...,...,http-nio-8082-exec-55,367.0,2025-04-08T17:50:03.930113345Z,2025-04-08 20:47:30+00:00,Auditoria,Response,,,,1.3.27


In [7]:
def procesar_mensaje(mensaje_raw):
    """
    Limpia y convierte el string JSON contenido en la columna 'mensaje'
    en un diccionario Python.
    """
    try:
        if mensaje_raw.strip() == "":
            return {}
        mensaje_limpio = mensaje_raw.replace('""', '"').strip('"')
        return json.loads(mensaje_limpio)
    except Exception:
        return {}

mensajes_dicts = df_res['mensaje'].apply(procesar_mensaje)

df_mensajes = pd.json_normalize(mensajes_dicts)

In [8]:
df_mensajes.head()

Unnamed: 0,*******.*******,providerCode,statusType,origin,description,transactionDate,message,statusCode,status,statusInstanceRecord.statusCode,statusInstanceRecord.description
0,"[{'*******': 'PIMC0'}, {'*******': 'PIVI0'}, {...",,,,,,,,,,
1,"[{'*******': 'PIMC0'}, {'*******': 'PIVI0'}, {...",,,,,,,,,,
2,"[{'*******': 'PIMC0'}, {'*******': 'PIVI0'}, {...",,,,,,,,,,
3,"[{'*******': 'PIMC0'}, {'*******': 'PIVI0'}, {...",,,,,,,,,,
4,"[{'*******': 'PIMC0'}, {'*******': 'PIVI0'}, {...",,,,,,,,,,


In [9]:
df_mensajes.rename(columns={'*******.*******': 'PIMC0'}, inplace=True)
df_mensajes.columns

Index(['PIMC0', 'providerCode', 'statusType', 'origin', 'description',
       'transactionDate', 'message', 'statusCode', 'status',
       'statusInstanceRecord.statusCode', 'statusInstanceRecord.description'],
      dtype='object')

In [10]:
df_mensajes

Unnamed: 0,PIMC0,providerCode,statusType,origin,description,transactionDate,message,statusCode,status,statusInstanceRecord.statusCode,statusInstanceRecord.description
0,"[{'*******': 'PIMC0'}, {'*******': 'PIVI0'}, {...",,,,,,,,,,
1,"[{'*******': 'PIMC0'}, {'*******': 'PIVI0'}, {...",,,,,,,,,,
2,"[{'*******': 'PIMC0'}, {'*******': 'PIVI0'}, {...",,,,,,,,,,
3,"[{'*******': 'PIMC0'}, {'*******': 'PIVI0'}, {...",,,,,,,,,,
4,"[{'*******': 'PIMC0'}, {'*******': 'PIVI0'}, {...",,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
150,"[{'*******': 'PIMC0'}, {'*******': 'PIVI0'}, {...",,,,,,,,,,
151,"[{'*******': 'PIMC0'}, {'*******': 'PIVI0'}, {...",,,,,,,,,,
152,"[{'*******': 'PIMC0'}, {'*******': 'PIVI0'}, {...",,,,,,,,,,
153,,422,N,MS,Unprocessable Entity,2025-04-08T06:46:39GMT,[CLIENT TIMEOUT],422,failed,,


In [11]:
df_res_completo = pd.concat([df_res.reset_index(drop=True), df_mensajes.reset_index(drop=True)], axis=1)

In [12]:
df_res_completo.dropna(subset='message')

Unnamed: 0,app,timestamp,ambiente,appname,clase,class,codigoEstadoHttp,date,dns_id,endPoint,...,providerCode,statusType,origin,description,transactionDate,message,statusCode,status,statusInstanceRecord.statusCode,statusInstanceRecord.description
66,msd-*******-*******-catrgl-ret,2025-04-08 02:49:27,,,ec.com.dinersclub.dddmodules.bian.rest.Retriev...,ec.com.dinersclub.logger.interceptor.WebInterc...,,2025-04-08,8a08ecc5-5121-40c6-a64a-5e88599ff60b,/experience/*******/credit-*******/v1/*******-...,...,MLT003,N,AS400,ERROR DE GESTOR,2025-04-08T00:48:05GMT,[Tarjeta inválida],422,failed,,
153,638b3fca-6ba9-4e9d-a5b0-aefd830ff2fe,2025-04-08 06:47:27,,msd-crc-partauth-assessment,ec.com.dinersclub.dddmodules.application.rest....,,422.0,2025-04-08,81b0a8ce-10a1-4385-ab5f-9f3de3ef1fdd,/experience/cross-channel/party-authentication...,...,422,N,MS,Unprocessable Entity,2025-04-08T06:46:39GMT,[CLIENT TIMEOUT],422,failed,,


In [14]:
df_res_completo.to_csv('df_complete.csv')