In [0]:
%pip install azure-monitor-query
%pip install azure-identity
# The two installs above provide the `azure.monitor.query` and `azure.identity`
# modules imported by the later cells. The Python process is restarted
# afterwards; NOTE(review): Databricks recommends this so that newly installed
# libraries are picked up - it also clears any Python state defined before this
# cell, so keep this cell first.
dbutils.library.restartPython()

In [0]:
# Read the deployment config from DBFS with the "multiline" JSON reader option
# and take its first row.
config_path = "dbfs:/configs/config.json"
config = spark.read.option("multiline", "true").json(config_path)
first_row = config.first()

# Trim and lower-case the two config values that make up the secret-scope name.
env, lz_key = (
    first_row[field_name].strip().lower() for field_name in ("env", "lz_key")
)
keyvault_name = f"ingest{lz_key}-meta002-{env}"

# Fetch the service-principal credentials from that scope; the key order below
# matches the names on the left-hand side.
client_secret, tenant_id, client_id = (
    dbutils.secrets.get(scope=keyvault_name, key=secret_key)
    for secret_key in (
        'SERVICE-PRINCIPLE-CLIENT-SECRET',
        'SERVICE-PRINCIPLE-TENANT-ID',
        'SERVICE-PRINCIPLE-CLIENT-ID',
    )
)
    

In [0]:
from azure.identity import ClientSecretCredential
from azure.monitor.query import LogsQueryClient, LogsQueryStatus
from datetime import datetime, timedelta, timezone

# Authenticate as the service principal whose tenant_id / client_id /
# client_secret were read from the secret scope earlier in the notebook.
credential = ClientSecretCredential(
    tenant_id=tenant_id,
    client_id=client_id,
    client_secret=client_secret
)

client = LogsQueryClient(credential)
workspace_id = "9db45d41-bfe4-49dd-9a3b-5da0ab1a95d0"

# Discovery query: row count per table over the last 3 days, largest first.
query = """
search *
| where TimeGenerated > ago(3d)
| summarize count() by $table
| sort by count_ desc
"""

# Timezone-aware UTC window (72 hours) matching the 3-day filter in the query.
end_time = datetime.now(timezone.utc)
start_time = end_time - timedelta(hours=72)

response = client.query_workspace(
    workspace_id,
    query,
    timespan=(start_time, end_time)
)

if response.status == LogsQueryStatus.SUCCESS:
    print("Tables with data in the last 3 days:\n")
    for table in response.tables:
        # Each row is (table name, row count), per the summarize clause above.
        for row in table.rows:
            print(f"Table: {row[0]}, Count: {row[1]}")
else:
    # Report any non-success status instead of silently printing nothing.
    print(f"Query failed: {response.status}")

In [0]:
from azure.identity import ClientSecretCredential
from azure.monitor.query import LogsQueryClient, LogsQueryStatus
from datetime import datetime, timedelta, timezone

# Authenticate as the service principal whose tenant_id / client_id /
# client_secret were read from the secret scope earlier in the notebook.
credential = ClientSecretCredential(
    tenant_id=tenant_id,
    client_id=client_id,
    client_secret=client_secret
)

client = LogsQueryClient(credential)
workspace_id = "9db45d41-bfe4-49dd-9a3b-5da0ab1a95d0"

# Newest 100 AppTraces rows from the last 3 days whose Message contains
# "Validate posting payload".
query = """
AppTraces
| where TimeGenerated > ago(3d)
| where Message contains "Validate posting payload"
| project TimeGenerated, Message
| order by TimeGenerated desc
| take 100
"""

# Timezone-aware UTC window (72 hours) matching the 3-day filter in the query.
end_time = datetime.now(timezone.utc)
start_time = end_time - timedelta(hours=72)

response = client.query_workspace(
    workspace_id,
    query,
    timespan=(start_time, end_time)
)

if response.status == LogsQueryStatus.SUCCESS:
    for table in response.tables:
        print(f"Found {len(table.rows)} matching messages\n")

        # Print only the first 10 rows in full; rows are (TimeGenerated,
        # Message) as projected by the query.
        for position, row in enumerate(table.rows[:10], start=1):
            timestamp = row[0]
            message = row[1]
            print(f"\n--- Message {position} ---")
            print(f"Time: {timestamp}")
            print(f"Message: {message}")
else:
    print(f"Query failed: {response.status}")

In [0]:
from azure.identity import ClientSecretCredential
from azure.monitor.query import LogsQueryClient, LogsQueryStatus
from datetime import datetime, timedelta, timezone
import pandas as pd
import re

# Authenticate as the service principal whose tenant_id / client_id /
# client_secret were read from the secret scope earlier in the notebook.
credential = ClientSecretCredential(
    tenant_id=tenant_id,
    client_id=client_id,
    client_secret=client_secret
)

client = LogsQueryClient(credential)
workspace_id = "9db45d41-bfe4-49dd-9a3b-5da0ab1a95d0"

# Newest 100 "Validate posting payload" trace messages from the last 3 days.
query_validate = """
AppTraces
| where TimeGenerated > ago(3d)
| where Message contains "Validate posting payload"
| project TimeGenerated, Message
| order by TimeGenerated desc
| take 100
"""

# Timezone-aware UTC window (72 hours) matching the 3-day filter in the query.
end_time = datetime.now(timezone.utc)
start_time = end_time - timedelta(hours=72)

response = client.query_workspace(
    workspace_id,
    query_validate,
    timespan=(start_time, end_time)
)

if response.status == LogsQueryStatus.SUCCESS:
    result_tables = response.tables
else:
    print(f"Query failed: {response.status}")
    result_tables = []

if result_tables:
    # NOTE(review): the query is a single statement, so the first table is
    # taken as the result set - confirm if multi-table responses are expected.
    table = result_tables[0]
    df_validate = pd.DataFrame(data=table.rows, columns=table.columns)
else:
    # Keep the projected columns even when nothing came back, so the
    # transformations below yield an empty frame instead of raising KeyError.
    df_validate = pd.DataFrame(columns=['TimeGenerated', 'Message'])

# Payload = the text after the first "json = " and before the first
# 'event_token' key.
df_validate['payload_validation'] = df_validate['Message'].str.split("json = ", n=1).str[1]
df_validate['payload_validation'] = df_validate['payload_validation'].str.split("'event_token'", n=1).str[0]

# Case number = the 'appealReferenceNumber' value shaped LETTERS/digits/digits.
# expand=False returns a Series, which is what a single-column assignment needs.
df_validate['CaseNo'] = df_validate['Message'].str.extract(
    r"'appealReferenceNumber':\s*'([A-Z]+/\d+/\d+)'", expand=False
)

df_validate = df_validate.rename(columns={'TimeGenerated': 'payload_validation_time'})
df_validate = df_validate[['CaseNo', 'payload_validation', 'payload_validation_time']]

# Databricks' display() function renders pandas DataFrames.
display(df_validate)

In [0]:
from azure.identity import ClientSecretCredential
from azure.monitor.query import LogsQueryClient, LogsQueryStatus
from datetime import datetime, timedelta, timezone
import pandas as pd
import re

# Authenticate as the service principal whose tenant_id / client_id /
# client_secret were read from the secret scope earlier in the notebook.
credential = ClientSecretCredential(
    tenant_id=tenant_id,
    client_id=client_id,
    client_secret=client_secret
)

client = LogsQueryClient(credential)
workspace_id = "9db45d41-bfe4-49dd-9a3b-5da0ab1a95d0"

# Newest 100 "Submit payload: submit_case_url" trace messages from the last
# 3 days.
query_submit = """
AppTraces
| where TimeGenerated > ago(3d)
| where Message contains "Submit payload: submit_case_url"
| project TimeGenerated, Message
| order by TimeGenerated desc
| take 100
"""

# Timezone-aware UTC window (72 hours) matching the 3-day filter in the query.
end_time = datetime.now(timezone.utc)
start_time = end_time - timedelta(hours=72)

response = client.query_workspace(
    workspace_id,
    query_submit,
    timespan=(start_time, end_time)
)

if response.status == LogsQueryStatus.SUCCESS:
    result_tables = response.tables
else:
    print(f"Query failed: {response.status}")
    result_tables = []

if result_tables:
    # NOTE(review): the query is a single statement, so the first table is
    # taken as the result set - confirm if multi-table responses are expected.
    table = result_tables[0]
    df_submit = pd.DataFrame(data=table.rows, columns=table.columns)
else:
    # Keep the projected columns even when nothing came back, so the
    # transformations below yield an empty frame instead of raising KeyError.
    df_submit = pd.DataFrame(columns=['TimeGenerated', 'Message'])

# Payload = the text after the first "json = " and before the first
# 'event_token' key.
df_submit['payload_submission'] = df_submit['Message'].str.split("json = ", n=1).str[1]
df_submit['payload_submission'] = df_submit['payload_submission'].str.split("'event_token'", n=1).str[0]

# Case number = the 'appealReferenceNumber' value shaped LETTERS/digits/digits.
# expand=False returns a Series, which is what a single-column assignment needs.
df_submit['CaseNo'] = df_submit['Message'].str.extract(
    r"'appealReferenceNumber':\s*'([A-Z]+/\d+/\d+)'", expand=False
)

df_submit = df_submit.rename(columns={'TimeGenerated': 'payload_submission_time'})
df_submit = df_submit[['CaseNo', 'payload_submission', 'payload_submission_time']]

# Databricks' display() function renders pandas DataFrames.
display(df_submit)

In [0]:
from azure.identity import ClientSecretCredential
from azure.monitor.query import LogsQueryClient, LogsQueryStatus
from datetime import datetime, timedelta, timezone
import pandas as pd
import re

# Authenticate as the service principal whose tenant_id / client_id /
# client_secret were read from the secret scope earlier in the notebook.
credential = ClientSecretCredential(
    tenant_id=tenant_id,
    client_id=client_id,
    client_secret=client_secret
)

client = LogsQueryClient(credential)
workspace_id = "9db45d41-bfe4-49dd-9a3b-5da0ab1a95d0"

# Newest 100 "Submit Response status:" trace messages from the last 3 days.
# Held in its own name so the submission query defined earlier in the notebook
# is not overwritten.
query_response = """
AppTraces
| where TimeGenerated > ago(3d)
| where Message contains "Submit Response status:"
| project TimeGenerated, Message
| order by TimeGenerated desc
| take 100
"""

# Timezone-aware UTC window (72 hours) matching the 3-day filter in the query.
end_time = datetime.now(timezone.utc)
start_time = end_time - timedelta(hours=72)

response = client.query_workspace(
    workspace_id,
    query_response,
    timespan=(start_time, end_time)
)

if response.status == LogsQueryStatus.SUCCESS:
    result_tables = response.tables
else:
    print(f"Query failed: {response.status}")
    result_tables = []

if result_tables:
    # NOTE(review): the query is a single statement, so the first table is
    # taken as the result set - confirm if multi-table responses are expected.
    table = result_tables[0]
    df_response = pd.DataFrame(data=table.rows, columns=table.columns)
else:
    # Keep the projected columns even when nothing came back, so the
    # transformations and the join below still work on an empty frame.
    df_response = pd.DataFrame(columns=['TimeGenerated', 'Message'])

# Extract appellantPartyId (36 characters of lowercase hex digits and dashes).
df_response['appellantPartyId'] = df_response['Message'].str.extract(
    r'"appellantPartyId":\s*"([a-f0-9\-]{36})"', expand=False
)

# Everything after the "Submit Response status:" marker.
df_response['response_status'] = df_response['Message'].str.split("Submit Response status:", n=1).str[1]

df_response = df_response.rename(columns={'TimeGenerated': 'ResponseTime'})
df_response = df_response[['appellantPartyId', 'response_status', 'ResponseTime']]

# The validation payload may quote keys and values with either ' or ".
df_validate['appellantPartyId'] = df_validate['payload_validation'].str.extract(
    r"['\"]appellantPartyId['\"]:\s*['\"]([a-f0-9\-]{36})['\"]", expand=False
)

# pandas matches null join keys with each other, so right-hand rows without a
# key are dropped first. Otherwise every validation row lacking an ID or case
# number would be joined to every response/submission row lacking one.
response_keyed = df_response.dropna(subset=['appellantPartyId'])
submit_keyed = df_submit.dropna(subset=['CaseNo'])

# Validation rows are the base: attach the response by appellantPartyId, then
# the submission payload by CaseNo.
df_final = (
    pd.merge(df_validate, response_keyed, on="appellantPartyId", how="left")
    .merge(submit_keyed, on="CaseNo", how="left")
)
df_final = df_final[['CaseNo', 'appellantPartyId', 'payload_validation', 'payload_submission', 'response_status', 'payload_validation_time', 'ResponseTime']]

# Databricks' display() function renders pandas DataFrames.
display(df_final)

In [0]:
# Disabled alternative, kept for reference: if re-enabled, it would left-join
# the validation frame to the submission frame on CaseNo and show both payloads
# and their timestamps side by side.
# NOTE(review): 'payload_difference' is assigned the result of an equality
# comparison, so True would mean the two payloads are identical - confirm the
# intended meaning (or rename the column) before re-enabling.
# df_validate["CaseNo"] = df_validate["CaseNo"].astype(str)
# df_submit["CaseNo"] = df_submit["CaseNo"].astype(str)

# df_final = pd.merge(df_validate, df_submit, on="CaseNo", how="left")
# df_final['payload_difference'] = df_final['payload_validation'] == df_final['payload_submission']

# df_final = df_final[['CaseNo', 'payload_validation', 'payload_submission', 'payload_difference','payload_validation_time', 'payload_submission_time']]

# display(df_final)