In [2]:
import boto3, json, time
import pandas as pd
from datetime import datetime, timedelta

In [3]:
# Shared CloudWatch Logs API client; region and credentials are resolved by
# boto3 from the ambient environment because none are passed here.
client = boto3.client(service_name="logs")

In [4]:
# CloudWatch log groups that every query in this notebook runs against.
# Both paths sit under /aws/sagemaker/Endpoints/, i.e. one log group per
# SageMaker endpoint. Add or remove entries here to change the query scope.
log_group_names = [
    "/aws/sagemaker/Endpoints/amazon-falconlite2-2023-10-07-18-07-48-251", 
    "/aws/sagemaker/Endpoints/MistralLite-2023-10-19-09-58-03",
]

In [12]:
def query_cw_logs(query, last_x_days):
    """Run a CloudWatch Logs Insights query and return its rows as a DataFrame.

    The query is started through the module-level ``client`` against the
    module-level ``log_group_names`` and covers the window from
    ``last_x_days`` days ago until now. The function blocks, polling every
    two seconds, until the query leaves its pending states.

    Args:
        query: Logs Insights query string, passed through as ``queryString``.
        last_x_days: Size of the look-back window in days (any value accepted
            by ``timedelta(days=...)``).

    Returns:
        pandas.DataFrame with one row per result and one column per returned
        field; values are stored exactly as the API returned them. The frame
        is empty when the response carries no results.
    """
    # Read the clock once so both ends of the window derive from the same instant.
    now = datetime.now()
    window_start = now - timedelta(days=last_x_days)

    start_query_response = client.start_query(
        logGroupNames=log_group_names,
        startTime=int(window_start.timestamp()),
        endTime=int(now.timestamp()),
        queryString=query,
    )
    query_id = start_query_response['queryId']

    # A freshly started query may report 'Scheduled' before it is 'Running'.
    # Both mean the results are not ready yet; stopping on 'Scheduled' would
    # silently return an empty frame.
    pending_statuses = ('Scheduled', 'Running')
    response = None
    while response is None or response['status'] in pending_statuses:
        print('Waiting 2 secs for query to complete ...')
        time.sleep(2)
        response = client.get_query_results(queryId=query_id)

    # response.keys(): dict_keys(['results', 'statistics', 'status', 'ResponseMetadata'])
    final_status = response['status']
    if final_status == 'Complete':
        print('Done!')
    else:
        # Any other terminal status (e.g. Failed, Cancelled, Timeout) is
        # reported instead of being presented as success; whatever rows the
        # response carries are still returned to the caller.
        print(f"Query ended with status '{final_status}'; results may be empty or incomplete.")

    # Each result is a list of {"field": ..., "value": ...} records; flatten
    # it into a single dict per row so pandas can build the columns.
    rows = [
        {record["field"]: record["value"] for record in item}
        for item in response["results"]
    ]
    return pd.DataFrame(rows)

In [18]:
# Get all records whose message contains ERROR, sorted by @timestamp.
# query = """
# fields @message 
# | filter @message like /ERROR/
# | parse @message "* : *" as loggingRandom, loggingMessage
# | display @timestamp, loggingMessage
# | sort @timestamp 
# """

# Extracts the fields loggingTime, loggingType and loggingMessage, filters down to log events that contain ERROR or INFO strings, and then displays only the loggingMessage and loggingType fields for events that contain an ERROR string.
# query = """
# fields @message
#     | parse @message "* [*] *" as loggingTime, loggingType, loggingMessage
#     | filter loggingType IN ["ERROR", "INFO"]
#     | display loggingMessage, loggingType = "ERROR" as isError
# """

# Show the latest 20 log events.
# Active Logs Insights query, written one pipeline stage per line:
# select the fields, order newest first, keep 20 rows.
query = (
    "fields @timestamp, @message\n"
    "| sort @timestamp desc\n"
    "| limit 20"
)

In [19]:
# Run the active query over the last 300 days and preview the first rows.
df = query_cw_logs(query=query, last_x_days=300)
df.head(n=5)

Waiting 2 secs for query to complete ...
Waiting 2 secs for query to complete ...
Done!


Unnamed: 0,@timestamp,@message,@ptr
0,2023-10-19 10:07:11.211,#033[2m2023-10-19T10:07:06.648856Z#033[0m #033...,CoYBCkkKRTMyNDYyMjQwMDUxNDovYXdzL3NhZ2VtYWtlci...
1,2023-10-19 10:06:56.212,#033[2m2023-10-19T10:06:51.971205Z#033[0m #033...,CoYBCkkKRTMyNDYyMjQwMDUxNDovYXdzL3NhZ2VtYWtlci...
2,2023-10-19 10:05:53.212,#033[2m2023-10-19T10:05:49.026121Z#033[0m #033...,CoYBCkkKRTMyNDYyMjQwMDUxNDovYXdzL3NhZ2VtYWtlci...
3,2023-10-19 10:05:41.212,#033[2m2023-10-19T10:05:36.368668Z#033[0m #033...,CoYBCkkKRTMyNDYyMjQwMDUxNDovYXdzL3NhZ2VtYWtlci...
4,2023-10-19 10:05:24.212,#033[2m2023-10-19T10:05:19.373771Z#033[0m #033...,CoYBCkkKRTMyNDYyMjQwMDUxNDovYXdzL3NhZ2VtYWtlci...
