In [1]:
import requests
import pandas as pd

## Fetch Metrics from Prometheus

In [2]:
# Define the Prometheus endpoint.
PROMETHEUS_URL = 'http://localhost:9090/api/v1/query_range'

# Define the time range for the data.
start_time = '2023-09-12T07:10:00Z'
end_time = '2023-09-12T17:40:00Z'
step = '5s'

microservices = ['carts', 'catalogue', 'front-end', 'orders', 'payment', 'shipping', 'user']

fetched_metrics = None

In [3]:
for microservice in microservices:

    # Define the query for the response time.
    response_time_query = f"""
    sum(
        rate(
            istio_request_duration_milliseconds_sum{{
                destination_service_name="{microservice}"
            }}[1m]
        )
    )
    /
    sum(
        rate(
            istio_request_duration_milliseconds_count{{
                destination_service_name="{microservice}"
            }}[1m]
        )
    )
    """

    # Define the query for the CPU usage in the last minute.
    cpu_usage_query = f"""
    sum(
        rate(
            container_cpu_usage_seconds_total{{
                container="{microservice}"
            }}[1m]
        )
    ) by (pod)
    """
    
    # Define the query for the total memory usage in bytes.
    memory_usage_query = f"""
    sum(
        container_memory_working_set_bytes{{
            container="{microservice}"
        }}
    ) by (pod)
    """

    # Define the query for the bytes received over the network
    # by the container per second in the last minute.
    network_receive_query = f"""
    sum(
        rate(
            container_network_receive_bytes_total{{
                namespace="sock-shop"
            }}[1m]
        )
        * on(namespace, pod)
        group_left(workload)
        namespace_workload_pod:kube_pod_owner:relabel{{
            namespace="sock-shop",
            workload="{microservice}"
        }}
    ) by (pod)
    """

    # Define the query for the bytes transmitted over the network
    # by the container per second in the last minute.
    network_transmit_query = f"""
    sum(
        rate(
            container_network_transmit_packets_total{{
                namespace="sock-shop"
            }}[1m])
        * on (namespace,pod)
        group_left(workload)
        namespace_workload_pod:kube_pod_owner:relabel{{
            namespace="sock-shop",
            workload="{microservice}"
        }}
    ) by (pod)
    """

    queries = {
        'response_time': response_time_query,
        'cpu_usage': cpu_usage_query,
        'memory_usage': memory_usage_query,
        'network_receive': network_receive_query,
        'network_transmit': network_transmit_query
    }

    for metric, query in queries.items():
        # Make the API request.
        response = requests.get(
            PROMETHEUS_URL,
            params={
                'query': query,
                'start': start_time,
                'end': end_time,
                'step': step
            }
        )
        
        # Check if the request was successful.
        response.raise_for_status()

        # Convert the data to a dataframe.
        data = response.json()['data']['result'][0]['values']
        df = pd.DataFrame(data, columns=['timestamp', f'{microservice}_{metric}'])
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')

        if fetched_metrics is None:
            fetched_metrics = df
        else:
            fetched_metrics = pd.merge(fetched_metrics, df, on='timestamp', how='outer')

# Reset index for the final dataframe.
fetched_metrics.set_index('timestamp', inplace=True)

In [4]:
fetched_metrics.fillna(method='ffill', inplace=True)  # Forward fill.
fetched_metrics.fillna(method='bfill', inplace=True)  # Backward fill any remaining NaNs.
fetched_metrics

Unnamed: 0_level_0,carts_response_time,carts_cpu_usage,carts_memory_usage,carts_network_receive,carts_network_transmit,catalogue_response_time,catalogue_cpu_usage,catalogue_memory_usage,catalogue_network_receive,catalogue_network_transmit,...,shipping_response_time,shipping_cpu_usage,shipping_memory_usage,shipping_network_receive,shipping_network_transmit,user_response_time,user_cpu_usage,user_memory_usage,user_network_receive,user_network_transmit
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-09-12 07:10:00,12.476340940120815,0.18547890088321844,294682624,420056.65241080325,941.1799168082489,12.437493082457138,0.024590561765954505,6643712,139555.86931655707,202.2447812410638,...,2.1944444444444446,0.0052111929537415535,270745600,7956.755630481314,12.539541227056619,8.95863983344905,0.04435522920582362,9068544,182688.9577416301,367.2433083742146
2023-09-12 07:10:05,12.041813036690137,0.18547890088321844,294682624,280896.1415444082,644.1618677138906,10.741431730237572,0.013942173067587412,6643712,99696.38710859309,144.48030097226194,...,2.1944444444444446,0.0036999904237644504,270745600,5002.824262536873,7.921533923303835,8.739691614691575,0.04435522920582362,9072640,182688.9577416301,367.2433083742146
2023-09-12 07:10:10,12.041813036690137,0.18547890088321842,294682624,400070.4653230587,917.4570243034973,10.741431730237572,0.01595301507632577,6643712,132019.8421840802,191.538993203548,...,2.1944444444444446,0.005300124306459976,270741504,5665.722175923413,8.971174692799103,8.739691614691575,0.04435522920582362,9072640,182688.9577416301,367.2433083742146
2023-09-12 07:10:15,12.041813036690137,0.18547890088321844,294682624,400070.4653230587,917.4570243034973,10.10742904841405,0.017963857085064126,6643712,132019.8421840802,191.538993203548,...,2.1944444444444446,0.005300124306459976,270741504,6328.620089309952,10.02081546229437,8.61599513720038,0.04435522920582362,9072640,182688.9577416301,367.2433083742146
2023-09-12 07:10:20,12.041813036690137,0.18547890088321844,294682624,400070.4653230587,917.4570243034973,10.10742904841405,0.019067005011057993,6643712,132019.8421840802,191.538993203548,...,2.1944444444444446,0.005300124306459976,270741504,5847.223624608648,9.258566335486908,8.61599513720038,0.04435522920582362,9072640,182688.9577416301,367.2433083742146
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-12 17:39:40,20.82031641043803,0.19104964232241167,317607936,443524.9746633851,1012.0529897205735,21.12278270509978,0.02649505388578243,9170944,175567.72430714327,253.64783732039635,...,2.0647058823515185,0.003925517166390261,302043136,4924.353845297719,8.001969949916528,13.088017955801233,0.04286331333640354,19046400,186938.31061952727,381.6906972150898
2023-09-12 17:39:45,20.82031641043803,0.19104964232241167,317607936,443524.9746633851,1012.0529897205735,21.44362934363181,0.025807625430748833,9179136,167344.83723906262,241.90194353355193,...,2.0647058823515185,0.003925517166390261,302043136,5549.335295863478,9.017551474680024,13.054920580108824,0.04286331333640354,19046400,186938.31061952727,381.6906972150898
2023-09-12 17:39:50,20.82031641043803,0.19104964232241167,317607936,443524.97466338496,1012.0529897205734,21.44362934363181,0.025807625430748833,9179136,167344.83723906262,241.90194353355193,...,2.0647058823515185,0.003925517166390261,302043136,5817.827327026526,9.45384529771842,13.054920580108824,0.04286331333640354,19046400,186938.3106195273,381.6906972150899
2023-09-12 17:39:55,20.82031641043803,0.19104964232241167,317607936,443524.97466338496,1012.0529897205734,21.44362934363181,0.025807625430748833,9179136,167344.83723906262,241.90194353355193,...,2.1396551724125383,0.003925517166390261,302043136,5192.8458764607685,8.438263772954926,13.054920580108824,0.04254098554611217,19054592,186938.3106195273,381.6906972150899


In [5]:
fetched_metrics.columns

Index(['carts_response_time', 'carts_cpu_usage', 'carts_memory_usage',
       'carts_network_receive', 'carts_network_transmit',
       'catalogue_response_time', 'catalogue_cpu_usage',
       'catalogue_memory_usage', 'catalogue_network_receive',
       'catalogue_network_transmit', 'front-end_response_time',
       'front-end_cpu_usage', 'front-end_memory_usage',
       'front-end_network_receive', 'front-end_network_transmit',
       'orders_response_time', 'orders_cpu_usage', 'orders_memory_usage',
       'orders_network_receive', 'orders_network_transmit',
       'payment_response_time', 'payment_cpu_usage', 'payment_memory_usage',
       'payment_network_receive', 'payment_network_transmit',
       'shipping_response_time', 'shipping_cpu_usage', 'shipping_memory_usage',
       'shipping_network_receive', 'shipping_network_transmit',
       'user_response_time', 'user_cpu_usage', 'user_memory_usage',
       'user_network_receive', 'user_network_transmit'],
      dtype='object')

## Label the Metrics Data

In [6]:
labeled_data = fetched_metrics.copy()

# Create a new column initialized to 0 (normal data points).
labeled_data.insert(0, 'label', 0)

# Define the duration variables.
anomaly_duration = pd.Timedelta(minutes=5)
cooldown_duration = pd.Timedelta(minutes=10)

# Localize the start time for the first anomaly.
start_anomaly = labeled_data.index.min() + pd.Timedelta(minutes=5)

# While there are rows left in the dataframe, keep labeling.
while start_anomaly < labeled_data.index.max():
    # Find the end of the anomaly.
    end_anomaly = start_anomaly + anomaly_duration

    # Set the label for the anomaly duration.
    labeled_data.loc[start_anomaly:end_anomaly, 'label'] = 1

    # Move to the next anomaly start time.
    start_anomaly = end_anomaly + cooldown_duration

labeled_data.reset_index(inplace=True)
labeled_data

Unnamed: 0,timestamp,label,carts_response_time,carts_cpu_usage,carts_memory_usage,carts_network_receive,carts_network_transmit,catalogue_response_time,catalogue_cpu_usage,catalogue_memory_usage,...,shipping_response_time,shipping_cpu_usage,shipping_memory_usage,shipping_network_receive,shipping_network_transmit,user_response_time,user_cpu_usage,user_memory_usage,user_network_receive,user_network_transmit
0,2023-09-12 07:10:00,0,12.476340940120815,0.18547890088321844,294682624,420056.65241080325,941.1799168082489,12.437493082457138,0.024590561765954505,6643712,...,2.1944444444444446,0.0052111929537415535,270745600,7956.755630481314,12.539541227056619,8.95863983344905,0.04435522920582362,9068544,182688.9577416301,367.2433083742146
1,2023-09-12 07:10:05,0,12.041813036690137,0.18547890088321844,294682624,280896.1415444082,644.1618677138906,10.741431730237572,0.013942173067587412,6643712,...,2.1944444444444446,0.0036999904237644504,270745600,5002.824262536873,7.921533923303835,8.739691614691575,0.04435522920582362,9072640,182688.9577416301,367.2433083742146
2,2023-09-12 07:10:10,0,12.041813036690137,0.18547890088321842,294682624,400070.4653230587,917.4570243034973,10.741431730237572,0.01595301507632577,6643712,...,2.1944444444444446,0.005300124306459976,270741504,5665.722175923413,8.971174692799103,8.739691614691575,0.04435522920582362,9072640,182688.9577416301,367.2433083742146
3,2023-09-12 07:10:15,0,12.041813036690137,0.18547890088321844,294682624,400070.4653230587,917.4570243034973,10.10742904841405,0.017963857085064126,6643712,...,2.1944444444444446,0.005300124306459976,270741504,6328.620089309952,10.02081546229437,8.61599513720038,0.04435522920582362,9072640,182688.9577416301,367.2433083742146
4,2023-09-12 07:10:20,0,12.041813036690137,0.18547890088321844,294682624,400070.4653230587,917.4570243034973,10.10742904841405,0.019067005011057993,6643712,...,2.1944444444444446,0.005300124306459976,270741504,5847.223624608648,9.258566335486908,8.61599513720038,0.04435522920582362,9072640,182688.9577416301,367.2433083742146
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7556,2023-09-12 17:39:40,0,20.82031641043803,0.19104964232241167,317607936,443524.9746633851,1012.0529897205735,21.12278270509978,0.02649505388578243,9170944,...,2.0647058823515185,0.003925517166390261,302043136,4924.353845297719,8.001969949916528,13.088017955801233,0.04286331333640354,19046400,186938.31061952727,381.6906972150898
7557,2023-09-12 17:39:45,0,20.82031641043803,0.19104964232241167,317607936,443524.9746633851,1012.0529897205735,21.44362934363181,0.025807625430748833,9179136,...,2.0647058823515185,0.003925517166390261,302043136,5549.335295863478,9.017551474680024,13.054920580108824,0.04286331333640354,19046400,186938.31061952727,381.6906972150898
7558,2023-09-12 17:39:50,0,20.82031641043803,0.19104964232241167,317607936,443524.97466338496,1012.0529897205734,21.44362934363181,0.025807625430748833,9179136,...,2.0647058823515185,0.003925517166390261,302043136,5817.827327026526,9.45384529771842,13.054920580108824,0.04286331333640354,19046400,186938.3106195273,381.6906972150899
7559,2023-09-12 17:39:55,0,20.82031641043803,0.19104964232241167,317607936,443524.97466338496,1012.0529897205734,21.44362934363181,0.025807625430748833,9179136,...,2.1396551724125383,0.003925517166390261,302043136,5192.8458764607685,8.438263772954926,13.054920580108824,0.04254098554611217,19054592,186938.3106195273,381.6906972150899


In [7]:
anomaly_fraction = len(labeled_data[labeled_data['label'] == 1])/len(labeled_data)
anomaly_fraction

0.3388440682449411

In [8]:
labeled_data.to_pickle('labeled_data.pkl')