# In [1]:
import datetime
import requests
import json
from time import sleep
import csv
import threading
import pandas as pd
import time
from scipy.stats import median_abs_deviation
import os
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns



# --- Data source -----------------------------------------------------------
# URL of the Prometheus HTTP API query endpoint that is polled.
prometheus_api = "http://192.168.49.2:31090/api/v1/query"

# PromQL expression: 1-minute CPU usage rate of the containers whose pod
# matches "front-end-.*" in the "sock-shop" namespace, summed by pod_name.
# NOTE(review): the selector filters on the `pod` label while the grouping
# uses `pod_name` -- confirm both labels exist on this metric.
query = (
    'sum(rate(container_cpu_usage_seconds_total'
    '{pod=~"front-end-.*", namespace="sock-shop"}[1m])) by (pod_name)'
)

# --- Collected samples -----------------------------------------------------
# File the polled samples are appended to, and its column names.
csv_file_path = "frontend_cpu_data.csv"
csv_header = ["Timestamp", "Value", "anomaly"]

# --- Threading -------------------------------------------------------------
# Lock available to the worker threads for synchronization.
lock = threading.Lock()

# --- Detector settings (passed to sliding_window_std by detect_anomaly) ----
column_name = "Value"
window_size = 5
threshold = 4

def fetch_data():
    """Poll Prometheus every 5 seconds and append the samples to the CSV file.

    Runs forever. Each cycle sends ``query`` to ``prometheus_api`` and turns
    every entry of ``data.result`` in the JSON reply into one CSV row:
    the sample time formatted as ``YYYY-MM-DD HH:MM:SS`` (local time), the
    value rounded to 5 decimals, and an ``anomaly`` flag initialised to 0.
    The header row is written first whenever the file is still empty.

    A failed request or an unexpected payload is reported on stdout and the
    cycle is skipped, so a transient outage does not end the polling thread.
    """
    while True:
        try:
            # Without a timeout a stalled connection would block this thread
            # indefinitely.
            response = requests.get(
                prometheus_api, params={"query": query}, timeout=10
            )
            response.raise_for_status()
            samples = response.json()["data"]["result"]

            rows = []
            for sample in samples:
                # Each sample carries [unix_timestamp, "value-as-string"].
                epoch_seconds = sample["value"][0]
                moment = datetime.datetime.fromtimestamp(epoch_seconds)
                rows.append([
                    moment.strftime('%Y-%m-%d %H:%M:%S'),
                    round(float(sample["value"][1]), 5),
                    0,  # anomaly flag, unset at collection time
                ])
        except (requests.RequestException, KeyError, TypeError, ValueError) as exc:
            # Network/HTTP failures and malformed payloads both end up here.
            print(f"fetch_data: skipping this poll: {exc}")
        else:
            # Hold the module lock while the shared CSV file is being written.
            with lock, open(csv_file_path, mode="a", newline="") as csv_file:
                writer = csv.writer(csv_file)

                # Write header if file is empty
                if csv_file.tell() == 0:
                    writer.writerow(csv_header)

                writer.writerows(rows)

        # Sleep for 5 seconds
        sleep(5)

def detect_anomaly():
    """Periodically score the newest samples and append them to the results file.

    Runs forever. Each cycle waits until the collected CSV exists and
    ``get_csv_row_count()`` reports at least 5 rows, then takes the last 5
    data rows. When the results file holds no rows yet, or the first of those
    rows is newer than the last ``Time`` stored in it, the rows are passed
    through ``sliding_window_std`` and appended to
    ``frontend_cpu_result.csv`` without a header. The cycle then sleeps for
    5 seconds.

    If the results file is missing or completely empty it is created with a
    header row whose first column is ``Time``.
    """
    result_path = 'frontend_cpu_result.csv'
    time_format = '%Y-%m-%d %H:%M:%S'

    while True:
        # NOTE(review): get_csv_row_count() counts every CSV row, so the
        # header line is included in this "5" -- confirm that 4 data rows
        # are meant to be enough.
        while not os.path.exists(csv_file_path) or get_csv_row_count() < 5:
            sleep(5)

        # Read CSV file while holding the module lock.
        with lock:
            data = pd.read_csv(csv_file_path)

        # sliding_window_std writes into the frame it receives; hand it an
        # independent copy rather than a slice of ``data``.
        new_data = data.tail(5).copy()

        try:
            df = pd.read_csv(result_path)
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # No usable results file yet: start one. Rows are appended
            # positionally, so only the header names matter here; the first
            # one must be "Time" because that is the column read back below.
            df = pd.DataFrame(columns=["Time"] + list(new_data.columns[1:]))
            df.to_csv(result_path, index=False)

        if df.empty:
            has_new_rows = True
        else:
            last_stored = datetime.datetime.strptime(
                str(df.iloc[-1]['Time']), time_format
            )
            first_pending = datetime.datetime.strptime(
                str(new_data.iloc[0]['Timestamp']), time_format
            )
            # Only a batch that starts after the last stored row is new.
            has_new_rows = first_pending > last_stored

        if has_new_rows:
            data1 = sliding_window_std(new_data, column_name, window_size, threshold)
            data1.to_csv(result_path, mode='a', index=False, header=False)

        # Sleep for 5 seconds between cycles. This has to be inside the loop,
        # otherwise it is never reached and the loop spins without pause.
        sleep(5)
    
    
# Functions for anomaly detection and evaluation
def sliding_window_std(df, column_name, window_size, threshold):
    """Flag rows whose forward window is much more volatile than the previous ones.

    For every row position ``i`` the standard deviation of
    ``df[column_name]`` over positions ``i`` to ``i + window_size - 1``
    (cut off at the end of the frame) is computed. From the third row on,
    that value is compared with the mean of the two preceding windows'
    standard deviations plus ``threshold``. When it is larger, the row
    holding the window's last value is located with ``finding_row_number``
    and, if ``removing_seasonal_points`` accepts it, its ``'anomaly'`` cell
    is set to 1.

    Windows are addressed by position, so the frame may carry any index
    (for example a tail slice of a larger frame).

    Args:
        df: DataFrame with ``column_name``, ``'Timestamp'`` and ``'anomaly'``
            columns. It is modified in place.
        column_name: Name of the numeric column to analyse.
        window_size: Maximum number of rows per window.
        threshold: Margin added to the mean of the two previous standard
            deviations before comparing.

    Returns:
        The same ``df`` object that was passed in.
    """
    std_history = []  # standard deviation of every window seen so far
    for position in range(len(df)):
        # Positional slice; iloc clips the end at the frame boundary.
        window = df[column_name].iloc[position:position + window_size]

        # Calculate the standard deviation of the window
        std_dev = window.std()
        std_history.append(std_dev)

        # Two earlier windows are needed to form the baseline.
        if len(std_history) < 3:
            continue

        threshold_value = (std_history[-3] + std_history[-2]) / 2 + threshold
        if std_dev > threshold_value:
            # finding_row_number yields an index label, hence loc/at below.
            # NOTE(review): it matches by value, so with repeated values the
            # first occurrence is the one that gets flagged.
            row = finding_row_number(df[column_name], window.iloc[-1])
            if row is None:
                continue

            if removing_seasonal_points(df.loc[row]):
                df.at[row, 'anomaly'] = 1
    return df
                

def finding_row_number(original_lst, anomaly_lst):
    """Return the index label of the first entry equal to ``anomaly_lst``.

    Args:
        original_lst: pandas Series to search.
        anomaly_lst: The single value to look for (a scalar, despite the
            parameter name).

    Returns:
        The index label of the first matching entry, or ``None`` when no
        entry matches (including when the Series is empty).
    """
    # Take the label straight from the Series instead of counting up from
    # the first label, which is only right for consecutive integer indexes.
    for label, value in original_lst.items():
        if value == anomaly_lst:
            return label
    return None

def removing_seasonal_points(anomalous_row):
    """Decide whether a flagged row should be kept.

    The row's ``"Timestamp"`` entry is parsed as ``YYYY-MM-DD HH:MM:SS``.
    Rows stamped at hour 0, minute 0 (any second) are rejected.

    Returns:
        ``False`` for a timestamp at hour 0 and minute 0, ``True`` otherwise.
    """
    stamp = datetime.datetime.strptime(
        str(anomalous_row["Timestamp"]), '%Y-%m-%d %H:%M:%S'
    )
    return (stamp.hour, stamp.minute) != (0, 0)

def get_csv_row_count():
    """Count the rows currently stored in the collected CSV file.

    Every row the CSV reader yields is counted, so a header line is included.

    Returns:
        The number of rows, or 0 when the file does not exist.
    """
    if not os.path.exists(csv_file_path):
        return 0

    total = 0
    with open(csv_file_path, mode="r") as handle:
        for _ in csv.reader(handle):
            total += 1
    return total


def main():
    """Start the two worker threads: first the collector, then the detector."""
    for worker in (fetch_data, detect_anomaly):
        threading.Thread(target=worker).start()

# Start the workers only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()
    