In [141]:
import os
import warnings
import webbrowser
from pathlib import Path

import pandas as pd
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans

# Suppress every Python warning for the rest of the kernel session
# (presumably to keep cell output uncluttered).
# NOTE(review): this blanket filter also hides pandas / scikit-learn warnings
# that can point at real problems — consider narrowing it by category.
warnings.filterwarnings('ignore')

In [142]:
# Load the space-delimited API log; the first row supplies the column names.
logs = pd.read_csv('./API Logs.log', delimiter=' ', header=0)

# Parse each timestamp column once and reuse it for every derived field.
log_date = pd.to_datetime(logs['date'])
log_time = pd.to_datetime(logs['time'])

# Keep only the request attributes used below and append the calendar / clock
# parts. `.assign()` returns a new frame, so nothing is written into a slice
# of `logs` (the original column-by-column assignment triggered pandas'
# SettingWithCopyWarning).
df = logs[['cs-method', 'cs-uri-stem', 's-port', 'sc-status', 'sc-bytes', 'cs-bytes', 'time-taken']].assign(
    year=log_date.dt.year,
    month=log_date.dt.month,
    day=log_date.dt.day,
    hour=log_time.dt.hour,
    minute=log_time.dt.minute,
)
df.head()

Unnamed: 0,cs-method,cs-uri-stem,s-port,sc-status,sc-bytes,cs-bytes,time-taken,year,month,day,hour,minute
0,GET,/api/values,8080,200,278,299,1167,2023,11,9,18,4
1,GET,/api/values,8080,200,278,299,1,2023,11,9,18,4
2,GET,/api/values,8080,500,4796,299,212,2023,11,22,7,28
3,GET,/api/values,8080,200,278,299,12737,2023,11,22,7,30
4,GET,/api/values/1,8080,200,265,301,66,2023,11,22,7,30


In [143]:
# Distinct status codes in order of first appearance in the log.
# Status 200 is not filtered out at this point, so it is part of the array
# that the plotting cell iterates over.
unique_sc_status = pd.unique(df['sc-status'])
print("Status: ", unique_sc_status)

Status:  [200 500 204 405 400 415]


In [144]:
# Rows whose 'time-taken' value exceeds 1000.
slow_mask = df['time-taken'] > 1000
data = df.loc[slow_mask]

# Status codes seen among those slow rows, leaving out 200.
unique_data = data['sc-status'].unique()
unique_data = unique_data[~(unique_data == 200)]

# Print the (rows, columns) shape of the slow subset for each remaining status.
for status in unique_data:
    status_mask = data['sc-status'] == status
    filtered_data = data.loc[status_mask]
    print(status, ": ", filtered_data.shape)

405 :  (1, 12)
400 :  (3, 12)
500 :  (1027, 12)


In [165]:
def _save_cluster_scatter(features, x_col, title, image_path, n_clusters=5):
    """Cluster ``features`` with KMeans and save a scatter plot coloured by cluster.

    Parameters
    ----------
    features : pandas.DataFrame
        Numeric (one-hot encoded) frame containing ``x_col`` and
        ``'time-taken'``. Every column is passed to KMeans as a feature.
    x_col : str
        Column drawn on the x axis; also used as the x-axis label.
    title : str
        Figure title.
    image_path : str
        Destination file for the saved figure.
    n_clusters : int, default 5
        Number of KMeans clusters.
    """
    # random_state is fixed so repeated runs produce the same labelling.
    kmeans = KMeans(n_clusters=n_clusters, random_state=1)
    labels = kmeans.fit(features).labels_

    fig, ax = plt.subplots(figsize=(5, 5))
    ax.scatter(features[x_col], features['time-taken'], c=labels)
    ax.set_xlabel(x_col)
    ax.set_ylabel('time taken')
    ax.set_title(title)
    fig.savefig(image_path)
    # Close explicitly: this runs in a loop and open figures accumulate.
    plt.close(fig)


image_dir = 'plots'
os.makedirs(image_dir, exist_ok=True)
saved_img = []  # paths of every figure written, in creation order

# A status is plotted only when more than `min_no_of_errors` of its requests
# have 'time-taken' above `time_taken`; an hour is plotted only when it holds
# more than `min_no_of_errors` requests of that status.
min_no_of_errors = 25
time_taken = 1000

for status in unique_sc_status:
    filtered_df = df[df['sc-status'] == status]
    slow_count = (filtered_df['time-taken'] > time_taken).sum()
    if slow_count <= min_no_of_errors:
        continue

    # One-hot encode the text columns so KMeans receives numeric input only.
    # Done after the threshold check so skipped statuses cost nothing.
    df_encoded = pd.get_dummies(filtered_df)

    image_path = os.path.join(image_dir, f"hourly_status_{status}.png")
    _save_cluster_scatter(
        df_encoded, 'hour',
        "Hourly status for Error["+str(status)+"]",
        image_path,
    )
    saved_img.append(image_path)

    # Hours with enough requests of this status, in ascending order. This
    # filter already guarantees the per-hour frame is large enough, so no
    # second size check is needed inside the loop.
    hour_counts = filtered_df['hour'].value_counts()
    hours = sorted(hour_counts[hour_counts > min_no_of_errors].index.tolist())

    for hour in hours:
        in_hour = df_encoded[df_encoded['hour'] == hour]
        # Encode the time as H.MM (e.g. 18.04). Building it from strings
        # ("18" + "." + "4") collapsed minute 4 and minute 40 onto 18.4 and
        # put minute 5 after minute 45; dividing by 100 keeps minutes
        # zero-padded and ordered. `.assign()` returns a new frame, so the
        # column is not written into a slice of `df_encoded`.
        df_hourly = in_hour.assign(
            hour_minute=(in_hour['hour'] + in_hour['minute'] / 100).round(2)
        )

        image_path = os.path.join(image_dir, f"per_minute_status_{status}_hour_{hour}.png")
        _save_cluster_scatter(
            df_hourly, 'hour_minute',
            "Per minute status for Error["+str(status)+"] at Hour["+str(hour)+"]",
            image_path,
        )
        saved_img.append(image_path)

In [169]:
# Assemble a minimal HTML page that shows every saved plot.
html_parts = ["""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Plots</title>
</head>
<body>
"""]

for img_path in saved_img:
    # Use forward slashes in the src attribute: os.path.join produces
    # backslashes on Windows, which are not valid URL path separators.
    img_tag = f"<img src=\"{Path(img_path).as_posix()}\" width=\"400\">"
    # Put each per-status overview plot (file name starting with
    # 'hourly_status') on its own line; per-minute plots flow inline after it.
    if os.path.basename(img_path).startswith('hourly_status'):
        img_tag = f"<br>{img_tag}<br>"
    html_parts.append(img_tag)

html_parts.append("""
</body>
</html>
""")
html_content = "".join(html_parts)

# Save the HTML content to a file. The encoding is explicit so it matches the
# <meta charset> declaration instead of depending on the platform default.
html_file_path = "plots.html"
with open(html_file_path, 'w', encoding='utf-8') as html_file:
    html_file.write(html_content)

# Open the HTML file in the default web browser. as_uri() yields a well-formed,
# percent-encoded file:// URL on every platform, unlike 'file://' + abspath.
webbrowser.open(Path(html_file_path).resolve().as_uri())


True