In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from sklearn.ensemble import IsolationForest
import pandas as pd

# 1. Throughput

In [None]:
# Load the HTTP metrics file and build the model input: the 'throughput'
# column only, indexed by the 'ds' column.
df = pd.read_csv('http_metrics.csv')
model_data = df.set_index('ds')[['throughput']]
model_data.head()

Unnamed: 0_level_0,throughput
ds,Unnamed: 1_level_1
2024-04-15 23:32:00,3898.0
2024-04-15 23:33:00,3917.5
2024-04-15 23:34:00,3993.0
2024-04-15 23:35:00,3991.5
2024-04-15 23:36:00,3915.5


In [None]:
def run_isolation_forest(model_data, contamination=0.002, n_estimators=200, max_samples=0.9):
    """Fit an IsolationForest on ``model_data`` and score the same rows.

    Parameters
    ----------
    model_data : DataFrame or array-like
        Passed unchanged to ``fit``, ``predict`` and ``decision_function``.
    contamination, n_estimators, max_samples
        Forwarded to the ``IsolationForest`` constructor.

    Returns
    -------
    output : pandas.Series of int64
        1 where ``predict`` returned -1, otherwise 0. The series carries a
        default positional index (0..n-1), not the index of ``model_data``;
        callers in this notebook attach it with ``DataFrame.assign`` to a
        frame that has the default index.
    score : array
        Result of ``decision_function`` for the same rows.
    """
    # random_state is fixed so repeated runs build the same forest.
    clf = IsolationForest(
        random_state=0,
        contamination=contamination,
        n_estimators=n_estimators,
        max_samples=max_samples,
    )
    clf.fit(model_data)

    # Vectorized -1 -> 1 / everything else -> 0 mapping (no per-row lambda).
    output = (pd.Series(clf.predict(model_data)) == -1).astype('int64')
    score = clf.decision_function(model_data)
    return output, score

# Score the throughput series and attach the flag and the raw score to `df`.
outliers, score = run_isolation_forest(model_data)
df = df.assign(Outliers=outliers, Score=score)

In [None]:
# Explicit copy so that `outliers_data` is an independent frame rather than a
# flagged column subset of `df`; in-place edits made to it downstream then
# cannot raise pandas' SettingWithCopyWarning.
outliers_data = df[['ds', 'throughput', 'Outliers']].copy()
outliers_data

Unnamed: 0,ds,throughput,Outliers
0,2024-04-15 23:32:00,3898.0,0
1,2024-04-15 23:33:00,3917.5,0
2,2024-04-15 23:34:00,3993.0,0
3,2024-04-15 23:35:00,3991.5,0
4,2024-04-15 23:36:00,3915.5,0
...,...,...,...
43267,2024-05-16 00:54:00,2560.0,0
43268,2024-05-16 00:55:00,2491.0,0
43269,2024-05-16 00:56:00,2429.0,0
43270,2024-05-16 00:57:00,2373.5,0


In [None]:
# Reshape the result into the export layout and write it to CSV.
# The table is built with a non-mutating chain: `outliers_data` was taken as a
# column subset of `df`, so in-place rename / column assignment on it triggers
# pandas' SettingWithCopyWarning (silenced by the global warnings filter).
outliers_data = (
    outliers_data
    .rename(columns={'ds': 'timestamp', 'throughput': 'value', 'Outliers': 'is_anomaly'})
    .assign(
        # 0/1 flag -> boolean
        is_anomaly=lambda frame: frame['is_anomaly'].astype(bool),
        **{'Метрика': 'Throughput', 'Модель': 'Isolation Forest'}
    )
    [['Модель', 'Метрика', 'timestamp', 'value', 'is_anomaly']]
)
outliers_data.to_csv('isolation_forest_throughput.csv', index=False)

In [None]:
# Rebuild the three-column view from `df` for plotting (the previous cell
# replaced `outliers_data` with the export layout).
outliers_data = df.loc[:, ['ds', 'throughput', 'Outliers']]
outliers_data

Unnamed: 0,ds,throughput,Outliers
0,2024-04-15 23:32:00,3898.0,0
1,2024-04-15 23:33:00,3917.5,0
2,2024-04-15 23:34:00,3993.0,0
3,2024-04-15 23:35:00,3991.5,0
4,2024-04-15 23:36:00,3915.5,0
...,...,...,...
43267,2024-05-16 00:54:00,2560.0,0
43268,2024-05-16 00:55:00,2491.0,0
43269,2024-05-16 00:56:00,2429.0,0
43270,2024-05-16 00:57:00,2373.5,0


In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Base line chart of the time series.
fig = px.line(
    outliers_data,
    x='ds',
    y='throughput',
    title='Throughput Over Time with Anomalies',
    labels={'ds': 'Date', 'throughput': 'Throughput'},
)

# Overlay the rows flagged as outliers as red markers.
anomalies = outliers_data.loc[outliers_data['Outliers'] == 1]
fig.add_trace(
    go.Scatter(
        x=anomalies['ds'],
        y=anomalies['throughput'],
        mode='markers',
        marker={'color': 'red', 'size': 10},
        name='Anomalies',
    )
)

# Axis titles, legend position and theme.
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Throughput',
    legend={'x': 0, 'y': 1},
    template='plotly_white',
)

# Render the figure.
fig.show()

# 2. Web response

In [None]:
# Reload the HTTP metrics file (this resets `df`) and build the model input:
# the 'web_response_time' column only, indexed by the 'ds' column.
df = pd.read_csv('http_metrics.csv')
model_data = df.set_index('ds')[['web_response_time']]
model_data.head()

Unnamed: 0_level_0,web_response_time
ds,Unnamed: 1_level_1
2024-04-15 23:32:00,0.007541
2024-04-15 23:33:00,0.008417
2024-04-15 23:34:00,0.009546
2024-04-15 23:35:00,0.009014
2024-04-15 23:36:00,0.012279


In [None]:
def run_isolation_forest(model_data, contamination=0.003, n_estimators=100, max_samples=0.9):
    """Fit an IsolationForest on ``model_data`` and score the same rows.

    NOTE: this redefinition replaces the ``run_isolation_forest`` from the
    Throughput section (different defaults); the Apdex section further down
    also calls this definition.

    Parameters
    ----------
    model_data : DataFrame or array-like
        Passed unchanged to ``fit``, ``predict`` and ``decision_function``.
    contamination, n_estimators, max_samples
        Forwarded to the ``IsolationForest`` constructor.

    Returns
    -------
    output : pandas.Series of int64
        1 where ``predict`` returned -1, otherwise 0, on a default positional
        index (0..n-1) rather than the index of ``model_data``.
    score : array
        Result of ``decision_function`` for the same rows.
    """
    # random_state is fixed so repeated runs build the same forest.
    clf = IsolationForest(
        random_state=0,
        contamination=contamination,
        n_estimators=n_estimators,
        max_samples=max_samples,
    )
    clf.fit(model_data)

    # Vectorized -1 -> 1 / everything else -> 0 mapping (no per-row lambda).
    output = (pd.Series(clf.predict(model_data)) == -1).astype('int64')
    score = clf.decision_function(model_data)

    return output, score


# Score the web response time series and attach the flag and the raw score to `df`.
outliers, score = run_isolation_forest(model_data)
df = df.assign(Outliers=outliers, Score=score)


In [None]:
# Explicit copy so that `outliers_data` is an independent frame rather than a
# flagged column subset of `df`; in-place edits made to it downstream then
# cannot raise pandas' SettingWithCopyWarning.
outliers_data = df[['ds', 'web_response_time', 'Outliers']].copy()
outliers_data

Unnamed: 0,ds,web_response_time,Outliers
0,2024-04-15 23:32:00,0.007541,0
1,2024-04-15 23:33:00,0.008417,0
2,2024-04-15 23:34:00,0.009546,0
3,2024-04-15 23:35:00,0.009014,0
4,2024-04-15 23:36:00,0.012279,0
...,...,...,...
43267,2024-05-16 00:54:00,0.007921,0
43268,2024-05-16 00:55:00,0.009280,0
43269,2024-05-16 00:56:00,0.010534,0
43270,2024-05-16 00:57:00,0.009379,0


In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Base line chart of the time series.
fig = px.line(
    outliers_data,
    x='ds',
    y='web_response_time',
    title='web_response_time Over Time with Anomalies',
    labels={'ds': 'Date', 'web_response_time': 'web_response_time'},
)

# Overlay the rows flagged as outliers as red markers.
anomalies = outliers_data.loc[outliers_data['Outliers'] == 1]
fig.add_trace(
    go.Scatter(
        x=anomalies['ds'],
        y=anomalies['web_response_time'],
        mode='markers',
        marker={'color': 'red', 'size': 10},
        name='Anomalies',
    )
)

# Axis titles, legend position and theme.
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='web_response_time',
    legend={'x': 0, 'y': 1},
    template='plotly_white',
)

# Render the figure.
fig.show()

In [None]:
# Reshape the result into the export layout and write it to CSV.
# The table is built with a non-mutating chain: `outliers_data` was taken as a
# column subset of `df`, so in-place rename / column assignment on it triggers
# pandas' SettingWithCopyWarning (silenced by the global warnings filter).
outliers_data = (
    outliers_data
    .rename(columns={'ds': 'timestamp', 'web_response_time': 'value', 'Outliers': 'is_anomaly'})
    .assign(
        # 0/1 flag -> boolean
        is_anomaly=lambda frame: frame['is_anomaly'].astype(bool),
        **{'Метрика': 'Web Response', 'Модель': 'Isolation Forest'}
    )
    [['Модель', 'Метрика', 'timestamp', 'value', 'is_anomaly']]
)
outliers_data.to_csv('isolation_forest_web_response.csv', index=False)

# 3. Apdex

In [None]:
# Load the Apdex metrics file and build the model input: the 'apdex' column
# only, indexed by the 'ds' column.
df = pd.read_csv('apdex_metrics.csv')
model_data = df.set_index('ds')[['apdex']]
model_data.head()

Unnamed: 0_level_0,apdex
ds,Unnamed: 1_level_1
2024-04-15 23:32:00,0.999679
2024-04-15 23:33:00,0.999745
2024-04-15 23:34:00,0.999687
2024-04-15 23:35:00,0.999812
2024-04-15 23:36:00,0.999489


In [None]:
# Hyperparameters are spelled out instead of being inherited from whichever
# `run_isolation_forest` definition happened to run last. The values equal the
# defaults of the definition in the Web response section, which is what a
# top-to-bottom run of the notebook uses for this call.
outliers, score = run_isolation_forest(
    model_data,
    contamination=0.003,
    n_estimators=100,
    max_samples=0.9,
)
# Attach the 0/1 flag and the raw score to `df`.
df = df.assign(Outliers=outliers, Score=score)


In [None]:
# Explicit copy so that `outliers_data` is an independent frame rather than a
# flagged column subset of `df`; in-place edits made to it downstream then
# cannot raise pandas' SettingWithCopyWarning.
outliers_data = df[['ds', 'apdex', 'Outliers']].copy()
outliers_data

Unnamed: 0,ds,apdex,Outliers
0,2024-04-15 23:32:00,0.999679,0
1,2024-04-15 23:33:00,0.999745,0
2,2024-04-15 23:34:00,0.999687,0
3,2024-04-15 23:35:00,0.999812,0
4,2024-04-15 23:36:00,0.999489,0
...,...,...,...
43267,2024-05-16 00:54:00,0.998828,0
43268,2024-05-16 00:55:00,0.998495,0
43269,2024-05-16 00:56:00,0.997736,0
43270,2024-05-16 00:57:00,0.997999,0


In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Base line chart of the time series.
fig = px.line(
    outliers_data,
    x='ds',
    y='apdex',
    title='Apdex Over Time with Anomalies',
    labels={'ds': 'Date', 'apdex': 'apdex'},
)

# Overlay the rows flagged as outliers as red markers.
anomalies = outliers_data.loc[outliers_data['Outliers'] == 1]
fig.add_trace(
    go.Scatter(
        x=anomalies['ds'],
        y=anomalies['apdex'],
        mode='markers',
        marker={'color': 'red', 'size': 10},
        name='Anomalies',
    )
)

# Axis titles, legend position and theme.
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='apdex',
    legend={'x': 0, 'y': 1},
    template='plotly_white',
)

# Render the figure.
fig.show()

In [None]:
# Reshape the result into the export layout and write it to CSV.
# The table is built with a non-mutating chain: `outliers_data` was taken as a
# column subset of `df`, so in-place rename / column assignment on it triggers
# pandas' SettingWithCopyWarning (silenced by the global warnings filter).
outliers_data = (
    outliers_data
    .rename(columns={'ds': 'timestamp', 'apdex': 'value', 'Outliers': 'is_anomaly'})
    .assign(
        # 0/1 flag -> boolean
        is_anomaly=lambda frame: frame['is_anomaly'].astype(bool),
        **{'Метрика': 'Apdex', 'Модель': 'Isolation Forest'}
    )
    [['Модель', 'Метрика', 'timestamp', 'value', 'is_anomaly']]
)
outliers_data.to_csv('isolation_forest_apdex.csv', index=False)

In [None]:
# Show the reshaped table that the previous cell wrote to isolation_forest_apdex.csv.
outliers_data

Unnamed: 0,Модель,Метрика,timestamp,value,is_anomaly
0,Isolation Forest,Apdex,2024-04-15 23:32:00,0.999679,False
1,Isolation Forest,Apdex,2024-04-15 23:33:00,0.999745,False
2,Isolation Forest,Apdex,2024-04-15 23:34:00,0.999687,False
3,Isolation Forest,Apdex,2024-04-15 23:35:00,0.999812,False
4,Isolation Forest,Apdex,2024-04-15 23:36:00,0.999489,False
...,...,...,...,...,...
43267,Isolation Forest,Apdex,2024-05-16 00:54:00,0.998828,False
43268,Isolation Forest,Apdex,2024-05-16 00:55:00,0.998495,False
43269,Isolation Forest,Apdex,2024-05-16 00:56:00,0.997736,False
43270,Isolation Forest,Apdex,2024-05-16 00:57:00,0.997999,False


# 4. Error

In [None]:
# Load the error ratio file and build the model input: the 'Error' column
# only, indexed by the 'ds' column.
df = pd.read_csv('error_ratio.csv')
model_data = df.set_index('ds')[['Error']]
model_data.head()

Unnamed: 0_level_0,Error
ds,Unnamed: 1_level_1
2024-04-15 23:32:00,0.000128
2024-04-15 23:33:00,0.000128
2024-04-15 23:34:00,0.0
2024-04-15 23:35:00,0.0
2024-04-15 23:36:00,0.0


In [None]:
def run_isolation_forest(model_data, contamination=0.001, n_estimators=150, max_samples=0.9):
    """Fit an IsolationForest on ``model_data`` and score the same rows.

    NOTE: this redefinition replaces the earlier ``run_isolation_forest``
    definitions in this notebook; only the default hyperparameters differ.

    Parameters
    ----------
    model_data : DataFrame or array-like
        Passed unchanged to ``fit``, ``predict`` and ``decision_function``.
    contamination, n_estimators, max_samples
        Forwarded to the ``IsolationForest`` constructor.

    Returns
    -------
    output : pandas.Series of int64
        1 where ``predict`` returned -1, otherwise 0, on a default positional
        index (0..n-1) rather than the index of ``model_data``.
    score : array
        Result of ``decision_function`` for the same rows.
    """
    # random_state is fixed so repeated runs build the same forest.
    clf = IsolationForest(
        random_state=0,
        contamination=contamination,
        n_estimators=n_estimators,
        max_samples=max_samples,
    )
    clf.fit(model_data)

    # Vectorized -1 -> 1 / everything else -> 0 mapping (no per-row lambda).
    output = (pd.Series(clf.predict(model_data)) == -1).astype('int64')
    score = clf.decision_function(model_data)

    return output, score


# Score the error series and attach the flag and the raw score to `df`.
outliers, score = run_isolation_forest(model_data)
df = df.assign(Outliers=outliers, Score=score)


In [None]:
# Explicit copy so that `outliers_data` is an independent frame rather than a
# flagged column subset of `df`; in-place edits made to it downstream then
# cannot raise pandas' SettingWithCopyWarning.
outliers_data = df[['ds', 'Error', 'Outliers']].copy()
outliers_data

Unnamed: 0,ds,Error,Outliers
0,2024-04-15 23:32:00,0.000128,0
1,2024-04-15 23:33:00,0.000128,0
2,2024-04-15 23:34:00,0.000000,0
3,2024-04-15 23:35:00,0.000000,0
4,2024-04-15 23:36:00,0.000000,0
...,...,...,...
43267,2024-05-16 00:54:00,0.000000,0
43268,2024-05-16 00:55:00,0.000000,0
43269,2024-05-16 00:56:00,0.000206,0
43270,2024-05-16 00:57:00,0.000211,0


In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Base line chart of the time series. `outliers_data` is used for both the
# line and the markers (as in the other sections) instead of indexing `df`
# with a mask built from a different frame.
fig = px.line(
    outliers_data,
    x='ds',
    y='Error',
    title='Error Over Time with Anomalies',
    labels={'ds': 'Date', 'Error': 'Error'},
)

# Overlay the rows flagged as outliers as red markers.
anomalies = outliers_data[outliers_data['Outliers'] == 1]
fig.add_trace(
    go.Scatter(
        x=anomalies['ds'],
        y=anomalies['Error'],
        mode='markers',
        marker=dict(color='red', size=10),
        name='Anomalies',
    )
)

# Axis titles, legend position and theme.
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Error',
    legend=dict(x=0, y=1),
    template='plotly_white'
)

# Render the figure.
fig.show()

In [None]:
# Show the first row of `outliers_data` (columns at this point: ds, Error, Outliers).
outliers_data.head(1)

Unnamed: 0,ds,Error,Outliers
0,2024-04-15 23:32:00,0.000128,0


In [None]:
# Reshape the result into the export layout and write it to CSV.
# The table is built with a non-mutating chain: `outliers_data` was taken as a
# column subset of `df`, so in-place rename / column assignment on it triggers
# pandas' SettingWithCopyWarning (silenced by the global warnings filter).
outliers_data = (
    outliers_data
    .rename(columns={'ds': 'timestamp', 'Error': 'value', 'Outliers': 'is_anomaly'})
    .assign(
        # 0/1 flag -> boolean
        is_anomaly=lambda frame: frame['is_anomaly'].astype(bool),
        **{'Метрика': 'Error', 'Модель': 'Isolation Forest'}
    )
    [['Модель', 'Метрика', 'timestamp', 'value', 'is_anomaly']]
)
outliers_data.to_csv('isolation_forest_error.csv', index=False)

In [None]:
# Show the reshaped table that the previous cell wrote to isolation_forest_error.csv.
outliers_data

Unnamed: 0,Модель,Метрика,timestamp,value,is_anomaly
0,Isolation Forest,Error,2024-04-15 23:32:00,0.000128,False
1,Isolation Forest,Error,2024-04-15 23:33:00,0.000128,False
2,Isolation Forest,Error,2024-04-15 23:34:00,0.000000,False
3,Isolation Forest,Error,2024-04-15 23:35:00,0.000000,False
4,Isolation Forest,Error,2024-04-15 23:36:00,0.000000,False
...,...,...,...,...,...
43267,Isolation Forest,Error,2024-05-16 00:54:00,0.000000,False
43268,Isolation Forest,Error,2024-05-16 00:55:00,0.000000,False
43269,Isolation Forest,Error,2024-05-16 00:56:00,0.000206,False
43270,Isolation Forest,Error,2024-05-16 00:57:00,0.000211,False
