In [3]:
from prophet import Prophet
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import joblib

In [4]:
df = pd.read_csv('apdex_metrics.csv' )

In [5]:
df.shape

(43272, 10)

In [6]:
ts=df[['ds','apdex']]

In [7]:
ts['ds'] = pd.to_datetime(ts['ds'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ts['ds'] = pd.to_datetime(ts['ds'])


In [8]:
ts.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43272 entries, 0 to 43271
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   ds      43272 non-null  datetime64[ns]
 1   apdex   43272 non-null  float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 676.2 KB


In [9]:
ts.rename(columns={"apdex": "y"}, inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ts.rename(columns={"apdex": "y"}, inplace=True)


In [10]:
model = Prophet(interval_width=0.99, weekly_seasonality=True, yearly_seasonality=False, daily_seasonality=True)
model.fit(ts)
forecast = model.predict(ts)

DEBUG:cmdstanpy:input tempfile: /tmp/tmp5j1o9kje/et8c5768.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp5j1o9kje/tazw8lrk.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=55737', 'data', 'file=/tmp/tmp5j1o9kje/et8c5768.json', 'init=/tmp/tmp5j1o9kje/tazw8lrk.json', 'output', 'file=/tmp/tmp5j1o9kje/prophet_modelrxtks5xr/prophet_model-20240525232519.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
23:25:19 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
23:25:28 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


In [11]:
# Предикт делаем на полную исходную выборку:
performance = pd.merge(ts, forecast[['ds','yhat_lower', 'yhat',  'yhat_upper']], on='ds')
performance.head()


Unnamed: 0,ds,y,yhat_lower,yhat,yhat_upper
0,2024-04-15 23:32:00,0.999679,0.984515,0.998596,1.011384
1,2024-04-15 23:33:00,0.999745,0.985434,0.998601,1.012043
2,2024-04-15 23:34:00,0.999687,0.985054,0.998606,1.013063
3,2024-04-15 23:35:00,0.999812,0.983986,0.998611,1.013553
4,2024-04-15 23:36:00,0.999489,0.98563,0.998616,1.013122


In [12]:
performance.rename(columns={'y': 'y_real', 'yhat_lower': 'y_pred_lower', 'yhat': 'y_pred', 'yhat_upper': 'y_pred_upper'}, inplace=True)
performance = performance[['ds', 'y_pred_lower', 'y_pred', 'y_pred_upper', 'y_real']]
performance.head()

Unnamed: 0,ds,y_pred_lower,y_pred,y_pred_upper,y_real
0,2024-04-15 23:32:00,0.984515,0.998596,1.011384,0.999679
1,2024-04-15 23:33:00,0.985434,0.998601,1.012043,0.999745
2,2024-04-15 23:34:00,0.985054,0.998606,1.013063,0.999687
3,2024-04-15 23:35:00,0.983986,0.998611,1.013553,0.999812
4,2024-04-15 23:36:00,0.98563,0.998616,1.013122,0.999489


In [13]:
# Вычисляем стандартное отклонение предсказанных значений
std_dev = performance['y_pred'].std()

# Определяем аномалии с более строгим порогом
performance['anomaly'] = performance.apply(
    lambda row: 1 if (row['y_real'] < (row['y_pred_lower'] - 20 * std_dev)) or (row['y_real'] > (row['y_pred_upper'] + 20 * std_dev)) else 0, axis=1)

anomalies = performance[performance['anomaly'] == 1].sort_values(by='ds')
anomalies.head()

Unnamed: 0,ds,y_pred_lower,y_pred,y_pred_upper,y_real,anomaly
5775,2024-04-19 23:59:00,0.985779,0.998872,1.013183,0.959951,1
5776,2024-04-20 00:00:00,0.984599,0.998878,1.011358,0.959683,1
9266,2024-04-22 10:10:00,0.983361,0.99712,1.012014,0.553747,1
9267,2024-04-22 10:11:00,0.984223,0.997114,1.011271,0.104994,1
9268,2024-04-22 10:12:00,0.981852,0.997108,1.011557,0.516516,1


In [14]:
performance['color'] = np.where(performance['anomaly']== 1, 'red', 'green')
performance['name'] = np.where(performance['anomaly'] == 1, 'Anomaly','Within Confidence Interval')

In [15]:
# Преобразуем исходные данные в DataFrame для Plotly
performance['timestamp'] = performance['ds']
performance['target'] = performance['y_real']
ts_df = performance[['timestamp', 'target']]

# Создаем график временного ряда
fig = px.line(ts_df, x='timestamp', y='target', title='Time Series with Anomalies')

# Добавляем аномалии на график
anomaly_points = performance[performance['anomaly'] == 1]
fig.add_scatter(x=anomaly_points['timestamp'], y=anomaly_points['target'],
                mode='markers', marker=dict(color='red', size=10), name='Anomalies')


In [16]:
# Подготовка итогового DataFrame
result = performance[['ds', 'y_real', 'anomaly']]
result.rename(columns={'ds': 'timestamp', 'y_real': 'Apdex'}, inplace=True)
result['model'] = 'prophet'

result.rename(columns={'ds': 'timestamp', 'Apdex': 'value'}, inplace=True)
result['Модель'] = 'Prophet'
result['Метрика'] = 'Apdex'
result['is_anomaly']=result['anomaly']
result['is_anomaly']=result['is_anomaly'].astype(bool)
# Сохраняем результат в CSV файл
result = result[['Модель', 'Метрика', 'timestamp', 'value',  'is_anomaly']]
result.to_csv('Prophet_Apdex.csv', index=False)

print("Результаты модели сохранены в файл 'result.csv'")



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice

Результаты модели сохранены в файл 'result.csv'


In [17]:
result

Unnamed: 0,Модель,Метрика,timestamp,value,is_anomaly
0,Prophet,Apdex,2024-04-15 23:32:00,0.999679,False
1,Prophet,Apdex,2024-04-15 23:33:00,0.999745,False
2,Prophet,Apdex,2024-04-15 23:34:00,0.999687,False
3,Prophet,Apdex,2024-04-15 23:35:00,0.999812,False
4,Prophet,Apdex,2024-04-15 23:36:00,0.999489,False
...,...,...,...,...,...
43267,Prophet,Apdex,2024-05-16 00:54:00,0.998828,False
43268,Prophet,Apdex,2024-05-16 00:55:00,0.998495,False
43269,Prophet,Apdex,2024-05-16 00:56:00,0.997736,False
43270,Prophet,Apdex,2024-05-16 00:57:00,0.997999,False
