# Examples

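Examples of how to use the anomaly agent: generate some dummy time series data, detect anomalies in it (with and without verification), and plot the results.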

<a target="_blank" href="https://colab.research.google.com/github/andrewm4894/anomaly-agent/blob/main/notebooks/examples.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [1]:
import os
import pandas as pd
from anomaly_agent.utils import make_df, make_anomaly_config
from anomaly_agent.plot import plot_df
from anomaly_agent.agent import AnomalyAgent

# show full cell contents in displayed frames (None = no column-width limit),
# so the long anomaly descriptions are not truncated
pd.options.display.max_colwidth = None

# if OPENAI_API_KEY is not already in the environment, uncomment and set it:
# os.environ['OPENAI_API_KEY'] = "<your-openai-api-key>"

In [2]:
# get an anomaly config to generate some dummy data
anomaly_cfg = make_anomaly_config()
# print the config so the anomaly settings used for the dummy data are visible
print(anomaly_cfg)

# generate some dummy data, passing the config above as `anomaly_config`
# NOTE(review): 100 and 3 are presumably the number of rows and the number of
# variables (the output shows var1..var3) - confirm against make_df's signature
df = make_df(100, 3, anomaly_config=anomaly_cfg)
# preview the first rows (rendered as the cell's output)
df.head()


{'enabled': True, 'fraction': 0.05, 'methods': ['spike', 'drop', 'shift', 'noise'], 'spike_factor': 10, 'shift_value': 3, 'noise_std': 0.2}


Unnamed: 0,timestamp,var1,var2,var3
0,2020-01-01,,0.832385,3.098451
1,2020-01-02,0.239348,0.266296,0.795417
2,2020-01-03,0.17846,0.749308,0.255721
3,2020-01-04,0.525892,0.117102,0.979321
4,2020-01-05,0.277341,0.92042,0.663163


In [3]:
# plot the generated dummy data (`df`) to inspect it before running detection
plot_df(df)

In [4]:
# create anomaly agent
# NOTE(review): presumably needs an OpenAI API key in the environment (see the
# commented-out OPENAI_API_KEY line in the first cell) - confirm
anomaly_agent = AnomalyAgent()

# detect anomalies (no `verify` argument passed)
# NOTE(review): presumably verification is on by default - confirm
anomalies = anomaly_agent.detect_anomalies(df)

# detect anomalies without verification
# (kept under its own name so it can be converted and plotted separately later)
anomalies_no_verify = anomaly_agent.detect_anomalies(df, verify=False)

# print anomalies (first run only); the printed result is a dict keyed by
# variable name, each value holding that variable's list of anomalies
print(anomalies)


{'var1': AnomalyList(anomalies=[Anomaly(timestamp='2020-01-01', variable_value=0.0, anomaly_description='Missing value at the start of the time series.'), Anomaly(timestamp='2020-03-16', variable_value=3.697953, anomaly_description='Unusually high value compared to the surrounding data.')]), 'var2': AnomalyList(anomalies=[Anomaly(timestamp='2020-04-05', variable_value=7.969151, anomaly_description='Significant spike compared to previous values.')]), 'var3': AnomalyList(anomalies=[Anomaly(timestamp='2020-01-23', variable_value=9.538842, anomaly_description='Significantly higher than previous values, indicating a potential anomaly.'), Anomaly(timestamp='2020-03-28', variable_value=3.79545, anomaly_description='Significantly higher than previous values, indicating a potential anomaly.'), Anomaly(timestamp='2020-04-04', variable_value=3.698473, anomaly_description='Significantly higher than previous values, indicating a potential anomaly.')])}


In [5]:
# get anomalies in long format
# (per the displayed output: one row per anomaly with timestamp, variable_name,
# value and anomaly_description columns)
df_anomalies_long = anomaly_agent.get_anomalies_df(anomalies)
# preview the first rows
df_anomalies_long.head()

Unnamed: 0,timestamp,variable_name,value,anomaly_description
0,2020-01-01,var1,0.0,Missing value at the start of the time series.
1,2020-03-16,var1,3.697953,Unusually high value compared to the surrounding data.
2,2020-04-05,var2,7.969151,Significant spike compared to previous values.
3,2020-01-23,var3,9.538842,"Significantly higher than previous values, indicating a potential anomaly."
4,2020-03-28,var3,3.79545,"Significantly higher than previous values, indicating a potential anomaly."


In [6]:
# get anomalies in wide format
# (per the displayed output: a timestamp column plus one column per variable,
# holding the anomalous value for that variable and empty elsewhere)
df_anomalies_wide = anomaly_agent.get_anomalies_df(anomalies, format="wide")

# same conversion for the detections made without verification
df_anomalies_wide_no_verify = anomaly_agent.get_anomalies_df(anomalies_no_verify, format="wide")

# preview only the first result set
df_anomalies_wide.head()


Unnamed: 0,timestamp,var1,var2,var3
0,2020-01-01,0.0,,
1,2020-03-16,3.697953,,
2,2020-04-05,,7.969151,
3,2020-01-23,,,9.538842
4,2020-03-28,,,3.79545


In [7]:
# Attach the wide anomaly columns to the original data. The left join keeps
# every row of `df`; anomaly columns whose names clash with an original column
# get the "_anomaly_flag" suffix, while the original columns keep their names.
df_final = pd.merge(
    df,
    df_anomalies_wide,
    how="left",
    on="timestamp",
    suffixes=("", "_anomaly_flag"),
)

# Same join for the detections made without verification.
df_final_no_verify = pd.merge(
    df,
    df_anomalies_wide_no_verify,
    how="left",
    on="timestamp",
    suffixes=("", "_anomaly_flag"),
)

# Preview the merged frame.
df_final.head()


Unnamed: 0,timestamp,var1,var2,var3,var1_anomaly_flag,var2_anomaly_flag,var3_anomaly_flag
0,2020-01-01,,0.832385,3.098451,0.0,,
1,2020-01-02,0.239348,0.266296,0.795417,,,
2,2020-01-03,0.17846,0.749308,0.255721,,,
3,2020-01-04,0.525892,0.117102,0.979321,,,
4,2020-01-05,0.277341,0.92042,0.663163,,,


In [8]:
# plot final data with anomalies
# return_fig=True is passed so the call's result can be kept in `fig` and
# shown explicitly with .show()
fig = plot_df(df_final, return_fig=True)
fig.show()

In [9]:
# plot final data no verify
# (same plot for the detections made without verification; `fig` is rebound
# to this new figure)
fig = plot_df(df_final_no_verify, return_fig=True)
fig.show()