# Examples

Some examples of how to use the anomaly agent.

<a target="_blank" href="https://colab.research.google.com/github/andrewm4894/anomaly-agent/blob/main/notebooks/examples.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [1]:
import os
from anomaly_agent.utils import make_df, make_anomaly_config
from anomaly_agent.plot import plot_df
from anomaly_agent.agent import AnomalyAgent

# set openai api key if not in environment
# os.environ['OPENAI_API_KEY'] = "<your-openai-api-key>"

In [2]:
# get an anomaly config to generate some dummy data
# (printed below: a dict with keys such as 'enabled', 'fraction', 'methods', ...)
anomaly_cfg = make_anomaly_config()
print(anomaly_cfg)

# generate some dummy data with anomalies injected according to anomaly_cfg
# NOTE(review): the positional args 100 and 3 look like (n_rows, n_variables) --
# the output has columns timestamp, var1, var2, var3 -- confirm against make_df
df = make_df(100, 3, anomaly_config=anomaly_cfg)
df.head()


{'enabled': True, 'fraction': 0.1, 'methods': ['spike', 'drop', 'shift', 'noise'], 'spike_factor': 10, 'shift_value': 3, 'noise_std': 0.2}


Unnamed: 0,timestamp,var1,var2,var3
0,2020-01-01,0.926846,0.834962,0.583337
1,2020-01-02,0.568183,0.842156,0.564705
2,2020-01-03,0.03421,,0.793188
3,2020-01-04,0.803681,0.160352,0.944066
4,2020-01-05,0.063311,0.593429,0.181159


In [3]:
# plot the raw dummy data (before any anomaly detection has been run)
plot_df(df)

In [4]:
# create anomaly agent with its default settings
# NOTE(review): presumably needs OPENAI_API_KEY to be set (see the imports cell) -- confirm
anomaly_agent = AnomalyAgent()

# detect anomalies using the default call (no `verify` argument passed)
anomalies = anomaly_agent.detect_anomalies(df)

# detect anomalies without verification, kept separately so the two results
# can be compared in the plots at the end of the notebook
anomalies_no_verify = anomaly_agent.detect_anomalies(df, verify=False)

# print anomalies: a dict keyed by variable name ('var1', 'var2', ...), each value an
# AnomalyList of Anomaly(timestamp, variable_value, anomaly_description)
print(anomalies)


{'var1': AnomalyList(anomalies=[Anomaly(timestamp='2020-01-24T00:00:00', variable_value=5.37691, anomaly_description='Significantly higher than regular values in the time series.'), Anomaly(timestamp='2020-02-05T00:00:00', variable_value=3.552401, anomaly_description='Significantly higher than regular values in the time series.'), Anomaly(timestamp='2020-02-06T00:00:00', variable_value=3.412687, anomaly_description='Significantly higher than regular values in the time series.'), Anomaly(timestamp='2020-02-19T00:00:00', variable_value=3.636442, anomaly_description='Significantly higher than regular values in the time series.'), Anomaly(timestamp='2020-02-24T00:00:00', variable_value=3.518777, anomaly_description='Significantly higher than regular values in the time series.')]), 'var2': AnomalyList(anomalies=[Anomaly(timestamp='2020-01-03', variable_value=0.0, anomaly_description='Missing data (NaN value) in timeseries.'), Anomaly(timestamp='2020-01-18', variable_value=3.80986, anomaly_d

In [5]:
# get anomalies in long format (no `format` argument passed): one row per detected
# anomaly, with columns timestamp, variable_name, value, description
df_anomalies_long = anomaly_agent.get_anomalies_df(anomalies)
df_anomalies_long.head()

Unnamed: 0,timestamp,variable_name,value,description
0,2020-01-24,var1,5.37691,Significantly higher than regular values in th...
1,2020-02-05,var1,3.552401,Significantly higher than regular values in th...
2,2020-02-06,var1,3.412687,Significantly higher than regular values in th...
3,2020-02-19,var1,3.636442,Significantly higher than regular values in th...
4,2020-02-24,var1,3.518777,Significantly higher than regular values in th...


In [None]:
# Reshape both detection results into wide format: a timestamp column plus one
# column per variable holding the anomalous value (NaN where nothing was flagged).
to_anomalies_df = anomaly_agent.get_anomalies_df
df_anomalies_wide = to_anomalies_df(anomalies, format="wide")
df_anomalies_wide_no_verify = to_anomalies_df(anomalies_no_verify, format="wide")

# preview the verified result
df_anomalies_wide.head()


Unnamed: 0,timestamp,var1,var2,var3
0,2020-01-24,5.37691,,
1,2020-02-05,3.552401,,
2,2020-02-06,3.412687,4.343122,
3,2020-02-19,3.636442,,
4,2020-02-24,3.518777,,


In [None]:
# Left-join the wide anomaly frames onto the original data so every original row is
# kept. The original columns keep their names; the overlapping anomaly columns get
# the "_anomaly_flag" suffix and are NaN wherever no anomaly was flagged.
merge_kwargs = dict(on="timestamp", how="left", suffixes=("", "_anomaly_flag"))
df_final = df.merge(df_anomalies_wide, **merge_kwargs)
df_final_no_verify = df.merge(df_anomalies_wide_no_verify, **merge_kwargs)

# preview the verified result
df_final.head()


Unnamed: 0,timestamp,var1,var2,var3,var1_anomaly_flag,var2_anomaly_flag,var3_anomaly_flag
0,2020-01-01,0.926846,0.834962,0.583337,,,
1,2020-01-02,0.568183,0.842156,0.564705,,,
2,2020-01-03,0.03421,,0.793188,,0.0,
3,2020-01-04,0.803681,0.160352,0.944066,,,
4,2020-01-05,0.063311,0.593429,0.181159,,,


In [8]:
# plot final data with anomalies: the original series merged with the
# `*_anomaly_flag` columns from the default (verified) detection run
plot_df(df_final)

In [9]:
# plot final data for the run with verify=False, for comparison with the plot of
# the default (verified) run
plot_df(df_final_no_verify)