# Examples

Some examples of how to use the anomaly agent.

<a target="_blank" href="https://colab.research.google.com/github/andrewm4894/anomaly-agent/blob/main/notebooks/examples.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [1]:
import os
from anomaly_agent.utils import make_df, make_anomaly_config
from anomaly_agent.plot import plot_df
from anomaly_agent.agent import AnomalyAgent

# set openai api key if not in environment
# os.environ['OPENAI_API_KEY'] = "<your-openai-api-key>"


In [2]:
# get an anomaly config to generate some dummy data
# (printed so the settings in use are visible in the cell output)
anomaly_cfg = make_anomaly_config()
print(anomaly_cfg)

# generate some dummy data using that config
# NOTE(review): presumably 100 rows and 3 variables (the preview shows
# var1..var3) - confirm against make_df's signature
df = make_df(100, 3, anomaly_config=anomaly_cfg)
df.head()


{'enabled': True, 'fraction': 0.1, 'methods': ['spike', 'drop', 'shift', 'noise'], 'spike_factor': 10, 'shift_value': 3, 'noise_std': 0.2}


Unnamed: 0,timestamp,var1,var2,var3
0,2020-01-01,0.789249,0.229507,0.08215
1,2020-01-02,0.129874,0.451953,0.633425
2,2020-01-03,0.968297,0.835571,0.664757
3,2020-01-04,0.644651,0.644888,0.071622
4,2020-01-05,0.523788,0.907875,0.486874


In [3]:
# plot the dummy data generated above
plot_df(df)

In [4]:
# create anomaly agent
# NOTE(review): presumably requires OPENAI_API_KEY to be set (see the
# commented-out line in the first cell) - confirm
anomaly_agent = AnomalyAgent()

# detect anomalies in the dummy data
anomalies = anomaly_agent.detect_anomalies(df)

# print anomalies: a dict keyed by variable name whose values are AnomalyList
# objects holding Anomaly(timestamp, variable_value, anomaly_description)
print(anomalies)


{'var1': AnomalyList(anomalies=[Anomaly(timestamp='2020-02-05', variable_value=3.279153, anomaly_description='Abrupt spike in value, significantly higher than previous observations.'), Anomaly(timestamp='2020-02-15', variable_value=5.001551, anomaly_description='Abrupt spike in value, significantly higher than previous observations.'), Anomaly(timestamp='2020-02-20', variable_value=3.526827, anomaly_description='Abrupt spike in value, significantly higher than previous observations.'), Anomaly(timestamp='2020-03-23', variable_value=3.735584, anomaly_description='Abrupt spike in value, significantly higher than previous observations.'), Anomaly(timestamp='2020-04-05', variable_value=8.207361, anomaly_description='Abrupt spike in value, significantly higher than previous observations.'), Anomaly(timestamp='2020-02-06', variable_value=0.0, anomaly_description='Missing value (NaN) detected.'), Anomaly(timestamp='2020-02-24', variable_value=0.0, anomaly_description='Missing value (NaN) dete

In [5]:
# get anomalies in long format (no `format` argument is passed here):
# one row per detected anomaly, with the columns
# timestamp, variable_name, value and description
df_anomalies_long = anomaly_agent.get_anomalies_df(anomalies)
df_anomalies_long.head()

Unnamed: 0,timestamp,variable_name,value,description
0,2020-02-05,var1,3.279153,"Abrupt spike in value, significantly higher th..."
1,2020-02-15,var1,5.001551,"Abrupt spike in value, significantly higher th..."
2,2020-02-20,var1,3.526827,"Abrupt spike in value, significantly higher th..."
3,2020-03-23,var1,3.735584,"Abrupt spike in value, significantly higher th..."
4,2020-04-05,var1,8.207361,"Abrupt spike in value, significantly higher th..."


In [6]:
# get anomalies in wide format: one row per timestamp and one column per
# variable; as the preview shows, a cell is empty where that variable has
# no anomaly at that timestamp
df_anomalies_wide = anomaly_agent.get_anomalies_df(anomalies, format="wide")
df_anomalies_wide.head()


Unnamed: 0,timestamp,var1,var2,var3
0,2020-02-05,3.279153,,
1,2020-02-15,5.001551,3.374155,2.418594
2,2020-02-20,3.526827,,
3,2020-03-23,3.735584,,
4,2020-04-05,8.207361,,


In [7]:
# Left-join the wide anomaly table onto the original data by timestamp so
# every original row is kept. Original columns keep their names (empty
# suffix); the overlapping anomaly columns become <var>_anomaly_flag.
# Despite the "_flag" name, those columns hold the anomalous value itself
# (empty where no anomaly was reported), not a boolean.
df_final = df.merge(
    df_anomalies_wide,
    how="left",
    on="timestamp",
    suffixes=("", "_anomaly_flag"),
)
df_final.head()


Unnamed: 0,timestamp,var1,var2,var3,var1_anomaly_flag,var2_anomaly_flag,var3_anomaly_flag
0,2020-01-01,0.789249,0.229507,0.08215,,,
1,2020-01-02,0.129874,0.451953,0.633425,,,
2,2020-01-03,0.968297,0.835571,0.664757,,,
3,2020-01-04,0.644651,0.644888,0.071622,,,
4,2020-01-05,0.523788,0.907875,0.486874,,,


In [8]:
# plot final data with anomalies (the merged frame, which carries the
# original variables plus the *_anomaly_flag columns)
plot_df(df_final)