# Examples

Some examples of how to use the anomaly agent.

<a target="_blank" href="https://colab.research.google.com/github/andrewm4894/anomaly-agent/blob/main/notebooks/examples.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [1]:
import os
from anomaly_agent.utils import make_df, make_anomaly_config
from anomaly_agent.plot import plot_df
from anomaly_agent.agent import AnomalyAgent

# set openai api key if not in environment
# os.environ['OPENAI_API_KEY'] = "<your-openai-api-key>"

In [2]:
# get an anomaly config to use when generating some dummy data
anomaly_cfg = make_anomaly_config()
# print the config so the settings passed to make_df are visible in the output
print(anomaly_cfg)

# generate some dummy data using that config
# NOTE(review): 100 and 3 are presumably the number of rows and the number of
# variables (the preview shows var1..var3) - confirm against make_df's signature
df = make_df(100, 3, anomaly_config=anomaly_cfg)
df.head()


{'enabled': True, 'fraction': 0.1, 'methods': ['spike', 'drop', 'shift', 'noise'], 'spike_factor': 10, 'shift_value': 3, 'noise_std': 0.2}


Unnamed: 0,timestamp,var1,var2,var3
0,2020-01-01,0.303834,,0.534006
1,2020-01-02,0.696315,0.460271,0.87047
2,2020-01-03,0.331373,0.386581,0.137942
3,2020-01-04,0.850117,0.20251,0.197663
4,2020-01-05,0.581472,0.982573,0.658703


In [3]:
# plot the generated dummy data before running anomaly detection
plot_df(df)

In [4]:
# create anomaly agent (no arguments, so whatever defaults AnomalyAgent defines)
anomaly_agent = AnomalyAgent()

# detect anomalies using the default call
# NOTE(review): presumably verification is enabled by default, given the
# explicit verify=False variant below - confirm against detect_anomalies
anomalies = anomaly_agent.detect_anomalies(df)

# detect anomalies without verification; kept under a separate name so both
# results can be converted, merged and plotted side by side in later cells
anomalies_no_verify = anomaly_agent.detect_anomalies(df, verify=False)

# print anomalies from the default call only (anomalies_no_verify is not
# printed); per the output this is a dict keyed by variable name
print(anomalies)


{'var1': AnomalyList(anomalies=[Anomaly(timestamp='2020-01-14', variable_value=5.206324, anomaly_description='Significantly higher value than previous entries suggesting an anomaly.'), Anomaly(timestamp='2020-02-05', variable_value=3.511409, anomaly_description='Significantly higher value than previous entries suggesting an anomaly.'), Anomaly(timestamp='2020-02-16', variable_value=7.31036, anomaly_description='Significantly higher value than previous entries suggesting an anomaly.'), Anomaly(timestamp='2020-03-01', variable_value=3.872762, anomaly_description='Significantly higher value than previous entries suggesting an anomaly.'), Anomaly(timestamp='2020-03-09', variable_value=3.996384, anomaly_description='Significantly higher value than previous entries suggesting an anomaly.'), Anomaly(timestamp='2020-03-31', variable_value=9.149813, anomaly_description='Significantly higher value than previous entries suggesting an anomaly.'), Anomaly(timestamp='2020-04-01', variable_value=3.66

In [5]:
# get anomalies in long format (no `format` argument is passed here);
# the preview shows timestamp, variable_name, value and description columns
df_anomalies_long = anomaly_agent.get_anomalies_df(anomalies)
df_anomalies_long.head()

Unnamed: 0,timestamp,variable_name,value,description
0,2020-01-14,var1,5.206324,Significantly higher value than previous entri...
1,2020-02-05,var1,3.511409,Significantly higher value than previous entri...
2,2020-02-16,var1,7.31036,Significantly higher value than previous entri...
3,2020-03-01,var1,3.872762,Significantly higher value than previous entri...
4,2020-03-09,var1,3.996384,Significantly higher value than previous entri...


In [6]:
# get anomalies in wide format; the preview shows a timestamp column plus one
# column per variable (var1, var2, var3)
df_anomalies_wide = anomaly_agent.get_anomalies_df(anomalies, format="wide")

# same wide-format conversion for the anomalies detected with verify=False
df_anomalies_wide_no_verify = anomaly_agent.get_anomalies_df(anomalies_no_verify, format="wide")

# preview only the default-call result
df_anomalies_wide.head()


Unnamed: 0,timestamp,var1,var2,var3
0,2020-01-14,5.206324,,
1,2020-02-05,3.511409,,
2,2020-02-16,7.31036,,
3,2020-03-01,3.872762,,
4,2020-03-09,3.996384,,


In [7]:
# Left-join each wide anomaly frame onto the original data by timestamp.
# Columns coming from the original frame keep their names; the overlapping
# columns from the anomaly frame get the "_anomaly_flag" suffix.
merge_kwargs = dict(on="timestamp", how="left", suffixes=("", "_anomaly_flag"))

# anomalies from the default detect_anomalies call
df_final = df.merge(df_anomalies_wide, **merge_kwargs)
# anomalies from the verify=False call
df_final_no_verify = df.merge(df_anomalies_wide_no_verify, **merge_kwargs)

df_final.head()


Unnamed: 0,timestamp,var1,var2,var3,var1_anomaly_flag,var2_anomaly_flag,var3_anomaly_flag
0,2020-01-01,0.303834,,0.534006,,0.0,
1,2020-01-02,0.696315,0.460271,0.87047,,,
2,2020-01-03,0.331373,0.386581,0.137942,,,
3,2020-01-04,0.850117,0.20251,0.197663,,,
4,2020-01-05,0.581472,0.982573,0.658703,,,


In [8]:
# plot the original data merged with the anomalies from the default
# detect_anomalies call; return_fig=True is passed so the result can be
# bound to `fig` and shown explicitly
fig = plot_df(df_final, return_fig=True)
fig.show()

In [9]:
# plot the original data merged with the anomalies detected with verify=False,
# for comparison with the previous plot (note: rebinds `fig`)
fig = plot_df(df_final_no_verify, return_fig=True)
fig.show()