# Synthetic time series example

This notebook demonstrates the synthetic time series query feature using Open Industrial Data.

In [None]:
from matplotlib import pyplot as plt
import os
from datetime import datetime
from getpass import getpass
from cognite.client import CogniteClient
# Build the API client for the 'publicdata' project. The key is read from the
# PUBLICDATA_API_KEY environment variable; when that is unset or empty the
# user is prompted for it without echoing the input.
project = 'publicdata'
api_key = os.environ.get('PUBLICDATA_API_KEY')
if not api_key:
    api_key = getpass("Open Industrial Data API-KEY: ")
client = CogniteClient(
    project=project,
    api_key=api_key,
    client_name="STS demo",
)

## In this tutorial, we will develop a simple synthetic time series for anomaly detection.

In [None]:
# Look up the two time series used in this notebook by their external ids.
# ts[0] later plays the role of the regressor x and ts[1] the target y.
ts = client.time_series.retrieve_multiple(
    external_ids=["pi:160267", 'pi:160887'],
)
# Bare last expression so the retrieved series are shown as the cell output.
ts

## We start with a simple linear regression model

In [None]:
# Fetch both series into a single DataFrame as 10-second 'interpolation'
# aggregates covering 2019-10-01 up to 2020-02-01.
# NOTE(review): complete='fill,dropna' presumably fills gaps and drops rows
# that are still incomplete -- confirm against the SDK documentation.
df = client.datapoints.retrieve_dataframe(
    external_id=['pi:160267', 'pi:160887'],
    start=datetime(2019, 10, 1),
    end=datetime(2020, 2, 1),
    aggregates=['interpolation'],
    granularity='10s',
    complete='fill,dropna',
)

In [None]:
from scipy import stats

# Fit the second column (y) as a linear function of the first column (x).
# x and y are passed as separate arguments: letting linregress infer them
# from a single two-column argument is deprecated in recent SciPy releases
# and is slated to become an error.
r = stats.linregress(df.iloc[:, 0], df.iloc[:, 1])

# Overlay the last 10000 samples of the measured series with the fitted line
# so the quality of the fit can be judged visually.
plt.plot(df.iloc[-10000:, 1], label='measured')
plt.plot(df.iloc[-10000:, 0] * r.slope + r.intercept, label='linear fit')
plt.legend();

## We define the formula via sympy for convenience, avoiding the API syntax

In [None]:
from sympy import symbols

# Symbolic stand-ins for the two time series: x is the regressor and y the
# measured target. They are bound to real series when the query is issued.
x, y = symbols('x y')

# Symbolic form of the fitted line, using the coefficients from the
# regression result `r`.
y_pred = x * r.slope + r.intercept

In [None]:
# Relative error of the prediction, in percent. The 1e-6 added to the
# denominator keeps the expression finite where y is zero.
abs_residual = abs(y - y_pred)
abs_scale = abs(y) + 1e-6
error = 100 * abs_residual / abs_scale

## Let's test our query!

In [None]:
# Run the relative-error expression as a synthetic time series query.
# The sympy symbols x and y are bound to the two retrieved series through
# `variables`, and the query covers 2019-10-08 up to 2019-10-12.
dpt = client.datapoints.synthetic.query(
    expressions=error,
    variables={'x': ts[0], 'y': ts[1]},
    start=datetime(2019, 10, 8),
    end=datetime(2019, 10, 12),
)

# NOTE: this rebinds `df`, replacing the two-column frame used for the
# regression; re-run the data retrieval cell before re-running the fit.
df = dpt.to_pandas()
df.columns = ['error']  # single result column: relative error in %
df.plot()

In [None]:
## Curiously, we have found a period with high error in our model on the first try!