# About this notebook

In this notebook, a low-pass filter will be implemented to detect anomalies in time-series data of battery voltages. First, the necessary data is loaded from local files; next, the functions needed for labelling and visualizing anomalies are defined. In the last part, this algorithm is applied to both an anomalous battery pack and a normal battery pack to tweak the parameters needed for the anomaly detection.

## Input
 * basePath leading to the stored local .csv-files
 * things and metrics that are needed - in this case the battery voltages

## To be tweaked
 * window_size
 * sigma
 * fixed_std
 
## Output
 * Number of detected anomalies as a function of the chosen parameters
 * Visualization of the anomalies

# Import libraries

In [1]:
import pandas as pd
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
import plotly.io as pio

In [5]:
# Root folder of the locally stored .csv-files; merge_in_dataframe() looks for the
# 'weather_data' and 'battery_data' sub-folders underneath it. Adjust to your own machine.
basePath = r'D:\Documents\Thesis\Case 1 - data'
# Alternative location:
# basePath = r'C:\Users\JeffG\Desktop\Case 1 - data'

# Load battery data

In [6]:
def _read_metric_5min(csv_path, metricID):
    """Read one '<metric>_merged.csv' file and return its 5-minute averages, indexed by time."""
    raw = pd.read_csv(csv_path, usecols=['time', 'value'])
    # Build a fresh frame instead of assigning into a selection of `raw`
    # (avoids pandas' SettingWithCopyWarning).
    metric = pd.DataFrame({
        # 'time' is interpreted as an epoch timestamp in milliseconds
        'time': pd.to_datetime(raw['time'], unit='ms'),
        # Convert object to float (BatteryVoltHR is an object for example);
        # entries that cannot be parsed become NaN
        metricID: pd.to_numeric(raw['value'], errors='coerce'),
    })
    # Calculate average for each 5-minute bin and fill empty bins by interpolation.
    # '5min' is the canonical spelling of the minute alias.
    return metric.resample(rule='5min', on='time').mean().interpolate()


def merge_in_dataframe(thingID, listOfMetrics, basePath):
    """
    This function takes all the merged .csv-files of one Thing and gathers them in one big DataFrame.
    This DataFrame contains 5min averages of all the values.

    For every metric the file '<metric>_merged.csv' is read, non-numeric values are
    turned into NaN, the values are averaged per 5-minute bin and empty bins are
    interpolated. The per-metric results are merged on 'time' (inner join, so only
    timestamps present for every metric are kept).

    Args:
        thingID: The ID of a Thing as defined on the IoT-stack explorer
        listOfMetrics: List of metrics you want to query as defined on the IoT-stack explorer
        basePath: path to the base folder of your project

    Returns:
        DataFrame containing 5min averages of all the metrics defined in the listOfMetrics,
        one column per metric, named after the metric.

    Raises:
        ValueError: if listOfMetrics is empty (there would be nothing to return).
    """
    # Local import keeps this cell self-contained; os.path.join uses the separator
    # of the platform the notebook runs on instead of a hard-coded backslash.
    import os

    if len(listOfMetrics) == 0:
        raise ValueError("listOfMetrics must contain at least one metric")

    # Weather data lives in its own folder; every other Thing has a folder under battery_data
    if thingID == 'davis.davis.weather.1':
        folder = os.path.join(basePath, 'weather_data')
    else:
        folder = os.path.join(basePath, 'battery_data', str(thingID))

    data = None
    for metricID in listOfMetrics:
        frame = _read_metric_5min(os.path.join(folder, metricID + '_merged.csv'), metricID)
        # The first metric seeds the result; every further metric is merged onto it
        data = frame if data is None else pd.merge(data, frame, on='time')

    return data

In [27]:
# Load the 5-minute averaged 'Power.BatteryVoltHR' metric of this Thing.
# battery_data_1 is analysed further below as the anomalous battery pack.
thingID = 'munisense.msup1g30034'
battery_metricsList = ['Power.BatteryVoltHR']
battery_data_1 = merge_in_dataframe(thingID, battery_metricsList, basePath)
# Preview the first rows
battery_data_1.head()

Unnamed: 0_level_0,Power.BatteryVoltHR
time,Unnamed: 1_level_1
2018-08-31 09:50:00,15.995
2018-08-31 09:55:00,15.995
2018-08-31 10:00:00,15.99
2018-08-31 10:05:00,16.0
2018-08-31 10:10:00,16.0


In [None]:
# Load the 5-minute averaged 'Power.BatteryVoltHR' metric of this Thing.
# battery_data_2 is analysed further below as the normal battery pack.
thingID = 'munisense.msup1i70124'
battery_metricsList = ['Power.BatteryVoltHR']
battery_data_2 = merge_in_dataframe(thingID, battery_metricsList, basePath)
# Preview the first rows
battery_data_2.head()

In [None]:
# Load the 5-minute averaged 'Power.BatteryVoltHR' metric of a third Thing.
# battery_data_3 is used in the last parameter sweep of this notebook.
thingID = 'munisense.msup1h90115'
battery_metricsList = ['Power.BatteryVoltHR']
battery_data_3 = merge_in_dataframe(thingID, battery_metricsList, basePath)
# Preview the first rows
battery_data_3.head()

# Define functions

## Label anomalies

In [10]:
def label_anomaly_fixed_std(row, sigma, fixed_std):
    """
    Flag a data point whose battery voltage falls outside a fixed band around the moving average.

    The band extends sigma * fixed_std above and below the moving average.
    A value lying exactly on the edge of the band is not flagged.

    Args:
        row: A row of a dataframe that needs to be labelled. It must provide
            'Power.BatteryVoltHR' (the battery voltage) and 'moving_average'.
        sigma: number of standard deviations the voltage has to deviate
        fixed_std: the fixed standard deviation used to size the band

    Returns:
        1 if the data point is anomalous
        0 if the data point is not anomalous
    """
    voltage = row['Power.BatteryVoltHR']
    baseline = row['moving_average']
    tolerance = sigma * fixed_std

    is_too_high = voltage > (baseline + tolerance)
    is_too_low = voltage < (baseline - tolerance)
    return 1 if (is_too_high or is_too_low) else 0

In [11]:
def label_anomaly_moving_std(row, sigma):
    """
    Flag a data point whose battery voltage falls outside a band around the moving average,
    where the width of the band follows the standard deviation of the moving window.

    The band extends sigma * row['moving_std'] above and below the moving average.
    A value lying exactly on the edge of the band is not flagged.

    Args:
        row: A row of a dataframe that needs to be labelled. It must provide
            'Power.BatteryVoltHR' (the battery voltage), 'moving_average' and 'moving_std'.
        sigma: number of standard deviations the voltage has to deviate

    Returns:
        1 if the data point is anomalous
        0 if the data point is not anomalous
    """
    spread = sigma * row['moving_std']
    upper_limit = row['moving_average'] + spread
    lower_limit = row['moving_average'] - spread

    # Anything above or below the band counts as an anomaly
    if row['Power.BatteryVoltHR'] > upper_limit:
        return 1
    if row['Power.BatteryVoltHR'] < lower_limit:
        return 1
    return 0

## Low pass filtering

In [12]:
def low_pass_filtering(df, window_size, sigma, fixed_std):
    """
    This function takes a dataframe and flags the anomalies based on a fixed and a moving standard deviation.

    A point is flagged when its voltage lies more than sigma standard deviations
    above or below the moving average. Two variants are computed: one with the
    standard deviation of the moving window, one with the fixed value fixed_std.

    Note that df is modified in place: columns are added and every row containing
    a NaN is dropped (this includes the first window_size - 1 rows, for which the
    moving window is not yet complete). Pass a copy to keep the original intact.

    Args:
        df: Dataframe containing the battery voltage in the column 'Power.BatteryVoltHR'
        window_size: window size to calculate a moving average and moving standard deviation
        sigma: number of standard deviations the difference needs to be
        fixed_std: a static threshold for the standard deviation

    Returns:
        The same dataframe object, extended with the helper columns
        'moving_average', 'residual' and 'moving_std' and the two flag columns
        (1 = anomalous, 0 = not anomalous):
            anomaly_flag_fixed_std
            anomaly_flag_moving_std
    """
    # Calculate moving average and moving standard deviation over the same window
    rolling_voltage = df['Power.BatteryVoltHR'].rolling(window_size)
    df['moving_average'] = rolling_voltage.mean()
    df['residual'] = df['Power.BatteryVoltHR'] - df['moving_average']
    df['moving_std'] = rolling_voltage.std()
    df.dropna(inplace=True)

    # Calculate anomalies.
    # Column-wise comparison instead of a row-wise apply: same flags, no Python
    # call per row, and it also works when no rows are left after dropna.
    voltage = df['Power.BatteryVoltHR']
    average = df['moving_average']

    moving_band = sigma * df['moving_std']
    outside_moving_band = (voltage > (average + moving_band)) | (voltage < (average - moving_band))
    df['anomaly_flag_moving_std'] = outside_moving_band.astype(int)

    fixed_band = sigma * fixed_std
    outside_fixed_band = (voltage > (average + fixed_band)) | (voltage < (average - fixed_band))
    df['anomaly_flag_fixed_std'] = outside_fixed_band.astype(int)

    return df

## Visualization

In [13]:
def plot_low_pass(df):
    """
    This function creates a visualization of the raw data, the moving average and the detected anomalies.

    The anomalies shown are the rows of df whose 'anomaly_flag_fixed_std' equals 1.
    They are taken from df itself, so the figure always matches the dataframe that
    was passed in and does not depend on any variable in the notebook namespace.

    Args:
        df: Dataframe containing the battery voltage and flags for anomalies as defined in low_pass_filtering().
            The columns 'Power.BatteryVoltHR', 'moving_average' and 'anomaly_flag_fixed_std' are used.

    Returns:
        A plotly Figure (800 x 600) with three traces: raw data, moving average and anomalies.
    """
    # Rows flagged by the fixed-standard-deviation rule
    fixed_anomalies = df[df['anomaly_flag_fixed_std'] == 1]

    # Create traces
    raw_trace = go.Scatter(
        x=df.index,
        y=df['Power.BatteryVoltHR'],
        mode='markers',
        opacity=0.7,
        marker={
            'size': 6
        },
        name='Raw data'
    )
    average_trace = go.Scatter(
        x=df.index,
        y=df['moving_average'],
        mode='lines',
        name='Moving average'
    )
    anomaly_trace = go.Scatter(
        x=fixed_anomalies.index,
        y=fixed_anomalies['Power.BatteryVoltHR'],
        mode='markers',
        marker={
            'size': 10,
            'symbol': 'x',
            'color': 'red'
        },
        name='anomalies'
    )

    data = [raw_trace, average_trace, anomaly_trace]

    layout = go.Layout(
        xaxis={'title': 'Time'},
        # Fixed voltage range so that figures of different runs are comparable
        yaxis={'title': 'Battery voltage [V]',
               'range': [0, 17.5]},
        margin={'l': 40, 'b': 40, 't': 10, 'r': 10},
        legend={'x': 0, 'y': 0},
        hovermode='closest',
        width=800,
        height=600
    )

    fig = go.Figure(data=data, layout=layout)
    return fig

# Low pass filtering

In [14]:
# Set parameters
# NOTE: window_size and fixed_std are overwritten by the loop variables of the
# parameter sweeps in later cells; sigma keeps the value set here.
# Number of samples in the window for the moving average / moving standard deviation
window_size = 32
# Number of standard deviations a point has to deviate from the moving average to be flagged
sigma = 3
# Fixed standard deviation used for the static threshold
fixed_std = 0.3

In [15]:
# Initialise plotly's offline notebook mode; with connected=True plotly.js is loaded
# from an online CDN instead of being embedded in the notebook
init_notebook_mode(connected=True)

## Anomalous battery pack

### Apply function

In [32]:
# Parameter sweep on battery_data_1: every combination of window size and fixed standard deviation.
# sigma is not set in this cell; it is taken from the notebook namespace.
window_sizes = [32, 64, 128, 256, 512, 1024, 2048]
fixed_stds = [0.1, 0.2, 0.3, 0.4, 0.5]
for window_size in window_sizes:
    for fixed_std in fixed_stds:
        # low_pass_filtering modifies its input, so every run gets a fresh copy
        df = low_pass_filtering(battery_data_1.copy(), window_size, sigma, fixed_std)
        # Rows flagged with the moving standard deviation (not used further in this cell)
        moving_anomalies = df[df['anomaly_flag_moving_std']==1]
        # Rows flagged with the fixed standard deviation; their number is printed below
        fixed_anomalies = df[df['anomaly_flag_fixed_std']==1]
        fig = plot_low_pass(df)
        # One image per parameter combination: fig1_<window_size>_<fixed_std>.png
        # NOTE(review): the output folder is hard-coded for one machine - adjust before running
        pio.write_image(fig, r'C:\Users\Jeff\Dropbox\ICT-Elektronica\Thesis\scripts\Case 1 - Smart Lighting' + '/images/fig1_{}_{}.png'.format(window_size, fixed_std))
        print("window_size: " + str(window_size) + " | fixed_std: " + str(fixed_std) + " | Nr of anomalies: " + str(len(fixed_anomalies)))

window_size: 32 | fixed_std: 0.1 | Nr of anomalies: 4464
window_size: 32 | fixed_std: 0.2 | Nr of anomalies: 2689
window_size: 32 | fixed_std: 0.3 | Nr of anomalies: 1525
window_size: 32 | fixed_std: 0.4 | Nr of anomalies: 1370
window_size: 32 | fixed_std: 0.5 | Nr of anomalies: 1216
window_size: 64 | fixed_std: 0.1 | Nr of anomalies: 7162
window_size: 64 | fixed_std: 0.2 | Nr of anomalies: 5130
window_size: 64 | fixed_std: 0.3 | Nr of anomalies: 4199
window_size: 64 | fixed_std: 0.4 | Nr of anomalies: 3185
window_size: 64 | fixed_std: 0.5 | Nr of anomalies: 2244
window_size: 128 | fixed_std: 0.1 | Nr of anomalies: 13540
window_size: 128 | fixed_std: 0.2 | Nr of anomalies: 8022
window_size: 128 | fixed_std: 0.3 | Nr of anomalies: 6428
window_size: 128 | fixed_std: 0.4 | Nr of anomalies: 5618
window_size: 128 | fixed_std: 0.5 | Nr of anomalies: 4948
window_size: 256 | fixed_std: 0.1 | Nr of anomalies: 17082
window_size: 256 | fixed_std: 0.2 | Nr of anomalies: 14349
window_size: 256 | fi

In [33]:
# For every window size, print the timestamp of the first point flagged with the fixed standard deviation.
# NOTE(review): sigma and fixed_std are not set in this cell; they are whatever the notebook
# namespace holds when it runs (fixed_std is also a loop variable of the sweep cells) - confirm
# the intended value. The loop variable is named window_siz, presumably to leave window_size untouched.
for window_siz in window_sizes:
    # low_pass_filtering modifies its input, so every run gets a fresh copy
    df = low_pass_filtering(battery_data_1.copy(), window_siz, sigma, fixed_std)
    # Rows flagged with the moving standard deviation (not used further in this cell)
    moving_anomalies = df[df['anomaly_flag_moving_std']==1]
    fixed_anomalies = df[df['anomaly_flag_fixed_std']==1]
    # Index label (timestamp) of the first flagged row
    print(fixed_anomalies.first_valid_index())

2018-11-07 09:30:00
2018-11-07 09:30:00
2018-11-07 09:30:00
2018-11-07 09:25:00
2018-11-06 08:30:00
2018-11-06 06:30:00
2018-11-06 05:35:00


## Normal battery pack

### Apply function

In [35]:
# Parameter sweep on battery_data_2: every combination of window size and fixed standard deviation.
# sigma is not set in this cell; it is taken from the notebook namespace.
window_sizes = [32, 64, 128, 256, 512, 1024, 2048]
fixed_stds = [0.1, 0.2, 0.3, 0.4, 0.5]
for window_size in window_sizes:
    for fixed_std in fixed_stds:
        # low_pass_filtering modifies its input, so every run gets a fresh copy
        df = low_pass_filtering(battery_data_2.copy(), window_size, sigma, fixed_std)
        # Rows flagged with the moving standard deviation (not used further in this cell)
        moving_anomalies = df[df['anomaly_flag_moving_std']==1]
        # Rows flagged with the fixed standard deviation; their number is printed below
        fixed_anomalies = df[df['anomaly_flag_fixed_std']==1]
        fig = plot_low_pass(df)
        # One image per parameter combination: fig2_<window_size>_<fixed_std>.png
        # NOTE(review): the output folder is hard-coded for one machine - adjust before running
        pio.write_image(fig, r'C:\Users\Jeff\Dropbox\ICT-Elektronica\Thesis\scripts\Case 1 - Smart Lighting' + '/images/fig2_{}_{}.png'.format(window_size, fixed_std))
        print("window_size: " + str(window_size) + " | fixed_std: " + str(fixed_std) + " | Nr of anomalies: " + str(len(fixed_anomalies)))

window_size: 32 | fixed_std: 0.1 | Nr of anomalies: 0
window_size: 32 | fixed_std: 0.2 | Nr of anomalies: 0
window_size: 32 | fixed_std: 0.3 | Nr of anomalies: 0
window_size: 32 | fixed_std: 0.4 | Nr of anomalies: 0
window_size: 32 | fixed_std: 0.5 | Nr of anomalies: 0
window_size: 64 | fixed_std: 0.1 | Nr of anomalies: 27
window_size: 64 | fixed_std: 0.2 | Nr of anomalies: 0
window_size: 64 | fixed_std: 0.3 | Nr of anomalies: 0
window_size: 64 | fixed_std: 0.4 | Nr of anomalies: 0
window_size: 64 | fixed_std: 0.5 | Nr of anomalies: 0
window_size: 128 | fixed_std: 0.1 | Nr of anomalies: 144
window_size: 128 | fixed_std: 0.2 | Nr of anomalies: 0
window_size: 128 | fixed_std: 0.3 | Nr of anomalies: 0
window_size: 128 | fixed_std: 0.4 | Nr of anomalies: 0
window_size: 128 | fixed_std: 0.5 | Nr of anomalies: 0
window_size: 256 | fixed_std: 0.1 | Nr of anomalies: 273
window_size: 256 | fixed_std: 0.2 | Nr of anomalies: 0
window_size: 256 | fixed_std: 0.3 | Nr of anomalies: 0
window_size: 25

### Visualization

In [36]:
# Parameter sweep on battery_data_3: every combination of window size and fixed standard deviation.
# sigma is not set in this cell; it is taken from the notebook namespace.
window_sizes = [32, 64, 128, 256, 512, 1024, 2048]
fixed_stds = [0.1, 0.2, 0.3, 0.4, 0.5]
for window_size in window_sizes:
    for fixed_std in fixed_stds:
        # low_pass_filtering modifies its input, so every run gets a fresh copy
        df = low_pass_filtering(battery_data_3.copy(), window_size, sigma, fixed_std)
        # Rows flagged with the moving standard deviation (not used further in this cell)
        moving_anomalies = df[df['anomaly_flag_moving_std']==1]
        # Rows flagged with the fixed standard deviation; their number is printed below
        fixed_anomalies = df[df['anomaly_flag_fixed_std']==1]
        fig = plot_low_pass(df)
        # One image per parameter combination: fig3_<window_size>_<fixed_std>.png
        # NOTE(review): the output folder is hard-coded for one machine - adjust before running
        pio.write_image(fig, r'C:\Users\Jeff\Dropbox\ICT-Elektronica\Thesis\scripts\Case 1 - Smart Lighting' + '/images/fig3_{}_{}.png'.format(window_size, fixed_std))
        print("window_size: " + str(window_size) + " | fixed_std: " + str(fixed_std) + " | Nr of anomalies: " + str(len(fixed_anomalies)))

window_size: 32 | fixed_std: 0.1 | Nr of anomalies: 7
window_size: 32 | fixed_std: 0.2 | Nr of anomalies: 0
window_size: 32 | fixed_std: 0.3 | Nr of anomalies: 0
window_size: 32 | fixed_std: 0.4 | Nr of anomalies: 0
window_size: 32 | fixed_std: 0.5 | Nr of anomalies: 0
window_size: 64 | fixed_std: 0.1 | Nr of anomalies: 22
window_size: 64 | fixed_std: 0.2 | Nr of anomalies: 0
window_size: 64 | fixed_std: 0.3 | Nr of anomalies: 0
window_size: 64 | fixed_std: 0.4 | Nr of anomalies: 0
window_size: 64 | fixed_std: 0.5 | Nr of anomalies: 0
window_size: 128 | fixed_std: 0.1 | Nr of anomalies: 86
window_size: 128 | fixed_std: 0.2 | Nr of anomalies: 0
window_size: 128 | fixed_std: 0.3 | Nr of anomalies: 0
window_size: 128 | fixed_std: 0.4 | Nr of anomalies: 0
window_size: 128 | fixed_std: 0.5 | Nr of anomalies: 0
window_size: 256 | fixed_std: 0.1 | Nr of anomalies: 138
window_size: 256 | fixed_std: 0.2 | Nr of anomalies: 0
window_size: 256 | fixed_std: 0.3 | Nr of anomalies: 0
window_size: 256

In [40]:
# Plot the dataframe that is currently bound to df in the notebook namespace
# (df is not created in this cell; it is left over from a previously executed cell).
fig = plot_low_pass(df)
# NOTE(review): the output folder is hard-coded and uses a different user name ('JeffG')
# than the sweep cells ('Jeff') - adjust before running
pio.write_image(fig, r'C:\Users\JeffG\Dropbox\ICT-Elektronica\Thesis\scripts\Case 1 - Smart Lighting' + '/images/fig2.png')