# Plot motifs

This notebook plots univariate and multivariate motifs computed from the matrix profile.

**Import Libraries**

In [None]:
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

In [None]:
import pandas as pd
from kando import kando_client
import datetime
import pandas as pd
import json
import stumpy
from matplotlib.patches import Rectangle

from matplotlib import pyplot as plt
%matplotlib inline

**Global Variables**

In [None]:
# Pandas offset alias used wherever the data is resampled.
RESAMPLE = '5min'
# Sub-sequence length, in samples, handed to the matrix-profile routines
# (24 samples at a 5-minute resample cover two hours).
WINDOW = 24
# Fraction of the local minima kept as motif candidates.
THRESHOLD = 0.05
# Identifier passed as ``point_id`` when the data is requested.
NODE = 1012
# POSIX timestamp passed as ``start`` when the data is requested. The datetime
# is naive, so ``timestamp()`` interprets it in the machine's local timezone.
START = datetime.datetime(2020, 1, 1, 0, 0).timestamp()

**Functions**

In [None]:
def connect():
    """
    Create a Kando API client from the credentials stored in ``key.json``.

    The file is read from the current working directory and must contain a
    JSON object with ``key`` and ``secret`` entries.
    :return: client
    """

    base_url = "https://kando-staging.herokuapp.com"

    with open('key.json') as credentials_file:
        credentials = json.load(credentials_file)

    return kando_client.client(base_url, credentials['key'],
                               credentials['secret'])

In [None]:
def create_df(data):
    """
    Convert the raw payload into a DataFrame indexed by sampling time.

    :param data: dictionary of the data set; ``data['samplings']`` maps each
        sample key to a dict of values that includes ``DateTime`` (parsed as
        seconds since the epoch), ``visit``, ``Battery`` and ``Signal``
    :return: DataFrame of the data set, indexed by ``DateTime``, with missing
        ``visit`` / ``Battery`` / ``Signal`` values filled and the first
        sampling removed
    """

    df = pd.DataFrame.from_dict(data['samplings'], orient='index')

    # Assign the filled column back instead of ``df.col.fillna(inplace=True)``:
    # the chained in-place call operates on a temporary object when pandas
    # copy-on-write is active and would leave ``df`` untouched.
    for column, default in (('visit', False), ('Battery', 0), ('Signal', 0)):
        df[column] = df[column].fillna(default)

    df['DateTime'] = pd.to_datetime(df['DateTime'], unit='s')
    df = df.set_index('DateTime')
    # Remove the row(s) carrying the first timestamp label.
    df = df.drop(df.index[0])

    return df

In [None]:
def plot_columns(df, cols_to_plot, sampling_unit='D'):
    """
    Draw one time-series figure per requested column.

    :param df: pandas DataFrame with a datetime-like index (required by
        ``resample``)
    :param cols_to_plot: list of columns to plot
    :param sampling_unit: resampling unit size, default is day
    :return: None; each column is shown in its own figure after being averaged
        over ``sampling_unit`` bins
    """

    # Display names used in the figure titles.
    data_dic = {
        'PI': 'Polution_Index ',
        'EC': 'Electrical_Conductivity',
        'PH': 'pH',
        'WL': 'Water_Level',
        'ORP': 'Oxidation_reduction_potential',
        'TEMPERATURE': 'Temperature',
        'COD': 'Chemical_Oxygen_Demand',
        'TSS': 'Total_suspended_solids',
        'FLOW': 'Flow',
        'Battery': 'Battery_Level',
        'Signal': 'Signal',
        'MS': 'MS',
        'gaps': 'gaps'
    }

    # Aggregate only the columns that are plotted: unrelated non-numeric
    # columns in the frame would otherwise be able to make ``mean`` fail.
    wanted = list(dict.fromkeys(cols_to_plot))
    resampled = df[wanted].resample(sampling_unit).mean()

    for col_ in cols_to_plot:
        plt.figure(figsize=(15, 5))
        resampled[col_].plot()
        # Columns without a registered display name are titled by their own
        # name instead of raising KeyError.
        plt.title(data_dic.get(col_, str(col_)) + ' OVER TIME', fontsize=18)
        plt.xlabel('Time')
        plt.ylabel(str(col_))
        plt.show()

In [None]:
def impute_nulls_with_time_interpolation(df, columns_to_impute, unit):
    """
    Resample the frame and fill gaps in the chosen columns by interpolation.

    :param df: pandas DataFrame with a datetime-like index
    :param columns_to_impute: list of columns names
    :param unit: resampling rule handed to ``DataFrame.resample``
        (for example ``'5min'``)
    :return: imputed DataFrame: the numeric columns of ``df`` averaged per
        ``unit`` bin, with the listed columns interpolated using
        ``method='time'``; the input frame is not modified
    """

    # ``numeric_only=True`` keeps non-numeric columns from raising inside
    # ``mean``; such columns are left out of the result.
    df = df.resample(unit).mean(numeric_only=True)
    for col in columns_to_impute:
        df[col] = df[col].interpolate(method='time')
    return df

In [None]:
def create_matrix_profile_dictionary(df, target_cols, window=24, m=None):
    """
    Compute the matrix profile of every target column and of all of them
    together.

    :param df: pandas DataFrame
    :param target_cols: list of columns names
    :param window: window size (sub-sequence length in samples)
    :param m: optional alias for ``window``, accepted for callers that pass
        the window size as ``m=...``; when given it takes precedence
    :return: dictionary of matrix profile: one ``'mp_<column>'`` entry per
        target column holding the ``stumpy.stump`` result, plus an
        ``'All_dimensions'`` entry holding the ``stumpy.mstump`` result
    """

    # The body previously referred to an undefined name ``m`` while the
    # parameter was called ``window``; both spellings are now honoured.
    if m is not None:
        window = m

    mps = {
        f'mp_{col_}': stumpy.stump(df[col_], window)
        for col_ in target_cols
    }
    mps['All_dimensions'] = stumpy.mstump(df[target_cols], window)
    return mps

In [None]:
def plot_univariant_matrix_profiles_dictionary(df, mps, cols_to_plot):
    """
    Show each column above its matrix profile.

    :param df: pandas DataFrame
    :param mps: dictionary of matrix profile, keyed ``'mp_<column>'``
    :param cols_to_plot: list of columns names
    :return: None; one two-panel figure per column (time series on top,
        column 0 of its matrix profile below)
    """

    for col_ in cols_to_plot:
        # Create both panels in one call. The earlier ``plt.subplots()``
        # followed by ``plt.subplot(2, 1, k)`` left an extra full-figure axes
        # behind the panels. The x-axes are not shared: the series is drawn
        # against the DataFrame index, the profile against position.
        fig, (series_ax, profile_ax) = plt.subplots(2, 1, figsize=(30, 8))

        df[f'{col_}'].plot(ax=series_ax)
        series_ax.set_title(
            f'{col_} OVER TIME \n And {col_} 1 day window matrix profile ',
            fontsize=20)
        series_ax.set_xlabel('Time')
        series_ax.set_ylabel(f'{col_} level', fontsize='16')

        # ``color`` replaces the invalid keyword ``C``, which matplotlib
        # rejects as an unknown line property.
        profile_ax.plot(mps[f'mp_{col_}'][:, 0], color='g')
        profile_ax.set_xlabel('index')
        profile_ax.set_ylabel('Distance to \n nearest \n neighbor',
                              fontsize='16')
        plt.show()

In [None]:
def plot_univariant_matrix_profiles_and_motifs(df,
                                               mps,
                                               cols_to_plot,
                                               window=24,
                                               threshold=0.05):
    """
    For every column: show the other columns for context, then the column
    with its matrix profile and highlighted motifs, then the motif groups.

    :param df: pandas DataFrame
    :param mps: dictionary of matrix profile, keyed ``'mp_<column>'``
    :param cols_to_plot: list of columns names
    :param window: window size (used as the highlight width in samples)
    :param threshold: the top % of most similar motifs
    :return: None; plot time series and matrix profile to chosen columns, and
        a separate plots for the motifs
    """

    motif_colors = [
        'red', 'blue', 'green', 'orange', 'gray', 'purple', 'teal', 'pink',
        'brown', 'yellow'
    ]
    graph_colors = ['black', 'green', 'orange']

    # Positional (0..n-1) copy so that motif indices can be used as x values.
    new_df = df.reset_index()

    for col_ in cols_to_plot:
        profile = mps[f'mp_{col_}']
        groups = discover_motifs_groups(profile, threshold)

        # Context figure: one panel per remaining column. The panel count
        # follows the number of columns instead of a fixed three, and the
        # colours cycle, so any number of columns can be passed.
        other_cols = [col for col in cols_to_plot if col != col_]
        if other_cols:
            fig, axes = plt.subplots(len(other_cols),
                                     figsize=(240, 12),
                                     gridspec_kw={'hspace': 0},
                                     squeeze=False)
            axes = axes[:, 0]
            axes[0].set_title('FEATURES OVER TIME', fontsize=20)
            for panel, col in enumerate(other_cols):
                axes[panel].plot(df[f'{col}'],
                                 c=graph_colors[panel % len(graph_colors)])
                axes[panel].set_ylabel(f'{col} flow')
            plt.show()

        fig, (series_ax, profile_ax) = plt.subplots(
            2, figsize=(240, 12), gridspec_kw={'hspace': 0})

        # NOTE(review): the title labels the window in hours, while ``window``
        # is applied below as a number of samples; confirm against the
        # resampling unit in use.
        series_ax.set_title(
            f'{col_} OVER TIME \n And {col_} {window} hours window matrix profile ',
            fontsize=20)

        series_ax.plot(new_df[f'{col_}'])
        series_ax.set_ylabel(f'{col_} level', fontsize='16')

        # ``color`` replaces the invalid keyword ``C``.
        profile_ax.plot(profile[:, 0], color='g')
        profile_ax.set_xlabel('Time', fontsize='15')
        profile_ax.set_ylabel('Distance to \n nearest \n neighbor',
                              fontsize='15')

        lowest = new_df[col_].min()
        highest = new_df[col_].max()
        for group_no, group in enumerate(groups):
            color = motif_colors[group_no % len(motif_colors)]
            for motif in group:
                # Rectangle takes (x, y), width, height: the height must be
                # the value range, not the maximum itself, for the highlight
                # to end at the top of the data.
                rect = Rectangle((motif, lowest),
                                 window,
                                 highest - lowest,
                                 facecolor=color,
                                 alpha=0.2)
                series_ax.add_patch(rect)
                profile_ax.axvline(motif, linestyle="dashed", color=color)
                profile_ax.scatter(motif,
                                   profile[motif, 0],
                                   color=color,
                                   marker='*',
                                   s=200)

        plt.show()

        plot_groups(df, groups, col_, window)

In [None]:
def simm(motif, matrix_profile):
    """
    Collect the positions whose column-1 entry points at the given motif.

    :param motif: sequence whose element 1 is the value searched for
    :param matrix_profile: matrix profile of chosen column (2-D array)
    :return: list of all row positions, in ascending order, whose column-1
        value equals ``motif[1]``
    """

    target = motif[1]
    return [
        position
        for position, neighbour in enumerate(matrix_profile[:, 1])
        if neighbour == target
    ]

In [None]:
def simms(motif, matrix_profile):
    """
    Expand the direct matches of a motif transitively.

    :param motif: sequence whose element 1 is the value searched for
    :param matrix_profile: matrix profile of chosen column (2-D array)
    :return: list of the positions returned by ``simm`` followed by every
        position whose column-1 value equals a position already in the list,
        in order of discovery and without duplicates
    """

    found = simm(motif, matrix_profile)
    neighbours = matrix_profile[:, 1]

    cursor = 0
    while cursor < len(found):
        current = found[cursor]
        # Positions within one pass are unique, so checking membership
        # against the list as it stood before the pass is sufficient.
        found.extend([
            position
            for position, neighbour in enumerate(neighbours)
            if neighbour == current and position not in found
        ])
        cursor += 1
    return found

In [None]:
def discover_motifs_groups(mps, treshold):
    """
    Pick the strongest local minima of a matrix profile and group the
    positions linked to each of them.

    :param mps: matrix profile of chosen column; element 0 of each row is
        read as the distance and element 1 as the linked position
    :param treshold: the top % of most similar groups (fraction of the local
        minima to keep; at most 10 are kept)
    :return: list of lists containing indices of the most similar data points
    """

    # A candidate is a strict local minimum whose two neighbours on each side
    # descend towards it: d[i-2] > d[i-1] > d[i] < d[i+1] < d[i+2].
    mins = [
        [mps[i][0], i, mps[i][1]]
        for i in range(2, len(mps) - 2)
        if mps[i - 2][0] > mps[i - 1][0] > mps[i][0] < mps[i + 1][0] < mps[i + 2][0]
    ]

    groups_size = min(int(len(mins) * treshold), 10)
    motifs = sorted(mins, key=lambda x: x[0])[:groups_size]

    groups_check = []
    groups = []
    for motif in motifs:
        # ``simms`` scans the whole profile repeatedly; compute it once per
        # motif instead of once per use.
        members = simms(motif, mps)
        member_set = set(members)
        # Keep groups with more than one member and skip a group whose
        # member set has already been recorded.
        if len(members) > 1 and member_set not in groups_check:
            groups_check.append(member_set)
            groups.append(members)

    return groups

In [None]:
def plot_groups(df, groups, col, window=24):
    """
    Overlay the sub-sequences of every motif group, one figure per group.

    :param df: pandas DataFrame with a datetime index (the legend labels read
        ``date``, ``hour`` and ``minute`` from the index values)
    :param groups: list of lists containing indices of the most similar data points
    :param col: chosen col to plot
    :param window: window size (number of samples drawn per sub-sequence)
    :return: None; plot groups of motifs for the chosen columns
    """

    colors = [
        'red', 'blue', 'green', 'orange', 'gray', 'purple', 'teal', 'pink',
        'brown', 'yellow'
    ]
    series = df[f'{col}']

    for i, group in enumerate(groups):
        fig = plt.figure(figsize=(15, 3))
        ax = fig.add_subplot(1, 1, 1)
        ax.set_title(f'{col} motifs', fontsize=20)
        for similar in group:
            # ``window`` replaces the undefined name ``m`` used previously.
            motif = series.iloc[similar:similar + window]
            # One extra sample, so the label's end time is the sample that
            # follows the window.
            date = series.iloc[similar:similar + window + 1]
            ax.plot(
                range(1, len(motif) + 1),
                motif,
                label=
                f"{date.index[0].date()}: {date.index[0].hour}:{date.index[0].minute} - {date.index[-1].hour}:{date.index[-1].minute}"
            )
        if group:
            # Build the legend once, after all lines of the group exist.
            ax.legend(bbox_to_anchor=(-0.05, 1))
        # Cycle the background colours so more than ten groups do not fail.
        ax.patch.set_facecolor(colors[i % len(colors)])
        ax.patch.set_alpha(0.1)
        plt.show()

In [None]:
def plot_multi_variant_matrix_profile(mps, cols_to_plot):
    """
    Draw the per-column matrix profiles together with the multi-dimensional
    one in a single figure.

    :param mps: dictionary of matrix profile, with ``'mp_<column>'`` entries
        and an ``'All_dimensions'`` entry
    :param cols_to_plot: list of columns names
    :return: None; plot the multi variant matrix profile and mark in black the
        most minimal graph
    """

    plt.figure(figsize=(30, 6))
    for col_ in cols_to_plot:
        plt.plot(mps[f'mp_{col_}'][:, 0], label=col_)

    # NOTE(review): this takes element 0 of the 'All_dimensions' entry and
    # then column 0 of it; whether that is a full profile depends on the
    # array orientation returned by the installed stumpy version - confirm.
    plt.plot(mps['All_dimensions'][0][:, 0], 'k--', linewidth=3, label='all')

    # The legend is created after every line has been drawn; building it
    # inside the loop left the 'all' entry out.
    plt.legend(title='title', bbox_to_anchor=(-0.05, 1))
    plt.title('All dimensions 1 day window matrix profile', fontsize=20)
    plt.xlabel('index')
    plt.ylabel('Distance to \n nearest neighbor', fontsize='18')
    plt.show()

**Connect to API**

In [None]:
# Build the API client from the credentials stored in key.json.
client = connect()

**Load data**

In [None]:
# Request the data of point NODE, passing START as the ``start`` argument.
df_Train = client.get_all(point_id=NODE, start=START)

**Create DataFrame**

In [None]:
# Turn the raw payload into a DateTime-indexed DataFrame and preview it.
df = create_df(df_Train)
df.head()

**Relevant features**

In [None]:
# Columns drawn in the overview plots and imputed further down.
cols_to_plot = ['PI', 'EC', 'PH', 'ORP', 'TEMPERATURE']

**Plotting on a full time-line (with optional resampling)**

In [None]:
# One figure per column, averaged over RESAMPLE-sized bins.
plot_columns(df, cols_to_plot, RESAMPLE)

**Resampling the data, imputing nulls with time interpolation, and plotting**

In [None]:
# Resample to RESAMPLE bins and fill the gaps of the plotted columns with
# ``interpolate(method='time')``, then redraw the same figures.
df = impute_nulls_with_time_interpolation(df, cols_to_plot, RESAMPLE)
plot_columns(df, cols_to_plot, RESAMPLE)

### Plot matrix profile

In [None]:
# Keep only the first 1000 rows for the matrix-profile steps below.
df = df.iloc[:1000]
df.shape

In [None]:
# Columns for which matrix profiles are computed.
cols_for_matrix_profile = ['EC', 'PH', 'ORP', 'TEMPERATURE']
# The window size is passed under the parameter name that
# create_matrix_profile_dictionary declares (``window``).
mps = create_matrix_profile_dictionary(df, cols_for_matrix_profile, window=WINDOW)

**Plot univariate matrix profiles**

In [None]:
# Two-panel figure per column: the series and column 0 of its matrix profile.
plot_univariant_matrix_profiles_dictionary(df, mps, cols_for_matrix_profile)

**Plot univariate matrix profiles and motifs**

In [None]:
# Per column: context figure, series with highlighted motifs above its matrix
# profile, and the motif-group figures.
plot_univariant_matrix_profiles_and_motifs(df,
                                           mps,
                                           cols_for_matrix_profile,
                                           WINDOW,
                                           threshold=THRESHOLD)

**Plot univariate motifs**

In [None]:
# Draw only the motif-group figures for every column. The groups are derived
# with the same profile and threshold as in the previous cell, which also
# calls plot_groups.
for col in cols_for_matrix_profile:
    groups = discover_motifs_groups(mps[f'mp_{col}'], THRESHOLD)
    plot_groups(df, groups, f'{col}', WINDOW)

**Plot multivariate matrix profiles**

In [None]:
# Overlay the per-column profiles and the 'All_dimensions' profile.
plot_multi_variant_matrix_profile(mps, cols_for_matrix_profile)