# Compute matrix profiles

## Introduction to matrix profiles
The matrix profile is a rather simple method to detect motifs and anomalies in time series data. It can be used to find global similarities in a time series or to detect similar features by a query search using sliding windows. The basic idea is to create a window of size m and slide this window along a time series. Then the euclidian distance between 



With a time series of length n and a window size of m we extract n-m+1 subsequences. The global matrix profile finds the distance from a subsequence to a neirest neighbour.  

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os
import matrixprofile as mp
import glob as glob
import matplotlib.gridspec as gridspec
import random

plt.rcParams.update({'font.size': 17})



%matplotlib notebook



In [2]:
"""Functions from B_series_functions"""

"""Create a time array from B series indexes"""
def time_array(start,end):
    time_s = np.linspace(0,end-start,end-start)/100#convert to seconds
    return time_s

"""find index of time in video of a given drifter deployment"""
def index_finder(cp_times,time_s,start_n):
    length = len(cp_times)
    indexes = np.zeros(length)
    for i in range(length):#find indexes in time_s of the features
        indexes[i] = (next(j for j, _ in enumerate(time_s) if np.isclose(_, cp_times[i], 0.01)))    
    indexes = indexes.astype(int) + start_n
    return indexes

def read_B_series_subfolders(path):
    # create a list of file and sub directories 
    # names in the given directory 
    subfolders = os.listdir(path)
    file_list = []

    # Iterate over all the entries
    for names in subfolders:
        # Create full path
        fullPath = os.path.join(path, names)
        # If entry is a directory then get the list of files in this directory 
        temp_path = fullPath#for glob
        if os.path.isdir(fullPath):
            file_list = file_list + read_B_series_subfolders(fullPath)   
        else:
            txt_files = glob.glob(path + "/*.csv")#only read txt files
            if (txt_files != []):
                file_list.append(txt_files)
    return file_list

# Load data for mp

In [400]:
####################
#Cross section A5
####################
A5_path = "/Users/georgecowie/Documents/Master/Masteroppgave/data/2020/processed_drifter_data/A5"
A5B14 = pd.read_csv(A5_path+"/B14.csv",names = ["time s/100","pressure"],header = 0)
A5B17 = pd.read_csv(A5_path+"/B17.csv",names = ["time s/100","pressure"],header = 0)
A5B56 = pd.read_csv(A5_path+"/B56.csv",names = ["time s/100","pressure"],header = 0)
A5B75 = pd.read_csv(A5_path+"/B75.csv",names = ["time s/100","pressure"],header = 0)
A5B79 = pd.read_csv(A5_path+"/B79.csv",names = ["time s/100","pressure"],header = 0)
A5B80 = pd.read_csv(A5_path+"/B80.csv",names = ["time s/100","pressure"],header = 0)
A5B82 = pd.read_csv(A5_path+"/B82.csv",names = ["time s/100","pressure"],header = 0)
A5B85 = pd.read_csv(A5_path+"/B85.csv",names = ["time s/100","pressure"],header = 0)
A5B86 = pd.read_csv(A5_path+"/B86.csv",names = ["time s/100","pressure"],header = 0)

A5B14 = A5B14.iloc[19015:20680]
A5B17 = A5B17.iloc[21495:23125]
A5B56 = A5B56.iloc[21744:23239]
A5B75 = A5B75.iloc[23382:25032]
A5B79 = A5B79.iloc[25561:27359]
A5B80 = A5B80.iloc[27629:29151]
A5B82 = A5B82.iloc[30010:31527]
A5B85 = A5B85.iloc[32284:34086]
A5B86 = A5B86.iloc[34664:36260]

####################
#Cross section A6
####################
A6_path = "/Users/georgecowie/Documents/Master/Masteroppgave/data/2020/processed_drifter_data/A6"
A6B13 = pd.read_csv(A6_path+"/B13.csv",names = ["time s/100","pressure"],header = 0)
A6B33 = pd.read_csv(A6_path+"/B33.csv",names = ["time s/100","pressure"],header = 0)
A6B55 = pd.read_csv(A6_path+"/B55.csv",names = ["time s/100","pressure"],header = 0)
A6B65 = pd.read_csv(A6_path+"/B65.csv",names = ["time s/100","pressure"],header = 0)
A6B68 = pd.read_csv(A6_path+"/B68.csv",names = ["time s/100","pressure"],header = 0)
A6B73 = pd.read_csv(A6_path+"/B73.csv",names = ["time s/100","pressure"],header = 0)
A6B74 = pd.read_csv(A6_path+"/B74.csv",names = ["time s/100","pressure"],header = 0)
A6B78 = pd.read_csv(A6_path+"/B78.csv",names = ["time s/100","pressure"],header = 0)


A6B13 = A6B13.iloc[11753:14290]
A6B33 = A6B33.iloc[18090:20205]
A6B55 = A6B55.iloc[19786:21671]
A6B65 = A6B65.iloc[19807:21768]
A6B68 = A6B68.iloc[20381:22375]
A6B73 = A6B73.iloc[21572:23631]#[21592:23423]
A6B74 = A6B74.iloc[23217:25225]
A6B78 = A6B78.iloc[24423:26560]


####################
#Cross section A7
####################
A7_path = "/Users/georgecowie/Documents/Master/Masteroppgave/data/2020/processed_drifter_data/A7"
A7B14 = pd.read_csv(A7_path+"/B14.csv",names = ["time s/100","pressure"],header = 0)
A7B17 = pd.read_csv(A7_path+"/B17.csv",names = ["time s/100","pressure"],header = 0)
A7B56 = pd.read_csv(A7_path+"/B56.csv",names = ["time s/100","pressure"],header = 0)
A7B68 = pd.read_csv(A7_path+"/B68.csv",names = ["time s/100","pressure"],header = 0)
A7B73 = pd.read_csv(A7_path+"/B73.csv",names = ["time s/100","pressure"],header = 0)
A7B78 = pd.read_csv(A7_path+"/B78.csv",names = ["time s/100","pressure"],header = 0)
A7B79 = pd.read_csv(A7_path+"/B79.csv",names = ["time s/100","pressure"],header = 0)
A7B82 = pd.read_csv(A7_path+"/B82.csv",names = ["time s/100","pressure"],header = 0)
A7B83 = pd.read_csv(A7_path+"/B83.csv",names = ["time s/100","pressure"],header = 0)

A7B14 = A7B14.iloc[14211:17500]
A7B17 = A7B17.iloc[15669:18274]
A7B56 = A7B56.iloc[16822:19836]
A7B68 = A7B68.iloc[17281:20490]
A7B73 = A7B73.iloc[17958:20675]
A7B78 = A7B78.iloc[18848:21768]
A7B79 = A7B79.iloc[20125:23334]
A7B82 = A7B82.iloc[22099:25487]
A7B83 = A7B83.iloc[23970:26942]




####################
#Cross section A8
####################
A8_path = "/Users/georgecowie/Documents/Master/Masteroppgave/data/2020/processed_drifter_data/A8"
A8B10 = pd.read_csv(A8_path+"/B10.csv",names = ["time s/100","pressure"],header = 0)
A8B13 = pd.read_csv(A8_path+"/B13.csv",names = ["time s/100","pressure"],header = 0)
A8B33 = pd.read_csv(A8_path+"/B33.csv",names = ["time s/100","pressure"],header = 0)
A8B55 = pd.read_csv(A8_path+"/B55.csv",names = ["time s/100","pressure"],header = 0)
A8B65 = pd.read_csv(A8_path+"/B65.csv",names = ["time s/100","pressure"],header = 0)
A8B74 = pd.read_csv(A8_path+"/B74.csv",names = ["time s/100","pressure"],header = 0)
A8B75 = pd.read_csv(A8_path+"/B75.csv",names = ["time s/100","pressure"],header = 0)
A8B80 = pd.read_csv(A8_path+"/B80.csv",names = ["time s/100","pressure"],header = 0)
A8B84 = pd.read_csv(A8_path+"/B84.csv",names = ["time s/100","pressure"],header = 0)
A8B85 = pd.read_csv(A8_path+"/B85.csv",names = ["time s/100","pressure"],header = 0)
A8B86 = pd.read_csv(A8_path+"/B86.csv",names = ["time s/100","pressure"],header = 0)

x = A8B13["pressure"].values
A8B80 = A8B80.iloc[15795:19699]
A8B10 = A8B10.iloc[5704:10144]
A8B13 = A8B13.iloc[6600:10428]
A8B33 = A8B33.iloc[8207:12000]
A8B55 = A8B55.iloc[10123:13754]
A8B65 = A8B65.iloc[10988:14720]#This sensor might be faulty
A8B74 = A8B74.iloc[11687:15677]
A8B75 = A8B75.iloc[14682:19004]
A8B84 = A8B84.iloc[18480:22787]
A8B85 = A8B85.iloc[19545:23467]
A8B86 = A8B86.iloc[20255:24166]

#load exel sheet of drifter observations
B80_A8_excel_sheet_path = "/Users/georgecowie/Documents/Master/Masteroppgave/data/drifter_videos/2020/Video_features_A8_B80.xlsx"
B80_A8_SS = pd.read_excel(B80_A8_excel_sheet_path,header = 3)
A8_steps = B80_A8_SS[B80_A8_SS["Feature id"] == 1].reset_index(drop=True)
A8_start = 15784
A8_end = 19699
time_A8 = time_array(A8_start,A8_end)
A8_HF_idx = index_finder(B80_A8_SS["Deployment_time"],time_A8,A8_start)-A8_start
A8_steps_idx = index_finder(A8_steps["Deployment_time"],time_A8,A8_start)-A8_start


#load exel sheet of drifter observations
B13_A8_excel_sheet_path = "/Users/georgecowie/Documents/Master/Masteroppgave/data/drifter_videos/2020/Video_features_A8_B13.xlsx"
B13_A8_SS = pd.read_excel(B13_A8_excel_sheet_path,header = 3)
B13_A8_steps = B13_A8_SS[B13_A8_SS["Feature id"] == 1].reset_index(drop=True)


B13_A8_start = 6600#
B13_A8_end = 11500#

time_B13_A8 = time_array(B13_A8_start,B13_A8_end)

B13_A8_HF_idx = index_finder(B13_A8_SS["Deployment_time"],time_B13_A8,B13_A8_start)-B13_A8_start
B13_A8_steps_idx = index_finder(B13_A8_steps["Deployment_time"],time_B13_A8,B13_A8_start)-B13_A8_start



####################
#Cross section A13
#B14 file is messed up so we dont have the drifter features because this was the deployment filmed
####################
A13_path = "/Users/georgecowie/Documents/Master/Masteroppgave/data/2020/processed_drifter_data/A13"
A13B14 = pd.read_csv(A13_path+"/B14.csv",names = ["time s/100","pressure"],header = 0)
A13B33 = pd.read_csv(A13_path+"/B33.csv",names = ["time s/100","pressure"],header = 0)
A13B55 = pd.read_csv(A13_path+"/B55.csv",names = ["time s/100","pressure"],header = 0)
A13B65 = pd.read_csv(A13_path+"/B65.csv",names = ["time s/100","pressure"],header = 0)
A13B74 = pd.read_csv(A13_path+"/B74.csv",names = ["time s/100","pressure"],header = 0)
A13B75 = pd.read_csv(A13_path+"/B75.csv",names = ["time s/100","pressure"],header = 0)
A13B79 = pd.read_csv(A13_path+"/B79.csv",names = ["time s/100","pressure"],header = 0)
A13B80 = pd.read_csv(A13_path+"/B80.csv",names = ["time s/100","pressure"],header = 0)
A13B86 = pd.read_csv(A13_path+"/B86.csv",names = ["time s/100","pressure"],header = 0)

#A13B14 = A13B14#incomplete
#A13B33 = A13B33#incomplete
A13B55 = A13B55.iloc[12846:22599]
A13B65 = A13B65.iloc[14440:24317]
A13B74 = A13B74.iloc[17584:27731]
A13B75 = A13B75.iloc[20766:30950]
A13B79 = A13B79.iloc[22944:32801]
A13B80 = A13B80.iloc[24547:34510]
A13B86 = A13B86.iloc[29189:41027]

####################
#Cross section A14
####################
A14_path = "/Users/georgecowie/Documents/Master/Masteroppgave/data/2020/processed_drifter_data/A14"
A14B13 = pd.read_csv(A14_path+"/B13.csv",names = ["time s/100","pressure"],header = 0)
A14B17 = pd.read_csv(A14_path+"/B17.csv",names = ["time s/100","pressure"],header = 0)
A14B56 = pd.read_csv(A14_path+"/B56.csv",names = ["time s/100","pressure"],header = 0)
A14B78 = pd.read_csv(A14_path+"/B78.csv",names = ["time s/100","pressure"],header = 0)
A14B82 = pd.read_csv(A14_path+"/B82.csv",names = ["time s/100","pressure"],header = 0)
A14B84 = pd.read_csv(A14_path+"/B84.csv",names = ["time s/100","pressure"],header = 0)
A14B85 = pd.read_csv(A14_path+"/B85.csv",names = ["time s/100","pressure"],header = 0)

#cut out when drifter lying still
A14B13 = A14B13.iloc[6000:18050]
A14B17 = A14B17.iloc[10350:23800]
A14B56 = A14B56.iloc[12500:26500]
A14B78 = A14B78.iloc[20250:33500]
A14B82 = A14B82.iloc[23000:36500]
A14B84 = A14B84.iloc[26400:-1]
A14B85 = A14B85.iloc[26600:40200]
#A14B85['pressure'].plot(figsize = (10,7))

#load exel sheet of drifter observations
B56_A14_excel_sheet_path = "/Users/georgecowie/Documents/Master/Masteroppgave/data/drifter_videos/2020/Video_features_A14_B56_reanalysed.xlsx"
B56_A14_SS = pd.read_excel(B56_A14_excel_sheet_path,header = 8)
B56_A14_SS["Middle_t"] = B56_A14_SS["Stop_corr"]-(B56_A14_SS["Stop_corr"]-B56_A14_SS["Start_corr"])/2
A14steps = B56_A14_SS[B56_A14_SS["Feature id"] == 1].reset_index(drop=True)
A14other = B56_A14_SS[B56_A14_SS["Feature id"] == 2].reset_index(drop=True)
time_A14 = time_array(12500,26500)
A14_HF_idx = index_finder(B56_A14_SS["Middle_t"],time_A14,12500)-12500
steps_idx = index_finder(A14steps["Middle_t"],time_A14,12500)-12500
#A14_HF_stop_idx = index_finder(B56_A14_HF_stop,time_A14,start)-start

####################
#Cross section A15
####################

A15_path = "/Users/georgecowie/Documents/Master/Masteroppgave/data/2020/processed_drifter_data/A15"
A15B33 = pd.read_csv(A15_path+"/B33.csv",names = ["time s/100","pressure"],header = 0)
A15B55 = pd.read_csv(A15_path+"/B55.csv",names = ["time s/100","pressure"],header = 0)
A15B68 = pd.read_csv(A15_path+"/B68.csv",names = ["time s/100","pressure"],header = 0)
A15B74 = pd.read_csv(A15_path+"/B74.csv",names = ["time s/100","pressure"],header = 0)
A15B80 = pd.read_csv(A15_path+"/B80.csv",names = ["time s/100","pressure"],header = 0)
A15B84 = pd.read_csv(A15_path+"/B84.csv",names = ["time s/100","pressure"],header = 0)
A15B85 = pd.read_csv(A15_path+"/B85.csv",names = ["time s/100","pressure"],header = 0)

A15B33 = A15B33.iloc[8108:23980]
A15B55 = A15B55.iloc[10062:25257]
A15B68 = A15B68.iloc[14668:31040]
A15B74 = A15B74.iloc[16715:32478]
#A15B80 = A15B80.iloc[:]#incomplete
A15B84 = A15B84.iloc[22442:40024]
A15B85 = A15B85.iloc[23411:39380]

In [376]:
x = A7B14.iloc[14211:17400]
x = A7B17.iloc[15669:18174]
x = A7B56.iloc[16822:19736]
x = A7B68.iloc[17281:20390]
x = A7B73.iloc[17958:20575]
x = A7B78.iloc[18848:21568]
x = A7B79.iloc[20125:23234]
x = A7B82.iloc[22099:25387]
x = A7B83.iloc[23970:26842]
print(len(x))


fig, ax = plt.subplots()
#plt.plot(x1['pressure'])
plt.plot(x['pressure'])
plt.show()



2872


<IPython.core.display.Javascript object>

In [5]:



def mp_analysis(full_path):
    compute_motifs = "off"
    analyze = "on"
    
    df = pd.read_csv(full_path,names = ["time s/100","pressure"],header = 0)
    df = df.iloc[12500:26500]
    #df['pressure'].plot(figsize = (20,7))
    #plt.show()
    
    if compute_motifs == "on":


        windows = [
        ('1 sec', 100)]

        profiles = {}

        for label, window_size in windows:
            profile = mp.compute(df['pressure'].values, window_size)
            key = '{} Profile'.format(label)
            profiles[key] = profile
            motifs, neighbours = mp.discover.motifs(profile,k = 20)

            #snip = mp.discover.snippets(df['pressure'].values,10,3)

        #Plot the signal data
        fig, axes = plt.subplots(1,1,sharex=True,figsize=(15,10))

        for ax_idx, window in enumerate(windows):
            key = '{} Profile'.format(window[0])
            profile = profiles[key]
            axes[ax_idx].plot(profile['mp'])
            #axes[ax_idx].plot(snip['snippet'])
            axes[ax_idx].set_title(key)

        plt.xlabel('Pickup Datetime')
        plt.tight_layout()
        plt.show()
    
    if analyze == "on":
        profile, figures = mp.analyze(df['pressure'].values)
        return profile, figures
    

#profile, figures = mp_analysis(A14_path+"/B56.csv")







In [7]:
window = 200
profile_B13 = mp.compute(A14B13['pressure'].values, windows=window)
profile_B17 = mp.compute(A14B17['pressure'].values, windows=window)
profile_B56 = mp.compute(A14B56['pressure'].values, windows=window)
profile_B78 = mp.compute(A14B78['pressure'].values, windows=window)
profile_B82 = mp.compute(A14B82['pressure'].values, windows=window)
profile_B85 = mp.compute(A14B85['pressure'].values, windows=window)


In [283]:
#mp.visualize(profile_B56)
#mp.visualize(profile_B17)
#plt.imshow(profile_B17["mp"])


def heatmap(x):
    #fig, (ax,ax2) = plt.subplots(nrows=2, sharex=True,figsize=(13, 6))
    fig, ax = plt.subplots(nrows=1, sharex=True,figsize=(14, 3))

    extent = [0, len(x),0,0.5]
    ax.imshow(x[np.newaxis,:], aspect="auto", extent=extent)
    ax.set_yticks([])
    ax.set_xlim(0, len(x))

    #ax2.plot(x)

    plt.tight_layout()
    plt.show()

def multi_heatmap(deployments):
    #plot multiple heatmaps in subplots
    #fig, (ax,ax2) = plt.subplots(nrows=2, sharex=True,figsize=(13, 6))
    fig, ax = plt.subplots(nrows=len(deployments), sharex=True,figsize=(14, 10))
    
    for i in range(len(deployments)):
        x = deployments[i]
        extent = [0, len(x),0,0.5]
        ax[i].imshow(x[np.newaxis,:], aspect="auto", extent=extent)
        ax[i].set_yticks([])
        ax[i].set_xlim(0, len(x))



    plt.tight_layout()
    plt.show()
    

#heatmap(profile_B17["mp"],A14B17["time s/100"])
#heatmap(profile_B56["mp"],A14B56["time s/100"])

deployments = [profile_B13["mp"],profile_B17["mp"],profile_B56["mp"],profile_B78["mp"],profile_B82["mp"],profile_B85["mp"]]

#multi_heatmap(deployments)

# Query search
Look for features by passing snippets of the data

In [9]:
def plot_query(x,y,q_start,q_stop,features = 0):
    #x: matrix profile
    #y: data
    #q_stsrt/stop query satart stop
    
    for i in range(len(x)):
        if x[i]>5:
            x[i] = 15
    
    
    y = np.array(y)
    y_q = y[q_start:q_stop]
    fig, (ax,ax1,ax2) = plt.subplots(nrows=3, sharex=True,figsize=(9, 6))
    
    ax.plot(y,c="b")
    ax.plot(np.linspace(q_start,q_stop,len(y_q)),y_q,c="r",label = "Query")
    ax.plot(features,y[features],marker = "*",c="y",ls = "None")
    ax.set_ylabel("Pressure normalized")
    
    extent = [0, len(x),0,0.5]
    ax1.imshow(x[np.newaxis,:], aspect="auto", extent=extent)
    ax1.set_yticks([])
    ax1.set_xlim(0, len(x))
    

    ax2.plot(x,c="g")
    ax2.set_ylabel("Matrix profile")

    plt.tight_layout()
    plt.legend()
    plt.show()
    
    
def plot_full_mp(data,mp,features = 0):
    #plot added mps after multiple queries
    y = np.array(data['pressure'])
    x = mp
    
    fig, (ax,ax1,ax2) = plt.subplots(nrows=3, sharex=True,figsize=(9, 6))
    ax.plot(y,c="b")
    ax.plot(features,y[features],marker = "*",c="y",ls = "None")
    ax.set_ylabel("Pressure normalized")
    extent = [0, len(x),0,0.5]
    ax1.imshow(x[np.newaxis,:], aspect="auto", extent=extent)
    ax1.set_yticks([])
    ax1.set_xlim(0, len(x))
    ax2.plot(x,c="g")
    ax2.set_ylabel("Matrix profile")
    plt.tight_layout()
    plt.legend()
    plt.show()
    
    

## Method:
Start by looking at one query, keeping a high threshold and trying to have as few false positives as possible. Keep adding new querys until it finds all steps. 

After spending a lot of time with the data, the "classic" step seems to exibit a rise in pressure as the drifter moves over the crest of the step, then a fall in pressure, followed by another rise. 

In [10]:
def mp_query(data,start,stop,query,threshold = None,features = np.array([0]),data_viz = "off",show_query = "off",var ="pressure"):
    #calculate mp with a given query
    #start stop: query start, stop
    #var: variable to look at
    #threshold: for visualization of the mp, cut out values higher than this
        
    query_profile = mp.compute(data[var].values, windows=(stop-start),query = query)
    y = np.array(data[var])
    
    #y_q = y[start:stop]
    x = query_profile["mp"]
        
    if threshold != None:
        x_i = x #make copy for loop
        for i in range(len(x)):
           #make values binary, either pass of not 
            if x_i[i]>threshold:
                x[i] = 100
            if x_i[i]<threshold:
                x[i] = 0
                
    if data_viz == "on":

        fig, (ax,ax1) = plt.subplots(nrows=2, sharex=True,figsize=(10, 6))

        extent = [0, len(x),0,0.5]
        ax.imshow(x[np.newaxis,:], aspect="auto", extent=extent)
        ax.set_yticks([])
        ax.set_xlim(0, len(x))
        ax.tick_params(bottom = False,labelbottom = False)
        box = ax.get_position()
        ax.set_position([box.x0, box.y0,(box.x1-box.x0), box.y1-0.6])
        
        ax1.plot(y,c="b")
        if show_query == "on":
            ax1.plot(np.linspace(start,stop,len(query)),query,c="r",label = "Query")
        if features.any() != 0:
            ax1.plot(features,y[features],marker = "^",c="orange",markersize=8,ls = "None",label="Steps")
        ax1.set_ylabel("Pressure normalized")
        ax1.set_xlabel("Time [s $10^{-2}$]")
        
        box1 = ax1.get_position()
        ax1.set_position([box1.x0, box1.y0,(box1.x1-box1.x0), box1.y1+0.1])

        #ax2.plot(x,c="g")
        #ax2.set_ylabel("Matrix profile")

        #plt.tight_layout()
        plt.legend()
        #plt.savefig('/Users/georgecowie/Documents/Master/Masteroppgave/img_and_figures/figures_mp/A8_B80_query_mp.eps', format='eps')


        plt.show()
    return query_profile["mp"]







# Save selected query for thesis results
Compare reference measurments from 2020 2021

Plot hits by window size for one deployment

Try the englacial channel 19.082020

In thesis show different queries and resulting statistics. Why is the best query best?
Find the best query in video

Look at the video of andreas walking 


The drifters show a somewhat typical pattern in the pressure data when moving over a step. The patter is: A pressure increase as the drifter moves towards the crest of a step, a drop in pressure as it drops down the step and finally a pressure increase as it reaches the bottom of the step. However, these signals vary in length, magnitude and between trogh and peak values. Unfortunately other channel features may make a similar pattern, making it difficult do differentiate between steps and other features like meanders. 

Since not all steps have a distinct pattern i attempted to create a synthetic query based on multiple features to see if the mean signal would yeild better results than using a query of a single feature as a query. However, no synthetic signal gave a better result than using a single feature as a query. 
## A5-A1
4 steps

#B14 2
#B17 2
#B56 3
#B75 4
#B79 3
#B80 3
#B82 4
#B85 4
#B86 2

mean = (2+2+3+4+3+3+4+4+2)/9 = 3

## A6-A1
7 steps by Andreas

#b13 6
#B33 7
#B55 5
#B65 8
#B68 7
#B73 1
#B74 2
#B78 5

mean = (6+7+5+8+7+1+2+5)/8 = 5.125

## A7-A1
9 steps by Andreas

#B14 12
#B17 8
#B56 6
#B68 9
#B73 11
#B78 8
#B79 9
#B82 6
#B83 8

(12+8+6+9+11+8+9+6+8)/9 = 8.555555555555555

## A8-A1
11 by andreas
13 steps identified in video of B80. One of these steps is very small and may not yet be counted as a step
A8-A1 B80 18082020
This run yielded: 10 positive matches
                  2 false positives
### B10
Detected steps 10
### B13
Detected steps 9
### B33
Detected steps 8
### B55
Detected steps 11
### B65
Detected steps 12
### B74
Detected steps 8
### B75
Detected steps 12
### B84
Detected steps 16
### B85
Detected steps 14
### B86
Detected steps 6

10+9+8+11+12+8+12+16+14+6

## A9-A1
14 steps by andreas

## A10-A1
19 steps by andreas

## A11-A1
24 steps by andreas

## A12-A1
30 steps by andreas

## A13-A1
35 steps by andreas

### B55 
43
### B65 
25 Should be discounted though since the data is drifting
### B74 
33
### B75 
29
### B79 
36
### B80 
33
### B86 
40
mean = (43+33+29+36+33+40)/6 = 35.6

## A14-A1
In A re analysis of deployment A14-B56 16 more steps were identified.
40 steps identified in video
56 steps in reanalysed video
54 steps identified by andreas

query
Indices from B56
11861#query start index
11935#query stop index


### B56 A14-A1 18082020
This run yield: 26 positive matches
                16 false positives
                2 matches that are so close to other matches they contain the same feature
                45 total excluding trivial match with self
                

Re analysis:
            32 positive matches
            12 false positives
            

### B13
41 detected steps

#### B17
46 detected steps

### B78
43 detected steps

### B82
42 detected steps

### B84
43 detected steps

### B85
43 detected steps

## A15-A1
66 steps by andreas

#B33 51
#B55 52
#B68 50
#B74 47
#B84 55
#B85 58
mean = (51+52+50+47+55+58)/6 = 52.1666666667
    


# Query search A14

In [249]:
B56_start = 11861#query start
B56_stop = 11935#query_stop

#video features
query = A14B56['pressure'].values[B56_start:B56_stop]
mp_A14_B56_1 = mp_query(A14B56,B56_start,B56_stop,query,threshold = 5,features=steps_idx,data_viz = "off",show_query = "on")


#query from B56 dataset applied to subsequent sets from same channel section
query = A14B56['pressure'].values[B56_start:B56_stop]

mp_A14_B13 = mp_query(A14B13,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")
mp_A14_B17 = mp_query(A14B17,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")
mp_A14_B78 = mp_query(A14B78,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")
mp_A14_B82 = mp_query(A14B82,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")
mp_A14_B84 = mp_query(A14B84,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")
mp_A14_B85 = mp_query(A14B85,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")


# Query search A5

In [409]:
#mp_A5_B14 = mp_query(A5B14,B56_start,B56_stop,query=query,threshold = 5,data_viz = "on")
#mp_A5_B17 = mp_query(A5B17,B56_start,B56_stop,query=query,threshold = 5,data_viz = "on")
#mp_A5_B56 = mp_query(A5B56,B56_start,B56_stop,query=query,threshold = 5,data_viz = "on")
#mp_A5_B75 = mp_query(A5B75,B56_start,B56_stop,query=query,threshold = 5,data_viz = "on")
#mp_A5_B79 = mp_query(A5B79,B56_start,B56_stop,query=query,threshold = 5,data_viz = "on")
#mp_A5_B80 = mp_query(A5B80,B56_start,B56_stop,query=query,threshold = 5,data_viz = "on")
#mp_A5_B82 = mp_query(A5B82,B56_start,B56_stop,query=query,threshold = 5,data_viz = "on")
#mp_A5_B85 = mp_query(A5B85,B56_start,B56_stop,query=query,threshold = 5,data_viz = "on")
mp_A5_B86 = mp_query(A5B86,B56_start,B56_stop,query=query,threshold = 5,data_viz = "on")

#B14 2
#B17 2
#B56 3
#B75 4
#B79 3
#B80 3
#B82 4
#B85 4
#B86 2

(2+2+3+4+3+3+4+4+2)/9

<IPython.core.display.Javascript object>

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


3.0

In [389]:
#mp_A6_B13 = mp_query(A6B13,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')
#mp_A6_B33 = mp_query(A6B33,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')
#mp_A6_B55 = mp_query(A6B55,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')
#mp_A6_B65 = mp_query(A6B65,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')
#mp_A6_B68 = mp_query(A6B68,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')
#mp_A6_B73 = mp_query(A6B73,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')
#mp_A6_B74 = mp_query(A6B74,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')
#mp_A6_B78 = mp_query(A6B78,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')

#b13 6
#B33 7
#B55 5
#B65 8
#B68 7
#B73 1
#B74 2
#B78 5

(6+7+5+8+7+1+2+5)/8

5.125

In [399]:
#mp_A7_B14 = mp_query(A7B14,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')
#mp_A7_B17 = mp_query(A7B17,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')
#mp_A7_B56 = mp_query(A7B56,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')
#mp_A7_B68 = mp_query(A7B68,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')
#mp_A7_B73 = mp_query(A7B73,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')
#mp_A7_B78 = mp_query(A7B78,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')
#mp_A7_B79 = mp_query(A7B79,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')
#mp_A7_B82 = mp_query(A7B82,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')
mp_A7_B83 = mp_query(A7B83,B56_start,B56_stop,query=query,threshold=5,data_viz = 'on')

#B14 12
#B17 8
#B56 6
#B68 9
#B73 11
#B78 8
#B79 9
#B82 6
#B83 8

(12+8+6+9+11+8+9+6+8)/9

<IPython.core.display.Javascript object>

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


8.555555555555555

# Query search A8

In [15]:
#B80 with video features
mp_A8_B80 = mp_query(A8B80,B56_start,B56_stop,query,threshold = 5,features=A8_steps_idx,data_viz = "off")

mp_A8_B10 = mp_query(A8B10,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")#10
mp_A8_B13 = mp_query(A8B13,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")#9
mp_A8_B33 = mp_query(A8B33,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")#8
mp_A8_B55 = mp_query(A8B55,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")#11
mp_A8_B65 = mp_query(A8B65,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")#12
mp_A8_B74 = mp_query(A8B74,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")#8
mp_A8_B75 = mp_query(A8B75,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")#12
mp_A8_B84 = mp_query(A8B84,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")#16
mp_A8_B85 = mp_query(A8B85,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")#14
mp_A8_B86 = mp_query(A8B86,B56_start,B56_stop,query = query,threshold = 5,data_viz = "off")#6


# Query search A13

In [235]:
#mp_A13_B55 = mp_query(A13B55,B56_start,B56_stop,query=query,threshold = 5, data_viz = 'on')
#mp_A13_B65 = mp_query(A13B65,B56_start,B56_stop,query=query,threshold = 5, data_viz = 'on')
#mp_A13_B74 = mp_query(A13B74,B56_start,B56_stop,query=query,threshold = 5, data_viz = 'on')
#mp_A13_B75 = mp_query(A13B75,B56_start,B56_stop,query=query,threshold = 5, data_viz = 'on')
#mp_A13_B79 = mp_query(A13B79,B56_start,B56_stop,query=query,threshold = 5, data_viz = 'on')
#mp_A13_B80 = mp_query(A13B80,B56_start,B56_stop,query=query,threshold = 5, data_viz = 'on')
mp_A13_B86 = mp_query(A13B86,B56_start,B56_stop,query=query,threshold = 5, data_viz = 'on')

#B55 43
#B65 25 Should be discounted though since the data is drifting
#B74 33
#B75 29
#B79 36
#B80 33
#B86 40

<IPython.core.display.Javascript object>

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


# Query search A15

In [242]:
#mp_A15_B33 = mp_query(A15B33,B56_start,B56_stop,query,threshold = 5, data_viz='on')
#mp_A15_B55 = mp_query(A15B55,B56_start,B56_stop,query,threshold = 5, data_viz='on')
#mp_A15_B68 = mp_query(A15B68,B56_start,B56_stop,query,threshold = 5, data_viz='on')
#mp_A15_B74 = mp_query(A15B74,B56_start,B56_stop,query,threshold = 5, data_viz='on')
#mp_A15_B84 = mp_query(A15B84,B56_start,B56_stop,query,threshold = 5, data_viz='on')
mp_A15_B85 = mp_query(A15B85,B56_start,B56_stop,query,threshold = 5, data_viz='on')

#B33 51
#B55 52
#B68 50
#B74 47
#B84 55
#B85 58


<IPython.core.display.Javascript object>

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


# 2. query 
Create synthetic signal of multiple steps

In [16]:

"""Create signals of length 90 with two tops"""
B56_start = 3300#query start
B56_stop = 3390#query_stop
query1 = A14B56['pressure'].values[B56_start:B56_stop]

B56_start = 3420#query start
B56_stop = 3510#query_stop
query2 = A14B56['pressure'].values[B56_start:B56_stop]

B56_start = 3630#query start
B56_stop = 3720#query_stop
query3 = A14B56['pressure'].values[B56_start:B56_stop]

B56_start = 4630#query start
B56_stop = 4720#query_stop
query4 = A14B56['pressure'].values[B56_start:B56_stop]

B56_start = 11855#query start
B56_stop = 11945#query_stop
query5 = A14B56['pressure'].values[B56_start:B56_stop]

B56_start = 9840#query start
B56_stop = 9930#query_stop
query6 = A14B56['pressure'].values[B56_start:B56_stop]



query_synth = (query2+query3+query6)/3

query = A14B56['pressure'].values[B56_start:B56_stop]
mp_A14_B56_2 = mp_query(A14B56,B56_start,B56_stop,query_synth,threshold = 6,features=steps_idx,data_viz = "on",show_query = "off")
mp_A14_B56_2 = mp_query(A14B56,B56_start,B56_stop,query6,threshold = 5.,features=steps_idx,data_viz = "on",show_query = "on")

fig,ax = plt.subplots()
ax.plot(query_synth)
plt.show()


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [19]:
"""Create signal of length from trough-top-trough-top"""
B56_start = 530#query start
B56_stop = 670#query_stop
query1 = A14B56['pressure'].values[B56_start:B56_stop]

B56_start = 905#query start
B56_stop = 1045#query_stop
query2 = A14B56['pressure'].values[B56_start:B56_stop]

B56_start = 8080#query start
B56_stop = 8220#query_stop
query3 = A14B56['pressure'].values[B56_start:B56_stop]

B56_start = 1900#query start
B56_stop = B56_start+140#query_stop
query4 = A14B56['pressure'].values[B56_start:B56_stop]

B56_start = 3610#query start
B56_stop = B56_start+140#query_stop
query5 = A14B56['pressure'].values[B56_start:B56_stop]

B56_start = 8079#query start
B56_stop = B56_start+140#query_stop
query6 = A14B56['pressure'].values[B56_start:B56_stop]

query_synth = (query1+query2+query3+query4)/4

query = A14B56['pressure'].values[B56_start:B56_stop]
mp_A14_B56_2 = mp_query(A14B56,B56_start,B56_stop,query_synth,threshold = 8,features=steps_idx,data_viz = "on",show_query = "off")
mp_A14_B56_2 = mp_query(A14B56,B56_start,B56_stop,query6,threshold = 5.,features=steps_idx,data_viz = "on",show_query = "on")

fig,ax = plt.subplots()
ax.plot(query_synth)
plt.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Derivative of pressure experiment
See if results are better looking at the derivative

In [20]:
import scipy.integrate as sint
from scipy import stats
def derivate_pressure(y,q_start,q_stop):
    #y: array
    #
    start = 0
    end = len(y)
    time = time_array(start,end)
    dx = 0.01
    #change time window? picewise
    #parametersise this based on mean velocity
    dy = np.diff(y)/dx
    dy_query = dy[q_start:q_stop]

    dy = pd.DataFrame(dy,columns=['pressure'])
    mp_query(dy,q_start,q_stop,dy_query,threshold = 4.,features=steps_idx,data_viz = "on",show_query = "on")



    return dy



#30/52-1 th =5
#q_start = 630
#q_stop = 719

#24/48-1
#q_start = 6260
#q_stop = 6363


#28/52-1 th = 3.3
q_start = 13131
q_stop = 13192

#26/51-1
q_start = 10238
q_stop = 10285
v = derivate_pressure(A14B56['pressure'].values,q_start,q_stop)

#fig, ax = plt.subplots(figsize = (8,8))
#ax.plot(v)
#ax.plot(steps_idx,v[steps_idx],marker = "^",c="r",ls = "None")

<IPython.core.display.Javascript object>

# Create box around steps iterate, create cumulative mp
results are potentially better than shown but i have not allowed double counting. two lines may contain the same feature.

When searching in smaller datasets, there are true steps to match with. This method may work well for long segments?

In [245]:
def mp_iterate_query(data,ref_data,window = 100,threshold = None,features = np.array([0])):
    #data: data to search for motifs
    #ref_data: data to draw queries from
    #calculate mp from multiple queries based on observations
    #n and a window size of m we extract n-m+1
    cum_query_profile = np.zeros(len(data)-window+1)

    k=0
    for i in range(1,len(features)):
        k+=1
        start = features[i] - int(window/2)
        stop = features[i] + int(window/2)
        
        query = ref_data[start:stop]
        cum_query_profile += mp.compute(data, windows=(stop-start),query = query)["mp"]
    
    cum_query_profile = (cum_query_profile/k)
    
    x = cum_query_profile
    fig,ax0 = plt.subplots(figsize=(10,8))
    ax0.plot(x)
    #ax0.plot(features,x[features],marker = "^",c="orange",markersize=8,ls = "None",label="Steps")
    plt.show()
    
    if threshold != None:
        x_i = x #make copy for loop
        for i in range(len(x)):
           #make values binary, either pass of not 
            if x_i[i]>threshold:
                x[i] = threshold
            if x_i[i]<threshold:
                x[i] = 1
                
        #for i in range(len(x)):
            #gather hits that are close together
        #    if x[i] == 1:
        #        x[i:i+window] = 0
        #        x[i] = threshold

    fig, (ax,ax1) = plt.subplots(nrows=2, sharex=True,figsize=(10, 6))
    extent = [0, len(x),0,0.5]
    ax.imshow(x[np.newaxis,:], aspect="auto", extent=extent)
    ax.set_yticks([])
    ax.set_xlim(0, len(x))
    ax.tick_params(bottom = False,labelbottom = False)
    box = ax.get_position()
    ax.set_position([box.x0, box.y0,(box.x1-box.x0), box.y1-0.6])
    ax1.plot(data,c="b")
    ax1.plot(features,data[features],marker = "^",c="orange",markersize=8,ls = "None",label="Steps")
    ax1.set_ylabel("Pressure normalized")
    ax1.set_xlabel("Time [s $10^{-2}$]")    
    box1 = ax1.get_position()
    ax1.set_position([box1.x0, box1.y0,(box1.x1-box1.x0), box1.y1+0.1])

    #plt.tight_layout()
    plt.legend()
    #plt.savefig('/Users/georgecowie/Documents/Master/Masteroppgave/img_and_figures/figures_mp/A8_iterate_B80_query_mp.eps', format='eps')
    plt.show()

#35/59=0.59    
#mp_iterate_query(A14B56['pressure'].values,window = 100,threshold = 13,features = steps_idx)

#42/63=0.66666 th 16
#with whole window plotted 34/49 = 0.69
#42/63 with two window lengths 80 and 150 th 14

mp_iterate_query(A14B56['pressure'].values,A14B56['pressure'].values,window = 150,threshold = 16,features = steps_idx)

#29/53=0.54 th 11.06
#with whole window plotted 36/59
#mp_iterate_query(A14B56['pressure'].values,A14B56['pressure'].values,window = 74,threshold = 11.06,features = steps_idx)

#37/60
#mp_iterate_query(A14B56['pressure'].values,A14B56['pressure'].values,window = 200,threshold = 18.6,features = steps_idx)

#42/64=0.65 th 22.7
#37/48= 0.77
#mp_iterate_query(A14B56['pressure'].values,A14B56['pressure'].values,window = 280,threshold = 22.55,features = steps_idx)

<IPython.core.display.Javascript object>

  fig, (ax,ax1) = plt.subplots(nrows=2, sharex=True,figsize=(10, 6))


<IPython.core.display.Javascript object>

In [61]:
#query boxes from B56 dataset applied to subsequent sets from same channel section

#45 steps
#mp_iterate_query_A14_B13 = mp_iterate_query(A14B13["pressure"].values,A14B56["pressure"].values,window = 150,threshold=16,features = steps_idx)

#49 steps
#mp_iterate_query_A14_B17 = mp_iterate_query(A14B17["pressure"].values,A14B56["pressure"].values,window = 150,threshold=16,features = steps_idx)

#50 steps
#mp_iterate_query_A14_B78 = mp_iterate_query(A14B78["pressure"].values,A14B56["pressure"].values,window = 150,threshold=16,features = steps_idx)

#55 steps
#mp_iterate_query_A14_B82 = mp_iterate_query(A14B82["pressure"].values,A14B56["pressure"].values,window = 150,threshold=16,features = steps_idx)

#38 steps
#mp_iterate_query_A14_B84 = mp_iterate_query(A14B84["pressure"].values,A14B56["pressure"].values,window = 150,threshold=16,features = steps_idx)

#47 steps
#mp_iterate_query_A14_B85 = mp_iterate_query(A14B85["pressure"].values,A14B56["pressure"].values,window = 150,threshold=16,features = steps_idx)

In [247]:
#with video features
mp_iterate_A8_B80 = mp_iterate_query(A8B80["pressure"].values, A14B56["pressure"].values,window=150,threshold = 16.,features=A8_steps_idx)
mp_iterate_A8_B13 = mp_iterate_query(A8B13["pressure"].values, A14B56["pressure"].values,window=150,threshold = 16.,features=B13_A8_steps_idx)


#Without video
#mp_iterate_A8_B10 = mp_iterate_query(A8B10["pressure"].values, A14B56["pressure"].values,window=150,threshold = 16,features=A8_steps_idx)
#mp_iterate_A8_B33 = mp_iterate_query(A8B33["pressure"].values, A14B56["pressure"].values,window=150,threshold = 16,features=A8_steps_idx)
#mp_iterate_A8_B55 = mp_iterate_query(A8B55["pressure"].values, A14B56["pressure"].values,window=150,threshold = 16,features=A8_steps_idx)
#mp_iterate_A8_B65 = mp_iterate_query(A8B65["pressure"].values, A14B56["pressure"].values,window=150,threshold = 16,features=A8_steps_idx)
#mp_iterate_A8_B74 = mp_iterate_query(A8B74["pressure"].values, A14B56["pressure"].values,window=150,threshold = 16,features=A8_steps_idx)
#mp_iterate_A8_B75 = mp_iterate_query(A8B75["pressure"].values, A14B56["pressure"].values,window=150,threshold = 16,features=A8_steps_idx)
#mp_iterate_A8_B84 = mp_iterate_query(A8B84["pressure"].values, A14B56["pressure"].values,window=150,threshold = 16,features=A8_steps_idx)
#mp_iterate_A8_B85 = mp_iterate_query(A8B85["pressure"].values, A14B56["pressure"].values,window=150,threshold = 16,features=A8_steps_idx)
#mp_iterate_A8_B86 = mp_iterate_query(A8B86["pressure"].values, A14B56["pressure"].values,window=150,threshold = 16,features=A8_steps_idx)

<IPython.core.display.Javascript object>

  fig, (ax,ax1) = plt.subplots(nrows=2, sharex=True,figsize=(10, 6))


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Random index test

In this test we will see if the query search method performs better than randomly generated indexes along the time series. The same number of "hits" as the best query search will be distributed aling the time series and compared to the actual steps with the same method as above

In [22]:
def random_test(data,hits_n,features = np.array([0]),var ="pressure",seed = 1):
    random.seed(seed)

    y = np.array(data[var])
    rand_mp = np.zeros(hits_n)
    for i in range(hits_n):
        rand_idx = random.randint(0, len(data))
        rand_mp[i] = (rand_idx)
   
    rand_mp = np.sort(rand_mp).astype(int)
    print(rand_mp)

    
    fig, ax = plt.subplots(nrows = 1, figsize=(12, 8))
    ax.plot(y,c="b")
    ax.set_ylabel("Pressure normalized")
    ax.set_xlabel("Time [s $10^{-2}$]")
    if features.any() != 0:
        ax.plot(rand_mp,y[rand_mp],marker = "s",c="r",ls = "None",label = "Random hits",markersize = '8')
        ax.plot(features,y[features],marker = "^",c="orange",ls = "None",label="Steps",markersize ='10')
        
    plt.legend()
    plt.savefig('/Users/georgecowie/Documents/Master/Masteroppgave/img_and_figures/figures_mp/A14_random_idex_test.eps', format='eps')
    plt.show()
    

#23 hits
#random_test(A14B56,45,features = steps_idx,var ="pressure",seed=10)

#21
#random_test(A14B56,45,features = steps_idx,var ="pressure",seed=35)

#23
#random_test(A14B56,45,features = steps_idx,var ="pressure",seed=63)

#19
#random_test(A14B56,45,features = steps_idx,var ="pressure",seed=1034)

#18
#random_test(A14B56,45,features = steps_idx,var ="pressure",seed=10084)

#17
#random_test(A14B56,45,features = steps_idx,var ="pressure",seed=20459)

#20
random_test(A14B56,45,features = steps_idx,var ="pressure",seed=55459)

#17
#random_test(A14B56,45,features = steps_idx,var ="pressure",seed=25759)

#23
#random_test(A14B56,45,features = steps_idx,var ="pressure",seed=207759)

#24
#random_test(A14B56,45,features = steps_idx,var ="pressure",seed=2090459)

mean_random_test = (23+21+23+19+18+17+20+17+23+24)/10

print("Mean of random tests: ",mean_random_test)


[  154   369   950  1613  1670  1920  1928  2112  2271  2550  2640  2862
  3016  3067  4144  4284  4338  4415  4520  5796  6612  7119  7147  7444
  7492  8138  8477  8615  9636  9756 10239 10251 10273 10468 10541 10928
 11829 12012 12486 12575 12778 12975 13035 13076 13283]


<IPython.core.display.Javascript object>

The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.


Mean of random tests:  20.5


# 3. query

In [23]:
B56_start = 5800#query start
B56_stop = 5896#query_stop
query = A14B56['pressure'].values[B56_start:B56_stop]
mp_A14_B56 = mp_query(A14B56,B56_start,B56_stop,query,threshold = 6,features=steps_idx,data_viz = "on",show_query = "on")



<IPython.core.display.Javascript object>

In [24]:
"""This one is worth saving"""

#query = A14B56['pressure'].values[q_start:q_stop]
#query_profile_B56 = mp.compute(A14B56['pressure'].values, windows=(q_stop-q_start),query = query)

B56_start = 9823#query start
B56_stop = 9955#query_stop
#mp1 = mp_query(A14B56,B56_start,B56_stop,threshold = 4,features=A14_HF_idx,data_viz = "off")
mp1 = mp_query(A14B56,B56_start,B56_stop,threshold = None,features=A14_HF_idx,data_viz = "off")


B56_start = 3607#query start
B56_stop = 3772#query_stop
#mp2 = mp_query(A14B56,B56_start,B56_stop,threshold = 7,features=A14_HF_idx,data_viz = "off")
mp2 = mp_query(A14B56,B56_start,B56_stop,threshold = None,features=A14_HF_idx,data_viz = "off")



B56_start = 6191#query start
B56_stop = 6425#query_stop
#mp3 = mp_query(A14B56,B56_start,B56_stop,threshold = 10,features=A14_HF_idx,data_viz = "off")
mp3 = mp_query(A14B56,B56_start,B56_stop,threshold = None,features=A14_HF_idx,data_viz = "off")



B56_start = 9155#query start
B56_stop = 9300#query_stop
#mp4 = mp_query(A14B56,B56_start,B56_stop,threshold = 5,features=A14_HF_idx,data_viz = "off")
mp4 = mp_query(A14B56,B56_start,B56_stop,threshold = None,features=A14_HF_idx,data_viz = "off")


B56_start = 4852#query start
B56_stop = 4972#query_stop
mp5 = mp_query(A14B56,B56_start,B56_stop,threshold = None,features=A14_HF_idx,data_viz = "off")

B56_start = 2211#query start
B56_stop = 2400#query_stop
mp6 = mp_query(A14B56,B56_start,B56_stop,threshold = None,features=A14_HF_idx,data_viz = "off")

B56_start = 534#query start
B56_stop = 697#query_stop
mp7 = mp_query(A14B56,B56_start,B56_stop,threshold = None,features=A14_HF_idx,data_viz = "off")

B56_start = 13305#query start
B56_stop = 13490#query_stop
mp8 = mp_query(A14B56,B56_start,B56_stop,threshold = None,features=A14_HF_idx,data_viz = "off")
full_mp = (mp1[:13767]+mp2[:13767]+mp3[:13767]+mp4[:13767]+mp5[:13767]+mp6[:13767]+mp7[:13767]+mp7[:13767])/8

tl = 15.0

for i in range(len(full_mp)):
    if full_mp[i]>tl:
        full_mp[i] = 1000

plot_full_mp(A14B56,full_mp,features=A14_HF_idx)






TypeError: mp_query() missing 1 required positional argument: 'query'

# Query search acceleration data
gleason 2016

In this section i will show the results obtained from the vertical acceleration of the drifters. The accelerometers onboard the drifters are rotated to an NED reference frame using pre calculated euler angles provided by the sensor. The Sensor creates the euler angles by referencing the acceleration to magnetometer readings. By integrating the acceleration we get the drifter velocities. The drifters accelerate as they move over steps so this should be visible in the velocity. The drifters movement in steps is primarily in the vertical direction. Rapid changes in the vertical velocity are charecteristic of steps and there are not many other channel features that produce this behaviour. Therfore, this might create a charecteristic signal in the vertical velocity that can be detected using a query search.   

In [138]:
#load acceleration data
rotated_path = "/Users/georgecowie/Documents/Master/Masteroppgave/data/2020/processed_drifter_data/rotated_data/"
#acc_A14_B56 = pd.read_csv(rotated_path+"/A14_B56_rotated.csv",names = ["acc_z"],header = 0)

A14B13_acc_r = pd.read_csv(rotated_path + 'A14B13_acc_rotated.csv', names = ['acc_x','acc_y','acc_z']).iloc[6000:18050]
A14B17_acc_r = pd.read_csv(rotated_path + 'A14B17_acc_rotated.csv', names = ['acc_x','acc_y','acc_z']).iloc[10350:23800]
A14B56_acc_r = pd.read_csv(rotated_path + 'A14B56_acc_rotated.csv', names = ['acc_x','acc_y','acc_z']).iloc[12500:26500]
A14B78_acc_r = pd.read_csv(rotated_path + 'A14B78_acc_rotated.csv', names = ['acc_x','acc_y','acc_z']).iloc[20250:33500]
A14B82_acc_r = pd.read_csv(rotated_path + 'A14B82_acc_rotated.csv', names = ['acc_x','acc_y','acc_z']).iloc[23000:36500]
A14B84_acc_r = pd.read_csv(rotated_path + 'A14B84_acc_rotated.csv', names = ['acc_x','acc_y','acc_z']).iloc[26400:-1]
A14B85_acc_r = pd.read_csv(rotated_path + 'A14B85_acc_rotated.csv', names = ['acc_x','acc_y','acc_z']).iloc[26600:40200]

In [144]:

#28/48 steps
#B56_start = 2230#query start
#B56_stop = 2310#query_stop

#29/50
#B56_start = 10247#query start
#B56_stop = 10360#query_stop


#21/30 thershold 10
#B56_start = 10450#query start
#B56_stop = 10627#query_stop


#26/42 thershold 3.7
#B56_start = 13150#query start
#B56_stop = 13222#query_stop


#25/46 threshold 11
#B56_start = 618#query start
#B56_stop = 730#query_stop

#27/47 theshold 9
#B56_start = 277#query start
#B56_stop = 362#query_stop

#24/32 threshold 7
B56_start = 4848#query start
B56_stop = 4965#query_stop

query = A14B56_acc_r['acc_z'].values[B56_start:B56_stop]
mp_A14_B56_1 = mp_query(A14B56_acc_r,B56_start,B56_stop,query,threshold = 10,features=steps_idx,data_viz = "on",show_query = "on",var = "acc_z")
print(len(steps_idx))

<IPython.core.display.Javascript object>

56


# Iterate over multiple queries of acceleration and pressure

In [170]:
def mp_iterate_query_ac_p(data1,data2,ref_data1,ref_data2,window = 100,threshold = None,features = np.array([0])):
    #calculate mp from multiple queries based on observations. both acceleration and pressure
    #data1/2: data to search for motifs either pressure or acceleration
    #ref_data: data to draw queries from
    #n and a window size of m we extract n-m+1
    
    cum_query_profile = np.zeros(len(data1)-window+1)
    cum_query_profile1 = np.zeros(len(data1)-window+1)
    cum_query_profile2 = np.zeros(len(data2)-window+1)

    k=0
    for i in range(1,len(features)):
        k+=1
        start = features[i] - int(window/2)
        stop = features[i] + int(window/2)
        
        query1 = ref_data1[start:stop]
        query2 = ref_data2[start:stop]
        mp1 = mp.compute(data1, windows=window,query = query1)["mp"]
        mp2 = mp.compute(data2, windows=window,query = query2)["mp"]
        #mp1 = np.where(mp1 > np.mean(mp1), 100, mp1)
        #mp2 = np.where(mp2 > np.mean(mp2), 100, mp2)
        cum_query_profile1 += mp1
        cum_query_profile2 += mp2

    #max min normalize data so that the mps can be compared
    cum_query_profile1 = (cum_query_profile1 - cum_query_profile1.min()) / (cum_query_profile1.max() - cum_query_profile1.min())
    cum_query_profile2 = (cum_query_profile2 - cum_query_profile2.min()) / (cum_query_profile2.max() - cum_query_profile2.min())
 
    cum_query_profile = (cum_query_profile1+cum_query_profile2)/2
    
    x = cum_query_profile
    fig,ax0 = plt.subplots(figsize=(10,8))
    ax0.plot(cum_query_profile)
    plt.show()
    
    if threshold != None:
        x_i = x #make copy for loop
        for i in range(len(x)):
           #make values binary, either pass of not 
            if x_i[i]>threshold:
                x[i] = threshold
            if x_i[i]<threshold:
                x[i] = 0

    fig, (ax,ax1) = plt.subplots(nrows=2, sharex=True,figsize=(10, 6))
    extent = [0, len(x),0,0.5]
    ax.imshow(x[np.newaxis,:], aspect="auto", extent=extent)
    ax.set_yticks([])
    ax.set_xlim(0, len(x))
    ax.tick_params(bottom = False,labelbottom = False)
    box = ax.get_position()
    ax.set_position([box.x0, box.y0,(box.x1-box.x0), box.y1-0.6])
    ax1.plot(data1,c="b")
    #ax1.plot(data2,c='g')
    #ax1.plot(features,data1[features],marker = "^",c="orange",markersize=8,ls = "None",label="Steps")
    ax1.set_ylabel("Pressure normalized")
    ax1.set_xlabel("Time [s $10^{-2}$]")    
    box1 = ax1.get_position()
    ax1.set_position([box1.x0, box1.y0,(box1.x1-box1.x0), box1.y1+0.1])

    #plt.tight_layout()
    plt.legend()
    #plt.savefig('/Users/georgecowie/Documents/Master/Masteroppgave/img_and_figures/figures_mp/A8_iterate_B80_query_mp.eps', format='eps')
    plt.show()

#with video
#window size 150 not promising for other deployments
#39/59 = 0.66
mp_iterate_query_ac_p(A14B56['pressure'].values,A14B56_acc_r['acc_z'].values,A14B56['pressure'].values,A14B56_acc_r['acc_z'].values,window = 150,threshold = 0.25,features = steps_idx)



#mp_iterate_query_ac_p(A14B13['pressure'].values,A14B13_acc_r['acc_z'].values,A14B56['pressure'].values,A14B56_acc_r['acc_z'].values,window = 286,threshold = 0.27,features = steps_idx)
#mp_iterate_query_ac_p(A14B17['pressure'].values,A14B17_acc_r['acc_z'].values,A14B56['pressure'].values,A14B56_acc_r['acc_z'].values,window = 286,threshold = 0.27,features = steps_idx)
#mp_iterate_query_ac_p(A14B78['pressure'].values,A14B78_acc_r['acc_z'].values,A14B56['pressure'].values,A14B56_acc_r['acc_z'].values,window = 286,threshold = 0.27,features = steps_idx)
#mp_iterate_query_ac_p(A14B82['pressure'].values,A14B82_acc_r['acc_z'].values,A14B56['pressure'].values,A14B56_acc_r['acc_z'].values,window = 286,threshold = 0.27,features = steps_idx)
#mp_iterate_query_ac_p(A14B84['pressure'].values,A14B84_acc_r['acc_z'].values[:-1],A14B56['pressure'].values,A14B56_acc_r['acc_z'].values,window = 286,threshold = 0.27,features = steps_idx)
#mp_iterate_query_ac_p(A14B85['pressure'].values,A14B85_acc_r['acc_z'].values,A14B56['pressure'].values,A14B56_acc_r['acc_z'].values,window = 286,threshold = 0.27,features = steps_idx)




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
