In [1]:
import pandas as pd
import numpy as np
import scipy as sp
import plotly.plotly as py
import plotly.figure_factory as ff
import plotly.graph_objs as go
import scipy.signal as ss
import plotly
from statistics import median, mean

from selfpkg import readDB, MinToMS
from dataFilter import butter_lowpass_filter, butter_highpass_filter

In [2]:
# Convert the flapper signal columns from voltage to resistance.
def transVolt(df):
    """Overwrite ``flapper0`` .. ``flapper7`` in ``df`` with ``1000 / value - 10``.

    The DataFrame is modified in place and nothing is returned.
    """
    for channel in ('flapper' + str(n) for n in range(8)):
        df[channel] = 1000 / df[channel] - 10

# Divisor applied to the `time` column before plotting; the charts label that axis
# 'Time(minutes)', so this is 60000 ms per minute (assumes `time` is in milliseconds — TODO confirm).
timeConversionVal = 60000

In [3]:
def flapperChart(df, start=None, end=None):
    """Build one vertically offset line trace per flapper channel.

    Args:
        df: DataFrame with a ``time`` column and ``flapper0`` .. ``flapper7``.
            Row labels are assumed to equal row positions (default
            RangeIndex), as everywhere else in this notebook.
        start: Window start, converted with ``MinToMS(start, df)``; ``None``
            means the ``time`` of the first row.
        end: Window end, converted the same way; ``None`` means the ``time``
            of the last row. The row at ``end`` itself is excluded.

    Returns:
        Tuple of eight ``go.Scattergl`` traces, flapper0 first. x is
        ``time / timeConversionVal``; y is the channel value plus a fixed
        offset (+40 .. -40), presumably to keep the lines apart on one chart.

    Raises:
        IndexError: if no row has ``time`` equal to the resolved start or end.
    """
    if start is None:
        start = df.iloc[0]['time'].item()
    else:
        start = MinToMS(start, df)
    if end is None:
        end = df.tail(1)['time'].item()
    else:
        end = MinToMS(end, df)

    # Row index of the window bounds.
    startIndex = df.index[df['time'] == start].tolist()[0]
    endIndex = df.index[df['time'] == end].tolist()[0]

    # Slice exactly once. The previous code sliced the frame and then sliced
    # each column again with the same bounds, which skipped a further
    # `startIndex` rows whenever the window did not begin at row 0.
    window = df[startIndex:endIndex]
    minutes = window['time'] / timeConversionVal

    # (y offset, line colour) for flapper0 .. flapper7, in order.
    channelStyles = (
        (40, 'rgb(255, 0, 0)'),
        (30, 'rgb(0, 255, 0)'),
        (20, 'rgb(0, 0, 255)'),
        (10, 'rgb(0, 220, 255)'),
        (-10, 'rgb(255, 0, 255)'),
        (-20, 'rgb(255, 127, 0)'),
        (-30, 'rgb(127, 127, 127)'),
        (-40, 'rgb(0, 0, 0)'),
    )

    traces = []
    for channel, (offset, color) in enumerate(channelStyles):
        name = 'flapper' + str(channel)
        traces.append(go.Scattergl(
            x=minutes,
            y=window[name] + offset,
            name=name,
            line=dict(color=color, width=1),
        ))

    return tuple(traces)

In [4]:
# Detect joints from both the flapper and the accelerometer signals.
def jointDetection(df, start=None, end=None, filename='jointDetection'):
    """Locate joints independently from the flapper and accelerometer channels.

    Flapper path: channels 0, 2, 4 and 6 are each low-pass then high-pass
    filtered and summed; samples with magnitude below 2 are zeroed, peaks of
    the absolute signal are found, peaks closer than 50 samples are merged to
    their cluster midpoint, and 13 is subtracted from each result (the
    original comment calls this a fix for the shift from convolution).

    Accelerometer path: each axis is zeroed where its magnitude is below 15,
    the vector magnitude is zeroed where below 21, and peaks are found and
    merged the same way.

    Only the accelerometer joint markers are plotted, written to
    ``filename + '.html'``. Earlier revisions also built flapper and
    accelerometer line traces but never added them to the figure; those are
    no longer constructed.

    Args:
        df: DataFrame with ``time``, ``flapper0/2/4/6`` and ``accX/Y/Z``.
            Row labels are assumed to equal row positions (default RangeIndex).
        start, end: Window bounds, converted with ``MinToMS(value, df)``;
            ``None`` means the first / last row's ``time``.
        filename: Plot title and HTML file stem.

    Returns:
        ``(res, resAcc)``: lists of row indices of the flapper joints and
        the accelerometer joints.

    Raises:
        IndexError: if no row has ``time`` equal to the resolved start or end.
    """
    thresInterval = 50  # peaks at most this many samples apart belong to one joint
    timeConversionVal = 60000

    def mergeClosePeaks(peaks):
        """Collapse runs of nearby peaks into one midpoint index per run.

        The previous hand-rolled loop always folded the final peak into the
        open cluster without checking its distance, and returned nothing when
        there was exactly one peak.
        """
        merged = []
        if len(peaks) == 0:
            return merged
        clusterFirst = clusterLast = peaks[0]
        for peak in peaks[1:]:
            if peak - clusterLast > thresInterval:
                merged.append(int((clusterFirst + clusterLast) / 2))
                clusterFirst = peak
            clusterLast = peak
        merged.append(int((clusterFirst + clusterLast) / 2))
        return merged

    if start is None:
        start = df.iloc[0]['time'].item()
    else:
        start = MinToMS(start, df)
    if end is None:
        end = df.tail(1)['time'].item()
    else:
        end = MinToMS(end, df)

    startIndex = df.index[df['time'] == start].tolist()[0]
    endIndex = df.index[df['time'] == end].tolist()[0]

    # ---- flapper ----
    # Band-limit each usable channel and add them up. Building the sum from
    # fresh arrays avoids the old in-place `+=` on a slice of df['flapper0'],
    # which on some pandas versions writes through to the caller's frame.
    flapper = None
    for i in [0, 2, 4, 6]:
        channel = df['flapper' + str(i)][startIndex: endIndex]
        # assumes the filter helpers return array-like output of the same length — TODO confirm
        filtered = np.asarray(
            butter_highpass_filter(butter_lowpass_filter(channel, 5, 50, 5), 1, 50, 5),
            dtype=float)
        flapper = filtered if flapper is None else flapper + filtered

    thresHeight = 2
    flapper = np.abs(np.where(np.abs(flapper) < thresHeight, 0, flapper))

    peaksIndex, _ = ss.find_peaks(flapper)
    res = mergeClosePeaks(peaksIndex + startIndex)
    res = [x - 13 for x in res]  # fix shift from convolution

    # ---- accelerometer ----
    # Label-inclusive slice: same rows the old `range(startIndex, endIndex + 1)` loop read.
    accRows = df.loc[startIndex:endIndex]
    thresAccHeight = 15
    gated = [np.where(np.abs(axis) < thresAccHeight, 0, axis)
             for axis in (accRows.accX.values, accRows.accY.values, accRows.accZ.values)]
    acc = (gated[0] ** 2 + gated[1] ** 2 + gated[2] ** 2) ** (1/2)
    acc = np.where(acc < 21, 0, acc)

    peaksAccIndex, _ = ss.find_peaks(acc)
    resAcc = mergeClosePeaks(peaksAccIndex + startIndex)

    traceAccPoint = go.Scattergl(
        x=df.time[resAcc]/timeConversionVal,
        y=[-10] * len(resAcc),
        mode='markers',
        name='jointByAcc',
        marker=dict(color='rgb(127, 127, 127)')
    )

    layout = dict(title=filename,
                  xaxis=dict(title='Time(minutes)'),
                  yaxis=dict(title='Friction'))

    fig = dict(data=[traceAccPoint], layout=layout)
    plotly.offline.plot(fig, filename=filename + '.html')

    return res, resAcc  # peaks index lists

In [72]:
# Run 01: load the trimmed recording, rewrite the flapper columns with transVolt,
# then detect joints from the flapper and accelerometer signals.
run01Csv = '/Users/meng/Documents/dataPreprocessing/venv/source/sa_run01_trim(9.3min - 18.2min).csv'
df = pd.read_csv(run01Csv)
transVolt(df)
jointFlapper, jointAcc = jointDetection(df, filename='jointDetection4run01')

[601, 762, 934, 1119, 1281, 1465, 1698, 1937, 2162, 2427, 2709, 3000, 3293, 3588, 3891, 4170, 4355, 4659, 4953, 5246, 5557, 5855, 6153, 6478, 6803, 7126, 7455, 7778, 8114, 8406, 9853, 10228, 10502, 10875, 11166, 11368, 11571, 12050, 12962, 13223, 14236, 14439, 14627, 14817, 15009, 15377, 15750, 15927, 16104, 16467, 17342]


In [5]:
# Calculate the time interval between consecutive peaks.
def velocityCal(df, peaksIndex):
    """Return the gap between consecutive selected values of a Series.

    Args:
        df: Series of time stamps (callers pass ``df.time``).
        peaksIndex: Positions (used with ``.iloc``) of the peaks.

    Returns:
        Series with the labels of the selected rows. Element 0 is 0; element
        k is ``value[k] - value[k-1]``. Empty when ``peaksIndex`` is empty.

    The earlier loop relied on ``Series.iteritems`` (removed in pandas 2.0)
    and, on its first pass, assigned a one-element Series into a scalar slot;
    ``np.diff`` gives the same numbers without either problem.
    """
    picked = df.iloc[peaksIndex]
    stamps = picked.values
    gaps = np.zeros(len(stamps), dtype=stamps.dtype)
    gaps[1:] = np.diff(stamps)
    return pd.Series(gaps, index=picked.index, name=picked.name)

In [6]:
# Detect joints from the compass (magnetometer) signal.
def jointDetectionCompass(df, start=None, end=None, filename='jointDetection'):
    """Locate joints from the magnitude of the compass vector.

    The negated magnitude of ``(compassX, compassY, compassZ)`` over the whole
    frame is high-pass filtered, rectified, and zeroed where below 4. Peaks
    inside the requested window that are at most 50 samples apart are merged
    to their cluster midpoint. The processed signal and the joint markers are
    plotted to ``filename + '.html'``.

    Changes from the earlier revision:
      * Peaks are found on the whole-frame signal, so their indices are
        already row indices. The old code still added ``startIndex`` to them,
        which misplaced every joint whenever ``start`` was given. Peaks are
        now filtered to ``[startIndex, endIndex)`` instead.
      * The merge loop always folded the final peak into the open cluster and
        dropped a lone peak; both are fixed.
      * The summed flapper signals were computed and then overwritten by the
        compass columns without being used; that work is removed.

    Args:
        df: DataFrame with ``time`` and ``compassX/Y/Z``. Row labels are
            assumed to equal row positions (default RangeIndex).
        start, end: Window bounds, converted with ``MinToMS(value, df)``;
            ``None`` means the first / last row's ``time``.
        filename: Plot title and HTML file stem.

    Returns:
        List of row indices of the detected joints.

    Raises:
        IndexError: if no row has ``time`` equal to the resolved start or end.
    """
    thresInterval = 50

    def mergeClosePeaks(peaks):
        """Collapse runs of nearby peaks into one midpoint index per run."""
        merged = []
        if len(peaks) == 0:
            return merged
        clusterFirst = clusterLast = peaks[0]
        for peak in peaks[1:]:
            if peak - clusterLast > thresInterval:
                merged.append(int((clusterFirst + clusterLast) / 2))
                clusterFirst = peak
            clusterLast = peak
        merged.append(int((clusterFirst + clusterLast) / 2))
        return merged

    if start is None:
        start = df.iloc[0]['time'].item()
    else:
        start = MinToMS(start, df)
    if end is None:
        end = df.tail(1)['time'].item()
    else:
        end = MinToMS(end, df)

    startIndex = df.index[df['time'] == start].tolist()[0]
    endIndex = df.index[df['time'] == end].tolist()[0]

    magnitude = -((df.compassX ** 2 + df.compassY ** 2 + df.compassZ ** 2) ** (1/2))
    # np.array(...) copies, so the thresholding below cannot touch df.
    # assumes the filter helper returns array-like output of the same length — TODO confirm
    v = np.abs(np.array(butter_highpass_filter(magnitude, 1, 50, 5), dtype=float))
    v[v < 4] = 0

    # Peak positions index the whole-frame signal; keep those inside the window.
    peaksAll, _ = ss.find_peaks(v)
    peaksInWindow = peaksAll[(peaksAll >= startIndex) & (peaksAll < endIndex)]
    resAcc = mergeClosePeaks(peaksInWindow)

    traceCompassPoint = go.Scattergl(
        x=df.time[resAcc]/timeConversionVal,
        y=[-10] * len(resAcc),
        mode='markers',
        name='jointonCompass',
        marker=dict(color='rgb(127, 127, 127)')
    )

    traceCompass = go.Scattergl(
        x=df.time[startIndex: endIndex]/timeConversionVal,
        y=v[startIndex:endIndex],
        name='compassSum',
        line=dict(color='rgb(127, 127, 127)', width=1)
    )

    layout = dict(title=filename,
                  xaxis=dict(title='Time(minutes)'),
                  yaxis=dict(title='Friction'))

    fig = dict(data=[traceCompass, traceCompassPoint], layout=layout)
    plotly.offline.plot(fig, filename=filename + '.html')

    return resAcc  # peaks index list

In [7]:
# Generate the speed chart and add three columns (Velocity, Probability, Avg) to the dataframe.
def speedChart(df, resAcc, start=None, end=None, filename='speedChart'):
    """Turn joint indices into a per-row interval estimate with a confidence.

    Steps:
      1. ``velocityCal`` gives the time between consecutive joints.
      2. For every joint a local mean over about 10 neighbouring intervals is
         taken; its confidence is ``1 - |interval - mean| / mean``.
      3. ``df`` gains the columns ``Velocity`` (interval), ``Probability`` and
         ``Avg`` (local mean), filled at the joint rows.
      4. Joints inside the window with confidence not below 0.8 become
         anchors. Rows before the first anchor and from the last anchor to
         the window end copy that anchor (confidence minus 0.1); rows between
         two anchors are linearly interpolated over ``time``.
      5. A chart is written to ``filename + '.html'``.

    Changes from the earlier revision:
      * The right-edge window ``time2[i-9:i]`` could wrap to a negative start
        and become empty, raising ``StatisticsError``; it is now clamped at 0.
      * A start-up ``mean`` of intervals near 6000 could raise on an empty
        list and its result was always overwritten; it is removed.
      * Writes use ``.loc`` instead of chained ``df[col][i] = ...``, which
        does not modify ``df`` under pandas copy-on-write.
      * The 'accelerationAvg' trace used to be a flat line at whatever value
        a loop variable last held; it now shows the local mean at each joint.

    Args:
        df: DataFrame with a ``time`` column; modified in place. Row labels
            are assumed to equal row positions (default RangeIndex).
        resAcc: Row indices of the detected joints.
        start, end: Window bounds, converted with ``MinToMS(value, df)``;
            ``None`` means the first / last row's ``time``.
        filename: Plot title and HTML file stem.

    Returns:
        List of per-joint confidence values, aligned with ``resAcc``.

    Raises:
        IndexError: if no row has ``time`` equal to the resolved start or end.
    """
    if start is None:
        start = df.iloc[0]['time'].item()
    else:
        start = MinToMS(start, df)
    if end is None:
        end = df.tail(1)['time'].item()
    else:
        end = MinToMS(end, df)

    startIndex = df.index[df['time'] == start].tolist()[0]
    endIndex = df.index[df['time'] == end].tolist()[0]

    # Interval between consecutive joints, re-indexed 0..n-1.
    time2 = velocityCal(df.time, resAcc).reset_index(drop=True)
    count = len(time2)

    window = 10
    before = int(window/2 - 1)
    after = int(window/2 + 1)

    con2 = []
    avg = []
    for i in range(count):
        if i - before < 0:
            # Too close to the start: look forward from i.
            lo, hi = i, i + window
        elif i + after <= count:
            lo, hi = i - before, i + after
        else:
            # Too close to the end: look back from (and excluding) i.
            lo, hi = max(i - (window - 1), 0), i
        localMean = mean(time2.iloc[lo:hi].tolist())
        con2.append(1 - abs(time2.iloc[i] - localMean) / localMean)
        avg.append(localMean)

    # Fill the result into df (float columns, so later float writes need no upcast).
    df['Velocity'] = 0.0
    df['Probability'] = 0.0
    df['Avg'] = 0.0
    if count:
        # .values: assign by position, not by the reset 0..n-1 labels.
        df.loc[resAcc, 'Velocity'] = time2.values
        df.loc[resAcc, 'Probability'] = con2
        df.loc[resAcc, 'Avg'] = avg

    shrinkVal = 0.1  # rows that are not joints get a lower probability
    thresPro = 0.8   # threshold for selecting a joint as an anchor

    # Anchors: rows in the window whose probability is not below the threshold.
    windowProb = df['Probability'].iloc[startIndex:endIndex]
    anchors = windowProb.index[~(windowProb < thresPro).values].tolist()
    lastRow = endIndex - 1

    first = None  # (velocity, probability, time, row) of the previous anchor
    for index in anchors:
        current = (df.at[index, 'Velocity'], df.at[index, 'Probability'],
                   df.at[index, 'time'], index)
        if first is None:
            # Head segment: copy the first anchor backwards to the window start.
            if index > startIndex:
                df.loc[startIndex:index - 1, 'Velocity'] = current[0]
                df.loc[startIndex:index - 1, 'Probability'] = current[1] - shrinkVal
            first = current
        elif index != lastRow:
            if index - first[3] > 1:
                k = (current[0] - first[0]) / (current[2] - first[2])
                b = current[0] - k * current[2]
                avg2 = (df.at[index, 'Avg'] + df.at[first[3], 'Avg']) / 2
                lo, hi = first[3] + 1, index - 1  # .loc slices include both ends
                fitted = k * df.loc[lo:hi, 'time'] + b
                df.loc[lo:hi, 'Velocity'] = fitted
                df.loc[lo:hi, 'Probability'] = (1 - abs(fitted - avg2) / avg2) - shrinkVal
            first = current
        # An anchor on the very last window row is ignored, as before.

    # Tail segment: copy the last anchor forward to the window end. As before,
    # this also lowers the anchor row's own probability by shrinkVal.
    if first is not None and first[3] < lastRow:
        df.loc[first[3]:lastRow, 'Velocity'] = first[0]
        df.loc[first[3]:lastRow, 'Probability'] = first[1] - shrinkVal

    timeConversionVal = 60000

    traceAvg2 = go.Scattergl(
        x=df.time[resAcc]/timeConversionVal,
        y=avg,
        mode='lines',
        name='accelerationAvg',
        line=dict(color='rgb(0, 0, 255)')
    )

    traceAcceleration = go.Scattergl(
        x=df.time[resAcc]/timeConversionVal,
        y=time2,
        mode='markers',
        name='compass',
        marker=dict(color='rgb(0, 0, 255)')
    )

    traceFit = go.Scattergl(
        x=df.time[startIndex: endIndex] / timeConversionVal,
        y=df.Velocity[startIndex: endIndex],
        mode='lines',
        name='velocity',
        line=dict(color='rgb(127, 127, 127)')
    )

    layout = dict(title=filename,
                  xaxis=dict(title='Time(minutes)'),
                  yaxis=dict(title='timeInterval'))
    fig = dict(data=[traceAcceleration, traceAvg2, traceFit], layout=layout)
    plotly.offline.plot(fig, filename=filename + '.html')

    return con2

In [11]:
# Run 11: compass-based joint detection, interval fitting, then the speed plot.
# NOTE(review): speedPlot is defined in a later cell, so on a fresh kernel that
# cell must be executed before this one.
run = '11'
df1 = pd.read_csv('/Users/meng/Documents/dataPreprocessing/venv/source/sa_run' + str(run) + '_trim.csv')
transVolt(df1)
res = jointDetectionCompass(df1, filename='run' + str(run) + 'jointDetection')
con = speedChart(df1, res, filename='run' + str(run) + 'speedChart')
# speedChart writes 'Velocity' / 'Probability' / 'Avg', while speedPlot reads the
# lower-case names; without this rename speedPlot raises KeyError: 'velocity'.
df1 = df1.rename(columns={'Velocity': 'velocity', 'Probability': 'probability', 'Avg': 'avg'})
speedPlot(df1, filename='speed vs time run' + str(run))

In [31]:
# Save df1 under a name built from the current value of `run`.
speedCsvName = 'run' + str(run) + ' with speed.csv'
df1.to_csv(speedCsvName)

In [149]:
from vector import dot

In [150]:
def velocityFromAcc(df):
    """Integrate the heading-axis acceleration and return its smoothed slope.

    Each accelerometer sample (``accX/Y/Z`` divided by 10) is projected onto
    the fixed unit vector ``[0, 0, 1]`` with ``dot``. The projection is
    accumulated over the ``time`` deltas (divided by 1000) into a velocity
    series, the velocity is differenced over the same deltas, and the result
    is smoothed with ``movingaverage(..., 500)``.

    Both the integration and the differencing multiply by 3.2808399
    (NOTE(review): this looks like a metres-to-feet factor applied twice —
    confirm the intended units).

    Args:
        df: DataFrame with ``accX``, ``accY``, ``accZ`` and ``time``; values
            are looked up by the labels 0..n-1.

    Returns:
        The output of ``movingaverage`` over the velocity slope.
    """
    sampleCount = len(df.index)
    unitFactor = 3.2808399

    scaledX = df.accX/10
    scaledY = df.accY/10
    scaledZ = df.accZ/10

    # Heading is hard-wired to the z axis, one fresh unit vector per sample.
    projected = [
        dot([scaledX[k], scaledY[k], scaledZ[k]], [0, 0, 1])
        for k in range(sampleCount)
    ]

    # The first sample is integrated over a nominal 0.02 step.
    velocity = [projected[0] * 0.02 * unitFactor]
    slope = [0]
    for k in range(1, sampleCount):
        elapsed = df.time[k] - df.time[k - 1]
        velocity.append(velocity[k - 1] + (projected[k] * elapsed/1000 * unitFactor))
        slope.append((velocity[k] - velocity[k - 1]) / (elapsed/1000) * unitFactor)

    return movingaverage(slope, 500)

In [142]:
def speedPlot(df, filename='speedPlot'):
    """Convert the interval columns to speed in place and plot them.

    ``df['velocity']`` and ``df['avg']`` are overwritten with
    ``20 / (value / 1000)`` (presumably 20 feet between joints and intervals
    in milliseconds, matching the 'speed(feet/sec)' axis title — TODO confirm).
    Because the columns are overwritten, calling this twice on the same frame
    applies the conversion twice.

    The converted velocity and the ``probability`` column are drawn against
    ``time / timeConversionVal`` and written to ``filename + '.html'``.

    Args:
        df: DataFrame with ``time``, ``velocity``, ``avg`` and ``probability``.
        filename: Plot title and HTML file stem.
    """
    for column in ('velocity', 'avg'):
        df[column] = 20 / (df[column]/1000)

    minutes = df.time / timeConversionVal

    def lineTrace(values, label, rgb):
        return go.Scattergl(
            x=minutes,
            y=values,
            mode='lines',
            name=label,
            line=dict(color=rgb)
        )

    fig = dict(
        data=[
            lineTrace(df.velocity, 'Velocity', 'rgb(0, 0, 255)'),
            lineTrace(df.probability, 'Probability', 'rgb(255, 0, 0)'),
        ],
        layout=dict(title=filename,
                    xaxis=dict(title='Time(minutes)'),
                    yaxis=dict(title='speed(feet/sec)'))
    )
    plotly.offline.plot(fig, filename=filename + '.html')

In [143]:
def movingaverage(interval, window_size):
    """Smooth ``interval`` with a flat kernel of ``int(window_size)`` taps.

    Every tap weighs ``1 / window_size``. The convolution uses mode 'same',
    so the output has length ``max(len(interval), int(window_size))``.
    """
    taps = int(window_size)
    kernel = np.full(taps, 1.0) / float(window_size)
    return np.convolve(interval, kernel, mode='same')

In [195]:
def jointDetection(df, filename='jointDetection'):
    """Locate joints from the compass magnitude over the whole frame.

    NOTE(review): this re-uses the name of the flapper/accelerometer detector
    defined in an earlier cell and replaces it once this cell has run.

    Pipeline:
      1. Magnitude of ``(compassX, compassY, compassZ)``, clipped from above
         by its own 500-sample moving average.
      2. Low-pass then high-pass filter; samples below 1 are zeroed.
      3. Non-zero runs of 25 samples or more are cleared.
      4. Peaks at most 70 samples apart form a cluster; each cluster is
         represented by the higher of its first and last peak.
      5. The first detected joint is discarded (as in the earlier revision;
         the reason is not recorded in the notebook).

    Changes from the earlier revision:
      * The cluster loop always folded the final peak into the open cluster
        without checking its distance, and produced nothing for a lone peak.
      * A wide run that reached the end of the signal was never cleared.
      * The moving average is trimmed to the signal length, so recordings
        shorter than the 500-sample window no longer mis-align.

    Args:
        df: DataFrame with ``time`` and ``compassX/Y/Z``. Row labels are
            assumed to equal row positions (default RangeIndex).
        filename: Plot title and HTML file stem.

    Returns:
        List of row indices of the detected joints.
    """
    x = df.compassX
    y = df.compassY
    z = df.compassZ

    raw = ((x ** 2 + y ** 2 + z ** 2) ** (1/2))

    # Clip the magnitude so it never exceeds its moving average.
    ma = movingaverage(raw, 500)[:len(raw)]
    v = pd.Series(np.where(raw.values > ma, ma, raw.values), index=raw.index)

    # np.array(...) copies, so the in-place edits below stay local.
    # assumes the filter helpers return array-like output of the same length — TODO confirm
    v_processed = np.array(
        butter_highpass_filter(butter_lowpass_filter(v, 2.5, 50, 5), 0.5, 50, 5),
        dtype=float)
    v_processed[v_processed < 1] = 0

    # Clear non-zero runs that are too wide to be a joint.
    width = 25
    total = len(v_processed)
    pos = 0
    while pos < total:
        if v_processed[pos] == 0:
            pos += 1
            continue
        runEnd = pos
        while runEnd < total and v_processed[runEnd] != 0:
            runEnd += 1
        if runEnd - pos + 1 > width:
            v_processed[pos:runEnd] = 0
        pos = runEnd

    # Generate the peak indices.
    peaksIndex, _ = ss.find_peaks(v_processed)

    # Merge close peaks.
    thresInterval = 70

    def higherEnd(firstPeak, lastPeak):
        """Pick the cluster's first or last peak, whichever is higher (ties: first)."""
        if v_processed[firstPeak] >= v_processed[lastPeak]:
            return firstPeak
        return lastPeak

    res = []
    if len(peaksIndex):
        clusterFirst = clusterLast = peaksIndex[0]
        for peak in peaksIndex[1:]:
            if peak - clusterLast > thresInterval:
                res.append(higherEnd(clusterFirst, clusterLast))
                clusterFirst = peak
            clusterLast = peak
        res.append(higherEnd(clusterFirst, clusterLast))

    res = res[1:]

    trace0 = go.Scattergl(
        x=df.time/timeConversionVal,
        y=v,
        name='compass',
        line=dict(color='rgb(25, 25, 25)', width=1)
    )

    trace1 = go.Scattergl(
        x=df.time/timeConversionVal,
        y=v_processed,
        name='processed compass',
        line=dict(color='rgb(127, 127, 127)', width=1)
    )

    trace2 = go.Scattergl(
        x=df.time[res] / timeConversionVal,
        y=v_processed[res],
        mode='markers',
        marker=dict(color='rgb(225, 0, 0)', size=5)
    )

    trace3 = go.Scattergl(
        x=df.time[peaksIndex] / timeConversionVal,
        y=v_processed[peaksIndex],
        mode='markers',
        marker=dict(color='rgb(0, 0, 225)', size=5)
    )

    layout = dict(title=filename,
                  xaxis=dict(title='Time(minutes)'),
                  yaxis=dict(title='Megnetic'))

    fig = dict(data=[trace0, trace1, trace3, trace2], layout=layout)
    plotly.offline.plot(fig, filename=filename + '.html')

    return res

In [230]:
def speedCalculation(df, res, filename='speedCalculation'):
    """Turn joint indices into a per-row interval estimate with a confidence.

    Steps:
      1. ``velocityCal`` gives the time between consecutive joints; a
         10-point moving average of those raw intervals is the reference.
      2. An interior interval that is within 2 +/- 0.2 times both its
         neighbours is replaced by the previous interval (the original
         comment: a lost joint doubles the time between two peaks).
      3. Confidence per joint is ``1 - |interval - reference| / reference``.
      4. ``df`` gains ``velocity`` (interval), ``probability`` and ``avg``,
         filled at the joint rows. Joints with confidence not below 0.1 are
         anchors: rows before the first anchor and from the last anchor to
         the end copy that anchor (confidence minus 0.1); rows between two
         anchors are linearly interpolated over ``time``.
      5. A chart is written to ``filename + '.html'``.

    Changes from the earlier revision:
      * Writes use ``.loc`` instead of chained ``df[col][i] = ...``, which
        does not modify ``df`` under pandas copy-on-write.
      * Interval values are assigned by position; previously a Series with a
        reset index was assigned to the ``res`` labels.
      * The neighbour check uses explicit bounds instead of catching
        ``KeyError`` inside ``try/finally``.
      * An empty ``res`` no longer raises inside ``np.convolve``.
      * The confidences are now returned, matching ``speedChart``; the
        function used to return ``None``.

    Args:
        df: DataFrame with a ``time`` column; modified in place. Row labels
            are assumed to equal row positions (default RangeIndex).
        res: Row indices of the detected joints.
        filename: Plot title and HTML file stem.

    Returns:
        List of per-joint confidence values, aligned with ``res``.
    """
    errorPercent = 0.2
    shrinkVal = 0.1  # rows that are not joints get a lower probability
    thresPro = 0.1   # threshold for selecting a joint as an anchor
    timeConversionVal = 60000

    gaps = velocityCal(df.time, res).reset_index(drop=True)
    count = len(gaps)

    # Reference comes from the raw intervals, before any correction below.
    # Trimmed to `count` because mode 'same' pads when there are fewer than 10 joints.
    avg = movingaverage(gaps, 10)[:count] if count else np.array([])

    intervals = gaps.values.copy()
    lower = 2 - errorPercent
    upper = 2 + errorPercent
    con = []
    for i in range(count):
        # Only interior points have both neighbours; the ends are never corrected.
        if 0 < i < count - 1:
            cur, prev, nxt = intervals[i], intervals[i - 1], intervals[i + 1]
            if lower * prev < cur < upper * prev and lower * nxt < cur < upper * nxt:
                intervals[i] = prev
        con.append(1 - abs(intervals[i] - avg[i]) / avg[i])

    # Fill the result into df (float columns, so later float writes need no upcast).
    df['velocity'] = 0.0
    df['probability'] = 0.0
    df['avg'] = 0.0
    if count:
        df.loc[res, 'velocity'] = intervals
        df.loc[res, 'probability'] = con
        df.loc[res, 'avg'] = avg

    # Anchors: rows whose probability is not below the threshold.
    probability = df['probability']
    anchors = probability.index[~(probability < thresPro).values].tolist()
    lastRow = len(df.index) - 1

    first = None  # (velocity, probability, time, row) of the previous anchor
    for index in anchors:
        current = (df.at[index, 'velocity'], df.at[index, 'probability'],
                   df.at[index, 'time'], index)
        if first is None:
            # Head segment: copy the first anchor backwards to row 0.
            if index > 0:
                df.loc[0:index - 1, 'velocity'] = current[0]
                df.loc[0:index - 1, 'probability'] = current[1] - shrinkVal
            first = current
        elif index != lastRow:
            if index - first[3] > 1:
                k = (current[0] - first[0]) / (current[2] - first[2])
                b = current[0] - k * current[2]
                avg2 = (df.at[index, 'avg'] + df.at[first[3], 'avg']) / 2
                lo, hi = first[3] + 1, index - 1  # .loc slices include both ends
                fitted = k * df.loc[lo:hi, 'time'] + b
                df.loc[lo:hi, 'velocity'] = fitted
                df.loc[lo:hi, 'probability'] = (1 - abs(fitted - avg2) / avg2) - shrinkVal
            first = current
        # An anchor on the very last row is ignored, as before.

    # Tail segment: copy the last anchor forward to the final row. As before,
    # this also lowers the anchor row's own probability by shrinkVal.
    if first is not None and first[3] < lastRow:
        df.loc[first[3]:lastRow, 'velocity'] = first[0]
        df.loc[first[3]:lastRow, 'probability'] = first[1] - shrinkVal

    traceAcceleration = go.Scattergl(
        x=df.time[res]/timeConversionVal,
        y=intervals,
        mode='markers',
        name='compass',
        marker=dict(color='rgb(0, 0, 255)')
    )

    traceFit = go.Scattergl(
        x=df.time / timeConversionVal,
        y=df.velocity,
        mode='lines',
        name='velocity',
        line=dict(color='rgb(127, 127, 127)')
    )

    layout = dict(title=filename,
                  xaxis=dict(title='Time(minutes)'),
                  yaxis=dict(title='timeInterval'))
    fig = dict(data=[traceAcceleration, traceFit], layout=layout)
    plotly.offline.plot(fig, filename=filename + '.html')

    return con

In [1]:
# Run 08: compass-based joint detection, interval fitting, then the speed plot.
# (A step that re-zeroed the `time` column after loading was left disabled here.)
run = '08'
trimmedCsv = '/Users/meng/Documents/dataPreprocessing/venv/source/sa_run' + str(run) + '_trim.csv'
df = pd.read_csv(trimmedCsv)
res = jointDetection(df, 'jointDetection' + str(run))
speedCalculation(df, res, 'speedCalculation' + str(run))
speedPlot(df, 'speedPlot' + str(run))

NameError: name 'pd' is not defined

In [229]:
# The date tag was written as '04\04'; in a normal string literal '\04' is an octal
# escape (chr(4)), which put a control character into the file name. Use a hyphen.
df.to_csv('run' + str(run) + ' with speed (04-04).csv')