In [1]:
import os
import pandas as pd
import csv
import math
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Input and output locations for one processing run (data is organised as scooter/date/).
# data_folder: directory with one sub-folder per recording; the batch loop reads
#              Accelerometer.csv and Location.csv from each sub-folder.
# result_path: CSV file that receives the aggregated per-location result table.
# Edit both paths before processing a different scooter or recording date.
data_folder = r'I:\Ailing\JC\Walkability Study\Scooter1\3_16_2020'
result_path = r'I:\Ailing\JC\Walkability Study\Scooter1\result\s1_result_formated_3.csv'

In [3]:
def combine_data(data_folder):
    """Parse the accelerometer and GPS CSVs of every recording in ``data_folder``.

    Every entry of ``data_folder`` is treated as a recording directory that
    holds ``Accelerometer.csv`` and ``Location.csv``.  Both files are read with
    pandas, but the resulting frames are neither stored nor returned, so the
    function always returns ``None``; a missing or unreadable file raises.
    """
    for recording in os.listdir(data_folder):
        recording_dir = os.path.join(data_folder, recording)
        # Accelerometer file first, then the GPS file.
        df_Ac = pd.read_csv(os.path.join(recording_dir, 'Accelerometer.csv'))
        df_Lo = pd.read_csv(os.path.join(recording_dir, 'Location.csv'))
        
def location(gps_path, data_path):
    """Attach a GPS position to each accelerometer sample and rewrite the file.

    An accelerometer row is matched to the first GPS row whose
    ``timeIntervalSince1970`` falls in the same whole second (both values are
    truncated with ``int``).  Rows without any GPS fix in their second are
    dropped.  The accelerometer CSV at ``data_path`` is then overwritten with
    the columns timestamp, timeIntervalSince1970, x, y, z, lat, long.

    Parameters
    ----------
    gps_path : str
        CSV with ``timeIntervalSince1970``, ``latitude`` and ``longitude`` columns.
    data_path : str
        Accelerometer CSV with ``timestamp``, ``timeIntervalSince1970``, ``x``,
        ``y`` and ``z`` columns.  This file is modified in place.

    Returns
    -------
    None
    """
    df_acc = pd.read_csv(data_path)
    df_gps = pd.read_csv(gps_path)

    # Row number of the first GPS fix recorded in each whole second.  A single
    # dictionary pass replaces the former scan of the whole GPS table for
    # every accelerometer sample.
    first_fix = {}
    for row, stamp in enumerate(df_gps['timeIntervalSince1970']):
        first_fix.setdefault(int(stamp), row)

    matches = [first_fix.get(int(stamp)) for stamp in df_acc['timeIntervalSince1970']]
    has_fix = pd.Series([m is not None for m in matches], index=df_acc.index, dtype=bool)
    gps_rows = [m for m in matches if m is not None]

    # Keep only samples that have a fix.  This also covers an empty GPS file,
    # where every sample is dropped instead of failing on a length mismatch.
    df_acc = df_acc.loc[has_fix].copy()
    df_acc['lat'] = df_gps['latitude'].iloc[gps_rows].to_numpy()
    df_acc['long'] = df_gps['longitude'].iloc[gps_rows].to_numpy()
    df_acc = df_acc[['timestamp', 'timeIntervalSince1970', 'x', 'y', 'z', 'lat', 'long']]
    df_acc.to_csv(data_path)
    
def reorientation(data_path):
    """Rotate each accelerometer sample onto the vertical axis and save the result.

    For every row the roll and pitch angles are estimated from the sample
    itself,

        roll  = atan(y / z)
        pitch = atan(-x / sqrt(y**2 + z**2))

    and the reoriented vertical component is

        -sin(pitch) * x + cos(pitch) * sin(roll) * y + cos(pitch) * cos(roll) * z

    The angles use the inverse tangent (``math.atan``).  The previous code
    raised ``tan`` to the power -1, which is the cotangent, not the angle.

    Rows where x, y or z is exactly 0 get a reoriented value of 0, as before.

    Parameters
    ----------
    data_path : str
        CSV with ``timestamp``, ``timeIntervalSince1970``, ``x``, ``y``, ``z``,
        ``lat`` and ``long`` columns.  The file is overwritten with an added
        ``reoriented`` column.

    Returns
    -------
    list
        The reoriented value of every row, in file order.
    """
    df = pd.read_csv(data_path)
    oriented = []
    for x, y, z in zip(df['x'], df['y'], df['z']):
        if x == 0 or y == 0 or z == 0:
            # Kept from the original behaviour; it also protects y / z below.
            oriented.append(0)
            continue
        roll = math.atan(y / z)
        pitch = math.atan(-x / math.hypot(y, z))
        oriented.append(
            -1 * math.sin(pitch) * x
            + math.cos(pitch) * math.sin(roll) * y
            + math.cos(pitch) * math.cos(roll) * z
        )
    df['reoriented'] = oriented
    df = df[['timestamp', 'timeIntervalSince1970', 'x', 'y', 'z', 'lat', 'long', 'reoriented']]
    df.to_csv(data_path)
    return oriented

def thresholding_algo(y, lag, threshold_std_low, influence, threshold_remove, threshold_std_up):
    """Flag samples that deviate from a moving baseline; return +1 / -1 / 0 per sample.

    Parameters
    ----------
    y : sequence of numbers
        Input signal (the reoriented data returned by ``reorientation``).
    lag : int
        Number of samples in the moving window used for the baseline mean and
        standard deviation.
    threshold_std_low : float
        A sample is a candidate only if its distance from the previous baseline
        mean is strictly greater than this many baseline standard deviations.
    influence : float
        Weight given to a flagged sample when it enters the baseline:
        ``influence * y[i] + (1 - influence) * previous filtered value``.
    threshold_remove : float
        Candidates whose value lies within ``[-threshold_remove, threshold_remove]``
        are not flagged.
    threshold_std_up : float
        Candidates whose distance from the baseline mean is this many standard
        deviations or more are not flagged.

    Returns
    -------
    list
        One value per sample: 1 (above baseline), -1 (below baseline) or 0.
        The first ``lag`` entries are always 0.  When ``y`` holds fewer than
        ``lag`` samples no baseline can be formed and every entry is 0.
    """
    n = len(y)
    signals = np.zeros(n)
    if n < lag:
        # Not enough samples to seed the baseline; previously an IndexError.
        return list(signals)

    # Float copy, so the influence-weighted value is not truncated for integer input.
    filteredY = np.array(y, dtype=float)
    avgFilter = [0] * n
    stdFilter = [0] * n
    avgFilter[lag - 1] = np.mean(filteredY[0:lag])
    stdFilter[lag - 1] = np.std(filteredY[0:lag])

    for i in range(lag, n):
        deviation = abs(y[i] - avgFilter[i - 1])
        within_band = (deviation > threshold_std_low * stdFilter[i - 1]
                       and deviation < threshold_std_up * stdFilter[i - 1])
        large_enough = y[i] > threshold_remove or y[i] < -1 * threshold_remove
        if within_band and large_enough:
            signals[i] = 1 if y[i] > avgFilter[i - 1] else -1
            # Dampen the flagged sample before it enters the baseline window.
            filteredY[i] = influence * y[i] + (1 - influence) * filteredY[i - 1]
        else:
            signals[i] = 0
            filteredY[i] = y[i]
        window = filteredY[(i - lag + 1):i + 1]
        avgFilter[i] = np.mean(window)
        stdFilter[i] = np.std(window)

    return list(signals)

def filter_detect(step, signals, rawdata, threshold_remain):
    """Keep only signals confirmed by an opposite swing or by a large raw value.

    For every non-zero entry ``signals[i]`` the next ``step`` entries are
    scanned in order:

    * an entry with the same sign stops the scan;
    * an entry with the opposite sign confirms ``i`` (output 1), and both
      entries are marked as paired so the partner is not processed again;
    * on a 0 entry, ``i`` is confirmed (output 1) if ``rawdata[i]`` exceeds
      ``threshold_remain`` in the direction of the signal, and the scan goes on.

    Parameters
    ----------
    step : int
        Look-ahead distance in samples.
    signals : sequence
        Values 1 / -1 / 0, e.g. the output of ``thresholding_algo``.  It is no
        longer modified; the previous code appended ``step + 1`` zeros to it.
    rawdata : sequence of numbers
        Signal values aligned with ``signals``.
    threshold_remain : float
        Magnitude above which an unpaired signal is kept anyway.

    Returns
    -------
    list
        ``len(signals)`` entries, each 0 or 1 (negative signals are reported as 1).
    """
    n = len(signals)
    # Zero-padded private copy so the look-ahead can run past the end.
    padded = list(signals) + [0] * (step + 1)
    newsignals = [0] * n
    paired = [False] * len(padded)

    for i in range(n):
        if paired[i]:
            continue
        sign = padded[i]
        if sign != 1 and sign != -1:
            continue
        for j in range(i + 1, i + step + 1):
            if padded[j] == sign:
                break
            if padded[j] == -sign:
                newsignals[i] = 1
                paired[i] = True
                paired[j] = True
                break
            if sign == 1:
                strong = rawdata[i] > threshold_remain
            else:
                strong = rawdata[i] < -1 * threshold_remain
            if strong:
                newsignals[i] = 1
    return newsignals

def select_central(signal):
    """Collapse every run of consecutive 1s (or -1s) to a single mark at its centre.

    For a run occupying indices ``i .. j-1`` the mark is placed at
    ``(i + j) // 2``; all other positions are 0.  Runs that extend to the end
    of ``signal`` are handled as well (they were previously dropped because the
    scan never met a terminating element).

    Parameters
    ----------
    signal : sequence
        Values 1 / -1 / 0.

    Returns
    -------
    list
        Same length as ``signal``, containing 1, -1 or 0.
    """
    n = len(signal)
    single_signal = [0] * n
    start = 0
    while start < n:
        value = signal[start]
        if value != 1 and value != -1:
            start += 1
            continue
        # Advance to the first index past the run (n if the run hits the end).
        end = start
        while end < n and signal[end] == value:
            end += 1
        single_signal[(start + end) // 2] = 1 if value == 1 else -1
        start = end
    return single_signal

# Batch run: process every recording folder under data_folder, write the
# per-sample result back to each Accelerometer.csv, then aggregate all
# recordings by GPS position into result_path.

# Parameters of thresholding_algo (the same for every recording).
lag = 100
threshold_std_low = 2
influence = 0
threshold_remove = 2.5
threshold_std_up = 4.5

# Parameters of filter_detect.
step = 10
threshold_remain = 5.5

# Sorted so recordings are processed in a reproducible order.
folder_list = sorted(os.listdir(data_folder))
result_list = []
for folder in folder_list:
    data_folder_path = os.path.join(data_folder, folder)
    if not os.path.isdir(data_folder_path):
        # Stray files next to the recording folders would otherwise abort the run.
        continue
    print(data_folder_path)
    data_path = os.path.join(data_folder_path, 'Accelerometer.csv')
    gps_path = os.path.join(data_folder_path, 'Location.csv')

    # Each step rewrites Accelerometer.csv in place: first lat/long, then 'reoriented'.
    location(gps_path, data_path)
    o_z = reorientation(data_path)

    signals = thresholding_algo(o_z, lag, threshold_std_low, influence, threshold_remove, threshold_std_up)
    new_signals = filter_detect(step, signals, o_z, threshold_remain)
    result = select_central(new_signals)

    df = pd.read_csv(data_path)
    df['result'] = result
    df = df[['timestamp', 'timeIntervalSince1970', 'x', 'y', 'z', 'lat', 'long', 'reoriented', 'result']]
    result_list.append(df)
    df.to_csv(data_path)

# One row per (lat, long); 'result' becomes 1 where the summed marks at that position are positive.
result_df = pd.concat(result_list)
result_formated = result_df.groupby(['lat', 'long']).sum()
result_formated.loc[result_formated.result > 0, 'result'] = 1
result_formated.to_csv(result_path)

I:\Ailing\JC\Walkability Study\Scooter1\3_16_2020\1_Avenue_A_Victor
I:\Ailing\JC\Walkability Study\Scooter1\3_16_2020\1_Avenue_B_Andrea
I:\Ailing\JC\Walkability Study\Scooter1\3_16_2020\1_Avenue_C_Victor
I:\Ailing\JC\Walkability Study\Scooter1\3_16_2020\1_Bringham_Victor
I:\Ailing\JC\Walkability Study\Scooter1\3_16_2020\1_Laurel_Andrea
I:\Ailing\JC\Walkability Study\Scooter1\3_16_2020\1_Lester_Victor
