In [1]:
import pandas as pd
import numpy as np
from os import listdir
from os.path import isfile, join

def expand_interpolate(_filename,
                       input_dir='./data/0-raw_data/safety/features/',
                       output_dir='./data/1-preprocessed/',
                       max_gap_seconds=300):
    """Expand one raw feature CSV to a 1-second grid and interpolate the gaps.

    The file is read from ``input_dir``, rows are ordered by ``bookingID`` and
    ``second``, and every booking is cut into sub-trips (``bookingID2``)
    wherever two consecutive readings are more than ``max_gap_seconds`` apart.
    Each sub-trip is then resampled to one row per second and the inserted
    rows are filled by linear interpolation. The result is written to
    ``output_dir`` under the same file name.

    Parameters
    ----------
    _filename : str
        Name of the CSV file (without directory) to process.
    input_dir : str, optional
        Directory containing the raw CSV. Defaults to the original
        hard-coded location.
    output_dir : str, optional
        Directory receiving the expanded CSV; created if it does not exist.
    max_gap_seconds : int or float, optional
        A gap strictly larger than this starts a new sub-trip, because
        interpolating over such a long pause is not meaningful.

    Returns
    -------
    None
        The expanded data is written to disk. Its columns are ``bookingID``,
        ``bookingID2`` and then the sensor columns listed in ``data_cols``.

    Notes
    -----
    Interpolation is done per sub-trip, so values never bleed from one trip
    into the next. Missing readings at the very start of a sub-trip therefore
    stay NaN instead of being filled from the preceding trip.
    """
    import os  # local import: the notebook only imports individual names from os

    data_cols = ['Accuracy','Bearing',
                 'acceleration_x','acceleration_y','acceleration_z',
                 'gyro_x','gyro_y','gyro_z',
                 'Speed','second']
    trip_keys = ['bookingID', 'bookingID2']

    print("Processing %s" %_filename)
    df = pd.read_csv(os.path.join(input_dir, _filename))
    df = df.sort_values(by=['bookingID','second']).reset_index(drop=True)

    # Seconds elapsed since the previous reading of the same booking
    # (NaN for the first reading, which never starts a new sub-trip).
    gap = df.groupby('bookingID')['second'].diff()
    # Running count of "long gap" events per booking = sub-trip number.
    df['bookingID2'] = (
        (gap > max_gap_seconds)
        .astype('int64')
        .groupby(df['bookingID'])
        .cumsum()
    )

    # resample() needs a datetime-like index; 'second' is used as an offset
    # in seconds from the epoch purely to obtain one.
    df['time'] = pd.to_datetime(df['second'], unit='s')
    df = df.set_index('time')

    # Expand every sub-trip to one row per second ...
    print("Expanding %s" %_filename)
    expanded = df.groupby(trip_keys)[data_cols].resample('1s').asfreq()
    # ... and interpolate inside each sub-trip only. Interpolating the
    # concatenated frame in one go would let NaNs at a trip boundary be
    # filled from a different trip.
    expanded = expanded.groupby(level=trip_keys, group_keys=False).apply(
        lambda trip: trip.interpolate(method='linear')
    )
    expanded = expanded.reset_index().drop(columns=['time'])

    print("Saving... %s" %_filename)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    expanded.to_csv(os.path.join(output_dir, _filename), index=False)
    print("Finished with %s " %_filename)
    

In [2]:
# Directory holding the raw feature CSV files.
mypath = './data/0-raw_data/safety/features/'

# Keep only regular files whose name starts with 'part-'. listdir() returns
# entries in arbitrary order, so sort them to make the processing order (and
# the log output) reproducible between runs.
onlyfiles = sorted(
    f for f in listdir(mypath)
    if f.startswith('part-') and isfile(join(mypath, f))
)

# Expand and interpolate every file in turn; each call writes its own output.
for filename in onlyfiles:
    expand_interpolate(filename)
print("Finished to expand all files.")

Processing part-00001-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv
Expanding part-00001-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv
Saving... part-00001-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv
Finished with part-00001-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv 
Finished to expand all files.
Processing part-00003-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv
Expanding part-00003-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv
Saving... part-00003-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv
Finished with part-00003-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv 
Finished to expand all files.
Processing part-00004-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv
Expanding part-00004-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv
Saving... part-00004-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv
Finished with part-00004-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv 
Finished to expand all files.
Processing part-00000-e6120af0-10c2-4248-97c4-81baf4304e5c-c000.csv
Expanding part-00000