In [58]:
import pandas as pd
import numpy as np
from datetime import datetime
# Geolocation service
import geopy as gp
from geopy.distance import vincenty

In [85]:
# Notebook-wide settings used to recognise daily commute trips.

# Reference locations, ordered (latitude, longitude) to match the GPS array
# assembled from kff1006/kff1005 in the per-session loop.
HOME_GPS = np.array((41.864080, -87.624100))
WORK_GPS = np.array((42.059570, -87.674778))

# Minimum trip length in seconds for daily commute trips
COMMUTE_MIN_TRIP_LEN = 30 * 60
# Distance threshold in miles for matching the trip endpoints to home/work
COMMUTE_MIN_DST_DIST = 2.0
# Maximum trip distance in miles
COMMUTE_MAX_TRIP_DIST = 30.0

In [86]:
# Load the raw log and group its records by session id; every group is
# processed as one trip. NOTE: `df` is a GroupBy object, not a DataFrame.
df = pd.read_csv('raw_logs.csv', header=0).groupby('session')
for session, sdf in df:
    # The session id is treated as an epoch timestamp in milliseconds.
    timeStr = datetime.fromtimestamp(session/1000.0).strftime('%Y-%m-%d %H:%M:%S')

    # Order records chronologically so the first and last rows are the
    # start and end points of the trip.
    sdf = sdf.sort_values('time')

    # Remove invalid values (records whose k43 reading is zero)
    sdf = sdf[sdf.k43 != 0]

    # The filter above can remove every record of a session. Without this
    # guard the endpoint lookups below (gpsLoc[:, 0]) raise IndexError on
    # the empty array and abort the whole run.
    if sdf.empty:
        continue

    # Trip start/end timestamps (milliseconds) and total length in seconds
    sts = sdf.time.min()
    ets = sdf.time.max()
    diffTs = ets - sts
    tripLen = diffTs / 1000.0

    # GPS track as a 2 x N array: row 0 = latitude (kff1006), row 1 = longitude (kff1005)
    gpsLoc = np.array([sdf.kff1006.tolist(), sdf.kff1005.tolist()])
    # Distances in miles from [trip start, trip end] to home and to work.
    # NOTE(review): `vincenty` is deprecated in newer geopy releases and
    # reportedly removed in geopy 2.0 (replacement: `geodesic`) -- confirm
    # against the installed geopy version.
    distHome = np.array([vincenty(gpsLoc[:, 0], HOME_GPS).miles, vincenty(gpsLoc[:, -1], HOME_GPS).miles])
    distWork = np.array([vincenty(gpsLoc[:, 0], WORK_GPS).miles, vincenty(gpsLoc[:, -1], WORK_GPS).miles])

    # Trip distance: largest value reported in the kff1204 column
    tripDist = sdf.kff1204.max()

    # Determine if this is a daily commute trip: long enough in time, short
    # enough in distance, and the sum of the closest endpoint-to-home and
    # closest endpoint-to-work distances is below the threshold.
    commuteTrip = (
        tripLen > COMMUTE_MIN_TRIP_LEN
        and tripDist < COMMUTE_MAX_TRIP_DIST
        and (distHome.min() + distWork.min()) < COMMUTE_MIN_DST_DIST
    )
    if commuteTrip:
        print('session {0} ({1}), {2} records'.format(session, timeStr, len(sdf.index)))
        print("\t----- Trip type: Commute Trip -----")
        # Trip information: start and end timestamps
        stsStr = datetime.fromtimestamp(sts/1000.0).strftime('%Y-%m-%d %H:%M:%S')
        etsStr = datetime.fromtimestamp(ets/1000.0).strftime('%Y-%m-%d %H:%M:%S')
        print("\t|- Trip start {0}, end {1}".format(stsStr, etsStr))
        print("\t|- Trip length {0} seconds ({1:.2f} minutes)".format(tripLen, diffTs/60000.0))
        print("\t|- Trip distance {0:.2f} miles".format(tripDist))
        # Ambient air temp: only reported when the readings vary at all
        # (a zero standard deviation means every logged value is identical).
        airTempList = sdf.k46.tolist()
        if np.std(airTempList) > 0:
            meanAirTemp = np.mean(airTempList)
            print("\t|- Ambient air temp {0:.2f} degree".format(meanAirTemp))

        print("\n")

    # Key values
    ### Engine
    ##### k11     =  Throttle Position(Manifold)
    ##### kc      =  Engine RPM
    ##### k46     =  Ambient air temp
    #
    ### Trip
    ##### kff1271 =  Fuel used (trip)
    ##### kff1204 =  Trip Distance
    ##### kff1205 =  Trip average MPG
    #
    ### GPS
    ##### kff1239 =  GPS Accuracy
    ##### kff1010 =  GPS Altitude
    ##### kff123b =  GPS Bearing
    ##### kff1006 =  GPS Latitude
    ##### kff1005 =  GPS Longitude

session 1518618621698 (2018-02-14 08:30:21), 3037 records
	----- Trip type: Commute Trip -----
	|- Trip start 2018-02-14 08:30:31, end 2018-02-14 09:21:02
	|- Trip length 3031.278 seconds (50.52 minutes)
	|- Trip distance 26.64 miles
	|- Ambient air temp 4.49 degree


session 1518649890183 (2018-02-14 17:11:30), 5348 records
	----- Trip type: Commute Trip -----
	|- Trip start 2018-02-14 17:11:36, end 2018-02-14 18:43:39
	|- Trip length 5522.998 seconds (92.05 minutes)
	|- Trip distance 25.19 miles
	|- Ambient air temp 13.73 degree


session 1518704971866 (2018-02-15 08:29:31), 3167 records
	----- Trip type: Commute Trip -----
	|- Trip start 2018-02-15 08:32:25, end 2018-02-15 09:25:09
	|- Trip length 3163.995 seconds (52.73 minutes)
	|- Trip distance 26.56 miles
	|- Ambient air temp 9.16 degree


session 1518730792039 (2018-02-15 15:39:52), 4468 records
	----- Trip type: Commute Trip -----
	|- Trip start 2018-02-15 15:40:06, end 2018-02-15 17:32:43
	|- Trip length 6756.998 seconds (112

In [15]:
# Show the column labels of the session frame left over from the trip loop.
sdf.sort_values(by='time').keys()

Index(['v', 'session', 'id', 'time', 'kff1005', 'kff1006', 'kff1001',
       'kff1007', 'k4', 'k2f', 'k11', 'k5', 'kc', 'kd', 'kf', 'kff1226',
       'kff1220', 'kff1221', 'k46', 'eml', 'k43', 'kff1271', 'k1f', 'kff1205',
       'kff1204', 'kff1266', 'kff1270'],
      dtype='object')