In [2]:
import pandas as pd
import numpy as np
from datetime import datetime
# Geolocation lib
import geopy as gp
from geopy.distance import vincenty

In [105]:
# Define some global constants
SHOW_INFO = False
HOME_GPS = np.array([41.864080, -87.624100])
WORK_GPS = np.array([42.059570, -87.674778])
COMMUTE_MIN_TRIP_LEN = 1800 # Minimum trip length in seconds for daily commute trips
COMMUTE_MIN_DST_DIST = 2.0 # Minimum trip destination length in miles
COMMUTE_MAX_TRIP_DIST = 19.0 # Maximum trip distance in miles
CAR_FUEL_TANK_SIZE = 15.9 # Gallons

In [106]:
df = pd.read_csv('raw_logs.csv',header=0).groupby('session')
tripData = pd.DataFrame()
for session, sdf in df:
    dtObj = datetime.fromtimestamp(session/1000.0)
    timeStr = dtObj.strftime('%Y-%m-%d %H:%M:%S')
    
    # Sort by timestamp get the final average MPG for every trip (kff1205)
    sdf = sdf.sort_values('time')
    # Remove invalid values
    sdf = sdf[sdf.k43 != 0]
    
    # Trip duration
    sts = sdf.time.min()
    ets = sdf.time.max()
    diffTs = ets - sts
    tripLen = diffTs / 1000.0
    # GPS
    gpsLoc = np.array([sdf.kff1006.tolist(), sdf.kff1005.tolist()])
    distHome = np.array([vincenty(gpsLoc[:, 0], HOME_GPS).miles, vincenty(gpsLoc[:, -1], HOME_GPS).miles])
    distWork = np.array([vincenty(gpsLoc[:, 0], WORK_GPS).miles, vincenty(gpsLoc[:, -1], WORK_GPS).miles])
    # Trip distance
    tripDist = sdf.kff1204.max() * 0.62137119223733  # Km to Miles
    # Determine if this is a daily commute trip
    commuteTrip = tripLen > COMMUTE_MIN_TRIP_LEN and tripDist < COMMUTE_MAX_TRIP_DIST and (distHome.min() + distWork.min()) < COMMUTE_MIN_DST_DIST
    # Trip type
    tripType = "commute-" if commuteTrip else "other"
    if commuteTrip:
        tripType += "work2home" if commuteTrip and distHome[0] > distWork[0] else "home2work" 
    # Fuel Usage
    fuelUsed = sdf.kff1271.max() * 0.26417205124156 # L to Gal
    # MPG
    averageMPG = tripDist / fuelUsed
    # Ambient air temp
    airTempList = sdf.k46.tolist()
    airTempValid = np.std(airTempList) > 0
    if airTempValid:
        meanAirTemp = np.mean(airTempList)
    
    # Dumps information
    if SHOW_INFO:
        print('session {0} ({1}), {2} records'.format(session, timeStr, len(sdf.index)))
        print("---------- Trip type: {0} ----------".format(tripType))
        # Print trip start and end timestamp
        stsStr = datetime.fromtimestamp(sts/1000.0).strftime('%Y-%m-%d %H:%M:%S')
        etsStr = datetime.fromtimestamp(ets/1000.0).strftime('%Y-%m-%d %H:%M:%S')
        print("\t|- Start {0}, end {1}".format(stsStr, etsStr))
        print("\t|- Duration {0} seconds ({1:.2f} minutes)".format(diffTs/1000.0, diffTs/60000.0))
        print("\t|- Distance {0:.2f} miles".format(tripDist))
        print("\t|- Fuel used {0:.3f} gal".format(fuelUsed))
        print("\t|- Average MPG {0:.3f}".format(averageMPG))
        if airTempValid:
            print("\t|- Ambient air temp {0:.2f} degree".format(meanAirTemp))

    # Save session data
    sessionData = pd.DataFrame({
        'session': session,
        'sTS': sts,
        'eTS': ets,
        'weekday': dtObj.strftime('%a'),
        'airTemp': meanAirTemp,
        'tripType': tripType,
        'duration': tripLen,
        'distance': tripDist,
        'fuelUsed': fuelUsed,
        'mpg': averageMPG,
    }, index=[0])
    sessionData.set_index('session', inplace=True)
    tripData = tripData.append(sessionData)
    
    # Key values
    ### Engine
    ##### k11     =  Throttle Position(Manifold)
    ##### kc      =  Engine RPM
    ##### k46     =  Ambient air temp
    ##### kff1271 =  Fuel used (trip)
    #
    ### Trip
    ##### kff1271 =  Fuel used (trip)
    ##### kff1204 =  Trip Distance
    ##### kff1205 =  Trip average MPG
    #
    ### GPS
    ##### kff1239 =  GPS Accuracy
    ##### kff1010 =  GPS Altitude
    ##### kff123b =  GPS Bearing
    ##### kff1006 =  GPS Latitude
    ##### kff1005 =  GPS Longitude

In [9]:
sdf.sort_values('time').keys()

Index(['v', 'session', 'id', 'time', 'kff1005', 'kff1006', 'kff1001',
       'kff1007', 'k4', 'k2f', 'k11', 'k5', 'kc', 'kd', 'kf', 'kff1226',
       'kff1220', 'kff1221', 'k46', 'eml', 'k43', 'kff1271', 'k1f', 'kff1205',
       'kff1204', 'kff1266', 'kff1270'],
      dtype='object')

In [107]:
tripData

Unnamed: 0_level_0,airTemp,distance,duration,eTS,fuelUsed,mpg,sTS,tripType,weekday
session,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1518563360039,8.392826,26.325792,2314.003,1518569076312,0.982397,26.797516,1518566762309,other,Tue
1518618621698,4.492262,16.556014,3031.278,1518621662496,0.682205,24.26839,1518618631218,commute-home2work,Wed
1518642276496,8.682008,1.28669,237.975,1518642671496,0.07536,17.074013,1518642433521,other,Wed
1518649890183,13.727936,15.650596,5522.998,1518655419497,0.878662,17.811854,1518649896499,commute-work2home,Wed
1518704971866,9.157878,16.505419,3163.995,1518708309497,0.646242,25.540615,1518705145502,commute-home2work,Thu
1518730792039,11.859893,18.333136,6756.998,1518737563499,0.781541,23.45767,1518730806501,commute-work2home,Thu
1518791576931,3.908981,3.400137,822.981,1518793019651,0.213045,15.959714,1518792196670,other,Fri
1518799386457,3.908981,1.272408,10.992,1518799830652,0.071795,17.722846,1518799819660,other,Fri
1518811212541,8.189024,1.865104,818.999,1518812068590,0.128013,14.56966,1518811249591,other,Fri
1518815119549,11.539216,0.354725,101.0,1518815341592,0.025193,14.080142,1518815240592,other,Fri
