# Truck 1 Analysis

#### Imports and global variables are defined here.

In [1]:
# Imports required for this notebook.
import numpy as np
import pandas as pd
import csv
import matplotlib.pyplot as plt

#Get rid of Pandas scientific notation string formatting
#pd.set_option('display.float_format', lambda x: '%.12f' % x)

# Local path to the CSV file containing the data for truck two (2).
truck1Path = "../data/trucks/truck1.csv"

# Local path to the column dictionary for truck two (2).
truck1DictPath = "../data/dictionary/truck1dict.csv"

#### Functions are defined here.
*NOTE: We need to discuss reusable code and how we're going to handle commonly-used functions.*

In [2]:
"""Uses Pandas's read_csv method to read a CSV file and returns a DataFrame of it to the notebook.
This function reads in all rows and defines the header row at index 0 by default."""
def readCsv(truck, records = None, headerIdx = 0):
    return pd.read_csv(truck, nrows = records, header = [headerIdx])

"""Creates a Python dictionary from a pre-defined CSV dictionary.
This will only look at the first two columns of the given CSV file."""
def createPythonDictionary(dictionaryCSV):
    # Initialize the resulting dictionary.
    dictResult = {}
    
    # Opens the passed in CSV which defines the rename dictionary and iterates through it to store each value into a Python dictionary.
    with open(dictionaryCSV, 'r') as f:
        for row in csv.reader(f):
            dictResult.update({row[0]:row[1]})
    return dictResult

"""Returns a DataFrame without the UTC columns. Specific to the given Volvo truck data."""
def cullUtcCols(truckData):
    noUTC = truckData.drop(["UTC hour", "UTC minute", "UTC second", "UTC month", "UTC day", "UTC year"], axis = 1)
    return noUTC

"""Takes a DataFrame and a dictionary as parameters; uses the dictionary to rename all matching columns then
returns the changed DataFrame."""
def renameColumns(dataFrame, dictionary):
    return dataFrame.rename(columns = dictionary)

#### Notebook driver script from here on.

Reading in the truck data; one can change how many records are read by using "records = <#>" in the readCsv function or remove it entirely to read all records.

In [3]:
truck1Df = readCsv(truck1Path)
truck1Df.head()

Unnamed: 0,Time,1730_CH9_ AutomaticStartStop,1730_CH10_Truck_Batteries,4649_Ch1_Alternator_250A,4649_Ch2_BattOut_100A,4649_Ch3_Trailer_50A,4649_Ch4_Inverter_100A,4649_Ch5_Fridge_50A,4649_Ch6_200A_APU_BatteryBank,4649_Ch7_200A_BatterySeparator,...,WheelBasedVehicleSpeed,CPU load,UTC hour,UTC minute,UTC second,UTC month,UTC day,UTC year,GPS altitude,GPS speed
0,08/07/2019 07:54:02.959,,,,,,,,,,...,7.96875,40.0,11.0,54.0,3.0,8.0,7.0,19.0,0.0,
1,08/07/2019 07:54:03.059,,,,,,,,,,...,8.042969,40.0,11.0,54.0,3.0,8.0,7.0,19.0,0.0,
2,08/07/2019 07:54:03.159,,,,,,,,,,...,8.042969,40.0,11.0,54.0,3.0,8.0,7.0,19.0,0.0,
3,08/07/2019 07:54:03.259,,,,,,,,,,...,8.105469,40.0,11.0,54.0,3.0,8.0,7.0,19.0,0.0,
4,08/07/2019 07:54:03.359,,,,,,,,,,...,8.105469,40.0,11.0,54.0,3.0,8.0,7.0,19.0,0.0,


Dropping UTC columns.

In [4]:
#truck1Df = cullUtcCols(truck1Df)
#truckTwoDf.head()

Renaming the columns for better readability.

In [5]:
# Create a Python dictionary out of the CSV dictionary.
truck1Dict = createPythonDictionary(truck1DictPath)

truck1Df = renameColumns(truck1Df, truck1Dict)
truck1Df.head()

Unnamed: 0,Time (DateTime),1730 Automatic Start & Stop (V),1730 Batteries (V),4649 Alternator (A),4649 Battery Out (A),4649 Trailer (A),4649 Inverter (A),4649 Fridge (A),4649 Battery Bank (A),4649 Battery Separator (A),...,Vehicle Speed (Wheel-Based; km/hr),CPU Load (%),UTC Hour,UTC Minute,UTC Second,UTC Month,UTC Day,UTC Year,Altitude(m),Speed (km/hr)
0,08/07/2019 07:54:02.959,,,,,,,,,,...,7.96875,40.0,11.0,54.0,3.0,8.0,7.0,19.0,0.0,
1,08/07/2019 07:54:03.059,,,,,,,,,,...,8.042969,40.0,11.0,54.0,3.0,8.0,7.0,19.0,0.0,
2,08/07/2019 07:54:03.159,,,,,,,,,,...,8.042969,40.0,11.0,54.0,3.0,8.0,7.0,19.0,0.0,
3,08/07/2019 07:54:03.259,,,,,,,,,,...,8.105469,40.0,11.0,54.0,3.0,8.0,7.0,19.0,0.0,
4,08/07/2019 07:54:03.359,,,,,,,,,,...,8.105469,40.0,11.0,54.0,3.0,8.0,7.0,19.0,0.0,


In [6]:
# percentage of nulls
truck1Df.isnull().sum()/len(truck1Df)

Time (DateTime)                            0.000036
1730 Automatic Start & Stop (V)            0.266421
1730 Batteries (V)                         0.000404
4649 Alternator (A)                        0.000404
4649 Battery Out (A)                       0.000404
4649 Trailer (A)                           0.000404
4649 Inverter (A)                          0.000404
4649 Fridge (A)                            0.000404
4649 Battery Bank (A)                      0.000404
4649 Battery Separator (A)                 0.000404
4649 Battery Voltage (V)                   0.000404
Engine Output Torque (%)                   0.000036
HiRes Engine Output Torque (%)             0.000036
Outside Air Temperature (C)                0.000043
Urea Tank Temperature (C)                  0.001297
Outside Air Pressure (kPa)                 0.000041
Boost Pressure (kPa)                       0.000036
Cruise Control Speed (km/hr)               0.328143
Driver Requested Torque (%)                0.000036
Engine Reque

Remove rows without time data and remove Columns with all NaN values

In [7]:
truck1Df.dropna(subset=['Time (DateTime)', 'UTC Hour'], inplace=True)
truck1Df.dropna(axis=1, how='all', inplace=True)

In [8]:
truck1Df.isnull().sum()/len(truck1Df)

Time (DateTime)                            0.000000e+00
1730 Automatic Start & Stop (V)            2.663506e-01
1730 Batteries (V)                         3.082429e-04
4649 Alternator (A)                        3.090649e-04
4649 Battery Out (A)                       3.090649e-04
4649 Trailer (A)                           3.090649e-04
4649 Inverter (A)                          3.090649e-04
4649 Fridge (A)                            3.090649e-04
4649 Battery Bank (A)                      3.090649e-04
4649 Battery Separator (A)                 3.090649e-04
4649 Battery Voltage (V)                   3.090649e-04
Engine Output Torque (%)                   0.000000e+00
HiRes Engine Output Torque (%)             0.000000e+00
Outside Air Temperature (C)                4.109905e-06
Urea Tank Temperature (C)                  1.201736e-03
Outside Air Pressure (kPa)                 4.109905e-06
Boost Pressure (kPa)                       0.000000e+00
Cruise Control Speed (km/hr)               3.280

# Group the data by UTC Day

In [9]:
dayDf = truck1Df.groupby('UTC Day')
day1Df = dayDf.get_group(5)
day2Df = dayDf.get_group(6)
day3Df = dayDf.get_group(7)
day4Df = dayDf.get_group(8)
day5Df = dayDf.get_group(9)
day6Df = dayDf.get_group(10)
day7Df = dayDf.get_group(12)

In [34]:
def getDistance(df):
    df.dropna(subset=['Vehicle Speed (Wheel-Based; km/hr)'], inplace=True) #remove rows with no speed data
    df.asfreq(freq='30S')                        #Convert to 100Hz using mean
    return (df['Vehicle Speed (Wheel-Based; km/hr)']/3600).sum()

In [38]:
day2Df

Unnamed: 0,Time (DateTime),1730 Automatic Start & Stop (V),1730 Batteries (V),4649 Alternator (A),4649 Battery Out (A),4649 Trailer (A),4649 Inverter (A),4649 Fridge (A),4649 Battery Bank (A),4649 Battery Separator (A),...,Vehicle Speed (Wheel-Based; km/hr),CPU Load (%),UTC Hour,UTC Minute,UTC Second,UTC Month,UTC Day,UTC Year,Altitude(m),Speed (km/hr)
29369,08/06/2019 15:32:50.417,,,,,,,,,,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29370,08/06/2019 15:32:50.517,,,,,,,,,,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29371,08/06/2019 15:32:50.617,,,,,,,,,,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29372,08/06/2019 15:32:50.717,,,,,,,,,,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29373,08/06/2019 15:32:50.817,,,,,,,,,,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29374,08/06/2019 15:32:50.917,,,,,,,,,,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29375,08/06/2019 15:32:51.017,,,,,,,,,,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29376,08/06/2019 15:32:51.117,,13.421378,175.826848,27.826734,0.536927,15.959030,0.052453,0.023842,-0.232700,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29377,08/06/2019 15:32:51.217,,13.432822,184.743839,26.781491,0.260357,17.458228,0.048638,0.071527,-1.148241,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29378,08/06/2019 15:32:51.317,,13.439689,185.049020,27.597848,0.277523,23.115511,0.046731,-0.004768,-1.194018,...,0.0,85.0,19.0,32.0,51.0,8.0,6.0,19.0,195.000000,0.0


Get the distance traveled by day

In [49]:
pd.to_datetime(day2Df['Time (DateTime)'])


ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [None]:
day2Df['Time (DateTime)']

In [40]:
day2Df.dropna(subset=['Vehicle Speed (Wheel-Based; km/hr)'], inplace=True)
day2Df.set_index('Time (DateTime)')
day2Df.asfreq(freq='10S')
day2Df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Time (DateTime),1730 Automatic Start & Stop (V),1730 Batteries (V),4649 Alternator (A),4649 Battery Out (A),4649 Trailer (A),4649 Inverter (A),4649 Fridge (A),4649 Battery Bank (A),4649 Battery Separator (A),...,Vehicle Speed (Wheel-Based; km/hr),CPU Load (%),UTC Hour,UTC Minute,UTC Second,UTC Month,UTC Day,UTC Year,Altitude(m),Speed (km/hr)
29369,08/06/2019 15:32:50.417,,,,,,,,,,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29370,08/06/2019 15:32:50.517,,,,,,,,,,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29371,08/06/2019 15:32:50.617,,,,,,,,,,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29372,08/06/2019 15:32:50.717,,,,,,,,,,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29373,08/06/2019 15:32:50.817,,,,,,,,,,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29374,08/06/2019 15:32:50.917,,,,,,,,,,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29375,08/06/2019 15:32:51.017,,,,,,,,,,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29376,08/06/2019 15:32:51.117,,13.421378,175.826848,27.826734,0.536927,15.959030,0.052453,0.023842,-0.232700,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29377,08/06/2019 15:32:51.217,,13.432822,184.743839,26.781491,0.260357,17.458228,0.048638,0.071527,-1.148241,...,0.0,66.0,19.0,32.0,50.0,8.0,6.0,19.0,194.600006,0.0
29378,08/06/2019 15:32:51.317,,13.439689,185.049020,27.597848,0.277523,23.115511,0.046731,-0.004768,-1.194018,...,0.0,85.0,19.0,32.0,51.0,8.0,6.0,19.0,195.000000,0.0


In [36]:
truck1Df.groupby('UTC Day').apply(getDistance)

UTC Day
5.0     6891.309723
6.0     6891.309723
7.0     6891.309723
8.0     6891.309723
9.0     6891.309723
10.0    6891.309723
12.0    6891.309723
dtype: float64

In [33]:
getDistance(day2Df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


1584.7529132564487

In [18]:
(By100Hz['Vehicle Speed (Wheel-Based; km/hr)']/3600).sum()

689.140842869888

In [21]:
day1Df['Vehicle Speed (Wheel-Based; km/hr)'].sum()/len(day1Df)

82.02285585218011

In [22]:
day1Df['Vehicle Speed (Wheel-Based; km/hr)'].describe()

count    302461.000000
mean         82.022856
std          25.490542
min           0.000000
25%          92.160156
50%          92.957031
75%          93.078125
max         121.367188
Name: Vehicle Speed (Wheel-Based; km/hr), dtype: float64

##### GPS Speed vs. Wheel-Based Speed
Selecting the GPS Speed and Wheel-Based Speed columns.

In [None]:
truck1Df.shape

In [None]:
columns = ['Time (DateTime)', 'Speed (km/hr)', 'Vehicle Speed (Wheel-Based; km/hr)']
gpsVsWheel = truck1Df[columns].head()
gpsVsWheel.head()
# Stopping here; plotting this data would be much more effective if it was per day. We should discuss this point before proceeding.