# Volvo Trucks Analytics

### Imports and global variables go here

In [42]:
import pandas as pd
import numpy as np
import csv
import matplotlib.pyplot as plt
import datetime

#The two truck data set paths are set here.
truck1csv = "../data/trucks/truck1.csv"
truck2csv = "../data/trucks/truck2.csv"

#The two column header dictonary paths are set here.
truck1dictcsv = "../data/dictionary/truck1dict.csv"
truck2dictcsv = "../data/dictionary/truck2dict.csv"

#Number of records desired from the data set.
numberOfRecords = 100

#If the number of values that are NOT Na type pass this percentage, the row will be deleted
rowNaNThresholdPercent = 75

#Create column conversion dictionaries from CSV files.
truck1dict={}
truck2dict={}
with open(truck1dictcsv, 'r') as f:
    for row in csv.reader(f):
        truck1dict.update({row[0]:row[1]})
        
with open(truck2dictcsv, 'r') as f:
    for row in csv.reader(f):
        truck2dict.update({row[0]:row[1]})


### Define all cleaning functions here

In [43]:
def readCsv(truck):
    return pd.read_csv(truck, header = [0])
    
def cullUtcCols(truckData):
    noUTC = truckData.drop(["UTC hour", "UTC minute", "UTC second", "UTC month", "UTC day", "UTC year"], axis = 1)
    return noUTC

"""Takes a DataFrame and a conversion dictionary as parameters;
    uses the dictionary to rename all matching columns then returns the changed DataFrame."""
def renameColumns(dataFrame, dictionary):
    return dataFrame.rename(columns = dictionary)

"""Finds threshold of Non-NA type using percentage and deletes rows."""
def removeUnnecessaryRows(dataFrame):
    length = len(dataFrame.columns)
    TValue = (rowNaNThresholdPercent / 100) * length
    print("Threshold value: " + str(int(TValue)))
    return dataFrame.dropna(thresh = int(TValue))

"""Finds columns with all Na types and deletes them."""
def removeUnnecessaryColumns(dataFrame):
    return dataFrame.dropna(axis=1, how='all')

# NOTE: The following function is unused in this notebook, but it can be used to convert CSV files into
# conversion dictionaries i.e. creating the column rename dictionaries, which was manually done in the
# previous block manually.

"""Creates a Python dictionary from a pre-defined CSV dictionary.
This will only look at the first two columns of the given CSV file."""
def createPythonDictionary(dictionaryCSV):
    # Initialize the resulting dictionary.
    dictResult = {}
    
    # Opens the passed in CSV which defines the rename dictionary and iterates through it to store each value into a Python dictionary.
    with open(dictionaryCSV, 'r') as f:
        for row in csv.reader(f):
            dictResult.update({row[0]:row[1]})
    return dictResult

### Cleaning of Truck 1 Data starts here

In [44]:
truck1data = readCsv(truck1csv)
truck1data = cullUtcCols(truck1data)
truck1data = removeUnnecessaryRows(truck1data)
truck1data = renameColumns(truck1data, truck1dict)
truck1data = removeUnnecessaryColumns(truck1data)

# After cleaning, check the shape of the dataframe
truck1data.shape

MemoryError: Unable to allocate array with shape (50, 1216689) and data type float64

In [None]:
truck1data.head()

In [None]:
truck1data

In [None]:
print(truck1data.describe())

### Cleaning of Truck 2 Data starts here

In [None]:
truck2data = readCsv(truck2csv)
truck2data = cullUtcCols(truck2data)
truck2data = removeUnnecessaryRows(truck2data)
truck2data = renameColumns(truck2data, truck2dict)
truck2data = removeUnnecessaryColumns(truck2data)

# After cleaning, check the shape of the dataframe
truck2data.shape

In [None]:
truck2data.head()

In [None]:
truck2data

In [None]:
print(truck2data.describe())

In [None]:
def returnTruck1():
    return truck1data

def returnTruck2():
    return truck2data

### Concatenation of Truck1 and Truck2 Data

In [18]:
df = pd.concat([truck1data, truck2data], sort=False)

In [19]:
df.head()

Unnamed: 0,Time (DateTime),1730 Automatic Start & Stop (V),1730 Batteries (V),4649 Alternator (A),4649 Battery Out (A),4649 Trailer (A),4649 Inverter (A),4649 Fridge (A),4649 Battery Bank (A),4649 Battery Separator (A),...,1115 Fridge (A),1115 Trailer (A),1115 Inverter (A),Steering Wheel Angle,Engine Output Torque HiRes (%),Coolant Temperature (C),Pressure of Intake Manifold (kPa),Temperature of Pre-Combustion Air (C),Road Inclination (%),Altitude (m)
16,54:04.6,0.006409,13.946441,97.500381,27.971695,0.277523,1.337072,0.145914,-0.033379,0.843061,...,,,,,,,,,,
17,54:04.7,0.007782,13.953765,96.985389,22.375448,0.275616,5.640116,0.13447,-0.004768,0.774395,...,,,,,,,,,,
18,54:04.8,0.007782,13.95697,98.330091,18.987945,0.273709,1.28748,0.126841,-0.014305,0.736248,...,,,,,,,,,,
19,54:04.9,0.007324,13.953307,94.076638,19.07187,0.277523,6.105516,0.136378,-0.071527,0.774395,...,,,,,,,,,,
20,54:05.0,0.007324,13.957885,96.365492,21.677348,0.264172,3.751812,0.128748,-0.071527,0.759136,...,,,,,,,,,,


In [20]:
df.describe()

Unnamed: 0,1730 Automatic Start & Stop (V),1730 Batteries (V),4649 Alternator (A),4649 Battery Out (A),4649 Trailer (A),4649 Inverter (A),4649 Fridge (A),4649 Battery Bank (A),4649 Battery Separator (A),4649 Battery Voltage (V),...,1115 Fridge (A),1115 Trailer (A),1115 Inverter (A),Steering Wheel Angle,Engine Output Torque HiRes (%),Coolant Temperature (C),Pressure of Intake Manifold (kPa),Temperature of Pre-Combustion Air (C),Road Inclination (%),Altitude (m)
count,7.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,...,29.0,29.0,29.0,29.0,29.0,29.0,29.0,29.0,29.0,29.0
mean,0.007128,13.961723,97.647103,22.294751,0.277964,4.907976,0.129482,-0.028244,0.796697,12.173953,...,0.570648,-58.218989,-2.244721,-0.2819,1.875,79.0,101.241379,33.0,2.365517,337.844828
std,0.000692,0.009799,3.240392,3.216954,0.013218,1.806796,0.008549,0.026582,0.049097,0.000647,...,0.032676,16.007856,0.034165,0.030425,0.0,0.0,4.222856,0.0,0.076885,0.483725
min,0.005951,13.946441,90.080682,17.767224,0.262264,1.28748,0.115396,-0.071527,0.736248,12.17258,...,0.50927,-62.5,-2.294575,-0.306617,1.875,79.0,94.0,33.0,2.2,337.5
25%,0.006867,13.956054,96.727893,19.07187,0.271801,4.747463,0.124933,-0.033379,0.759136,12.173495,...,0.54818,-62.5,-2.271687,-0.304664,1.875,79.0,98.0,33.0,2.4,337.5
50%,0.007324,13.959258,98.206111,22.276265,0.277523,5.640116,0.128748,-0.023842,0.774395,12.173953,...,0.56878,-62.5,-2.244984,-0.302711,1.875,79.0,102.0,33.0,2.4,337.5
75%,0.007553,13.967498,99.102579,25.068666,0.277523,5.678264,0.13447,-0.004768,0.843061,12.174411,...,0.58938,-62.5,-2.22591,-0.259742,1.875,79.0,106.0,33.0,2.4,338.5
max,0.007782,13.984894,104.071298,27.971695,0.315671,7.558938,0.145914,0.004768,0.881209,12.174868,...,0.642786,-0.420577,-2.168688,-0.22068,1.875,79.0,106.0,33.0,2.4,338.5


In [21]:
print(df.describe())

       1730 Automatic Start & Stop (V)  1730 Batteries (V)  \
count                         7.000000           13.000000   
mean                          0.007128           13.961723   
std                           0.000692            0.009799   
min                           0.005951           13.946441   
25%                           0.006867           13.956054   
50%                           0.007324           13.959258   
75%                           0.007553           13.967498   
max                           0.007782           13.984894   

       4649 Alternator (A)  4649 Battery Out (A)  4649 Trailer (A)  \
count            13.000000             13.000000         13.000000   
mean             97.647103             22.294751          0.277964   
std               3.240392              3.216954          0.013218   
min              90.080682             17.767224          0.262264   
25%              96.727893             19.071870          0.271801   
50%              98.2

### Group Truck1 by Weight

In [22]:
byWeight = truck1data.groupby('Vehicle Weight (kg)')

In [23]:
byWeight.head()

Unnamed: 0,Time (DateTime),1730 Automatic Start & Stop (V),1730 Batteries (V),4649 Alternator (A),4649 Battery Out (A),4649 Trailer (A),4649 Inverter (A),4649 Fridge (A),4649 Battery Bank (A),4649 Battery Separator (A),...,RoadInclinationVP15,Steering Wheel Angle (radian),Total Distance (m),Transmission Lube Temperature (C),Vehicle Speed (km/hr),Vehicle Weight (kg),Vehicle Speed (Wheel-Based; km/hr),CPU Load (%),Altitude(m),Speed (km/hr)
16,54:04.6,0.006409,13.946441,97.500381,27.971695,0.277523,1.337072,0.145914,-0.033379,0.843061,...,1.4,-5.660133,246045850,27.84375,8.167969,18000,8.167969,40,0,6.1116
17,54:04.7,0.007782,13.953765,96.985389,22.375448,0.275616,5.640116,0.13447,-0.004768,0.774395,...,1.4,-5.96482,246045850,27.84375,8.167969,18000,8.167969,40,0,6.1116
18,54:04.8,0.007782,13.95697,98.330091,18.987945,0.273709,1.28748,0.126841,-0.014305,0.736248,...,1.2,-6.244117,246045850,27.84375,8.117188,18000,8.167969,40,0,6.1116
19,54:04.9,0.007324,13.953307,94.076638,19.07187,0.277523,6.105516,0.136378,-0.071527,0.774395,...,1.2,-6.509742,246045850,27.84375,8.117188,18000,8.117188,88,0,6.6672
20,54:05.0,0.007324,13.957885,96.365492,21.677348,0.264172,3.751812,0.128748,-0.071527,0.759136,...,1.2,-6.781227,246045850,27.84375,7.820312,18000,7.820312,88,0,6.6672


In [24]:
print(byWeight.describe())

                    1730 Automatic Start & Stop (V)                      \
                                              count      mean       std   
Vehicle Weight (kg)                                                       
18000                                           7.0  0.007128  0.000692   

                                                                       \
                          min       25%       50%       75%       max   
Vehicle Weight (kg)                                                     
18000                0.005951  0.006867  0.007324  0.007553  0.007782   

                    1730 Batteries (V)             ... Altitude(m)       \
                                 count       mean  ...         75%  max   
Vehicle Weight (kg)                                ...                    
18000                             13.0  13.961723  ...         0.0  0.0   

                    Speed (km/hr)                                              \
                        

### Group Truck2 by Weight

In [25]:
byWeight2 = truck2data.groupby('Vehicle Weight (kg)', axis=0)

In [26]:
byWeight2.head()

Unnamed: 0,Time (DateTime),1115 Alternator (A),1115 Battery Out (A),1115 Fridge (A),1115 Trailer (A),1115 Inverter (A),Total Distance (m),Engine Speed (rpm),Vehicle Speed (Wheel-Based; km/hr),Steering Wheel Angle,...,Temperature of Pre-Combustion Air (C),Transmission Lube Temperature (C),Road Inclination (%),Vehicle Weight (kg),Outside Air Temperature (C),Vehicle Speed (km/hr),Boost Pressure (kPa),CPU Load (%),Altitude (m),Speed (km/hr)
0,49:09.0,-35.452125,4.949645,0.555047,-0.430114,-2.283131,,1096.375,106.886719,-0.304664,...,33,85.0,2.4,18000,16.9375,106.886719,196,75,337.5,29.323334
1,49:09.1,-35.480735,5.270085,0.626764,-0.420577,-2.294575,,1098.875,107.101562,-0.304664,...,33,85.0,2.4,18000,16.9375,107.101562,198,75,337.5,29.323334
2,49:09.2,-35.480735,5.197604,0.56878,-62.5,-2.271687,,1096.75,107.089844,-0.304664,...,33,85.0,2.4,18000,16.9375,107.089844,198,75,337.5,29.323334
3,49:09.3,-35.452125,5.41886,0.638209,-62.5,-2.290761,411617045.0,1098.375,106.921875,-0.304664,...,33,85.0,2.4,18000,16.9375,106.921875,200,75,337.5,29.323334
4,49:09.4,-35.471199,5.228122,0.590143,-62.5,-2.283131,411617045.0,1101.75,107.109375,-0.304664,...,33,85.0,2.4,18000,16.9375,107.109375,200,75,337.5,29.323334


In [27]:
print(byWeight2.describe())

                    1115 Alternator (A)                                  \
                                  count       mean       std        min   
Vehicle Weight (kg)                                                       
18000                              29.0 -35.466923  0.013864 -35.490272   

                                                                 \
                           25%        50%        75%        max   
Vehicle Weight (kg)                                               
18000               -35.480735 -35.471199 -35.452125 -35.442588   

                    1115 Battery Out (A)            ... Altitude (m)         \
                                   count      mean  ...          75%    max   
Vehicle Weight (kg)                                 ...                       
18000                               29.0  5.459902  ...        338.5  338.5   

                    Speed (km/hr)                                           \
                            count  

In [28]:
byWeight2 = truck2data.groupby('Vehicle Weight (kg)')

for key, item in byWeight2:
    print(byWeight2.get_group(key), "\n\n")

   Time (DateTime)  1115 Alternator (A)  1115 Battery Out (A)  \
0          49:09.0           -35.452125              4.949645   
1          49:09.1           -35.480735              5.270085   
2          49:09.2           -35.480735              5.197604   
3          49:09.3           -35.452125              5.418860   
4          49:09.4           -35.471199              5.228122   
5          49:09.5           -35.452125              5.544747   
6          49:09.6           -35.442588              5.285344   
7          49:09.7           -35.461662              5.151827   
8          49:09.8           -35.471199              5.010681   
9          49:09.9           -35.461662              5.350195   
10         49:10.0           -35.442588              4.873350   
11         49:10.1           -35.480735              4.991608   
12         49:10.2           -35.461662              5.457008   
13         49:10.3           -35.452125              5.479896   
14         49:10.4       

In [29]:
df = truck1data

This sums all the data around the grouping by 'Vehicle Weight'

In [30]:
df.groupby(['Vehicle Weight (kg)'], as_index=False).sum()

Unnamed: 0,Vehicle Weight (kg),1730 Automatic Start & Stop (V),1730 Batteries (V),4649 Alternator (A),4649 Battery Out (A),4649 Trailer (A),4649 Inverter (A),4649 Fridge (A),4649 Battery Bank (A),4649 Battery Separator (A),...,Temperature of Air Intake Manifold (C),RoadInclinationVP15,Steering Wheel Angle (radian),Total Distance (m),Transmission Lube Temperature (C),Vehicle Speed (km/hr),Vehicle Speed (Wheel-Based; km/hr),CPU Load (%),Altitude(m),Speed (km/hr)
0,18000,0.049897,181.502403,1269.412337,289.831769,3.613527,63.803693,1.683261,-0.36717,10.357061,...,351,14.6,-94.39032,3198596050,361.96875,102.261719,102.433594,1000,0,85.006796


In [31]:
df2 = truck2data

This groups by the 'Vehicle Weight' then uses the mean as the new values in the rest of the columns

In [32]:
df2.groupby(['Vehicle Weight (kg)'], as_index=False).mean()

Unnamed: 0,Vehicle Weight (kg),1115 Alternator (A),1115 Battery Out (A),1115 Fridge (A),1115 Trailer (A),1115 Inverter (A),Total Distance (m),Engine Speed (rpm),Vehicle Speed (Wheel-Based; km/hr),Steering Wheel Angle,...,Pressure of Intake Manifold (kPa),Temperature of Pre-Combustion Air (C),Transmission Lube Temperature (C),Road Inclination (%),Outside Air Temperature (C),Vehicle Speed (km/hr),Boost Pressure (kPa),CPU Load (%),Altitude (m),Speed (km/hr)
0,18000,-35.466923,5.459902,0.570648,-58.218989,-2.244721,411617100.0,1102.737069,107.425377,-0.2819,...,101.241379,33.0,85.021552,2.365517,16.9375,107.425377,198.62069,81.206897,337.844828,29.46525


In [33]:
df.groupby(['Vehicle Weight (kg)'], as_index=False).mean()

Unnamed: 0,Vehicle Weight (kg),1730 Automatic Start & Stop (V),1730 Batteries (V),4649 Alternator (A),4649 Battery Out (A),4649 Trailer (A),4649 Inverter (A),4649 Fridge (A),4649 Battery Bank (A),4649 Battery Separator (A),...,Temperature of Air Intake Manifold (C),RoadInclinationVP15,Steering Wheel Angle (radian),Total Distance (m),Transmission Lube Temperature (C),Vehicle Speed (km/hr),Vehicle Speed (Wheel-Based; km/hr),CPU Load (%),Altitude(m),Speed (km/hr)
0,18000,0.007128,13.961723,97.647103,22.294751,0.277964,4.907976,0.129482,-0.028244,0.796697,...,27.0,1.123077,-7.260794,246045850.0,27.84375,7.866286,7.879507,76.923077,0.0,6.538984


In [34]:
dftest = truck1data

In [35]:
dftest.groupby(['Vehicle Weight (kg)'], as_index=False).mean()

Unnamed: 0,Vehicle Weight (kg),1730 Automatic Start & Stop (V),1730 Batteries (V),4649 Alternator (A),4649 Battery Out (A),4649 Trailer (A),4649 Inverter (A),4649 Fridge (A),4649 Battery Bank (A),4649 Battery Separator (A),...,Temperature of Air Intake Manifold (C),RoadInclinationVP15,Steering Wheel Angle (radian),Total Distance (m),Transmission Lube Temperature (C),Vehicle Speed (km/hr),Vehicle Speed (Wheel-Based; km/hr),CPU Load (%),Altitude(m),Speed (km/hr)
0,18000,0.007128,13.961723,97.647103,22.294751,0.277964,4.907976,0.129482,-0.028244,0.796697,...,27.0,1.123077,-7.260794,246045850.0,27.84375,7.866286,7.879507,76.923077,0.0,6.538984


In [37]:
truckonetype =  pd.to_datetime(truck1data['Time (DateTime)'])

truckonetype.head(100)

ValueError: hour must be in 0..23

In [38]:
truckTwotype =  pd.to_datetime(truck2data['Time (DateTime)'])

truckTwotype.head(100)

ValueError: hour must be in 0..23

For the function `divideByDay()` make sure to pass in a DataFrame of a truck.
If you want specific column of data then just specify it by using quotation and the accurate name of the column.
Example:
This line will get daily averages for all columns:
`print(divideByDay(truck1data)`
This line will get daily average of speed:
`print(divideByDay(truck1data, "Speed (km/hr)"))`

In [39]:
def divideByDay(truck_df, byday_df=None):
    dates = []
    #truck_df = truck_df.head()
    for index, tdata in truck_df.iterrows():
        dates.append(tdata['Time (DateTime)'].split(' ')[0])
        #print(index, dates)
        
    truck_df['Time (DateTime)'] = dates
    
    if(byday_df):
        byday_df = truck_df.groupby('Time (DateTime)')[byday_df].mean()
    else:
        byday_df = truck_df.groupby('Time (DateTime)').mean()

    return byday_df

Truck 2 daily Average speed 

In [40]:
print(divideByDay(truck2data, "Speed (km/hr)"))

Time (DateTime)
49:09.0    29.323334
49:09.1    29.323334
49:09.2    29.323334
49:09.3    29.323334
49:09.4    29.323334
49:09.5    29.323334
49:09.6    29.323334
49:09.7    29.323334
49:09.8    29.323334
49:09.9    29.323334
49:10.0    29.323334
49:10.1    29.323334
49:10.2    29.323334
49:10.3    29.323334
49:10.4    29.323334
49:10.5    29.323334
49:10.6    29.323334
49:10.7    29.323334
49:10.8    29.323334
49:10.9    29.734889
49:11.0    29.734889
49:11.1    29.734889
49:11.2    29.734889
49:11.3    29.734889
49:11.4    29.734889
49:11.5    29.734889
49:11.6    29.734889
49:11.7    29.734889
49:11.8    29.734889
Name: Speed (km/hr), dtype: float64


Truck 1 daily Average speed 

In [41]:
print(divideByDay(truck1data, "Speed (km/hr)"))

Time (DateTime)
54:04.6    6.1116
54:04.7    6.1116
54:04.8    6.1116
54:04.9    6.6672
54:05.0    6.6672
54:05.1    6.6672
54:05.2    6.6672
54:05.3    6.6672
54:05.4    6.6672
54:05.5    6.6672
54:05.6    6.6672
54:05.7    6.6672
54:05.8    6.6672
Name: Speed (km/hr), dtype: float64
