In [2]:
import pandas as pd
pd.set_option('display.max_columns', None)
import geopandas as gpd
import numpy as np
from envirocar import TrackAPI, DownloadClient, BboxSelector, ECConfig
# Set up the API client; ECConfig is created without arguments here.
config = ECConfig()
download_client = DownloadClient(config=config)
track_api = TrackAPI(api_client=download_client)

# Bounding box handed to the track query below.
bbox = BboxSelector([
    7.554130554199218, 51.95590322041212,  # min_x, min_y
    7.590351104736328, 51.97874790276371,  # max_x, max_y
])

# Issue the query: 40 tracks, filtered by the bounding box above.
track_df = track_api.get_tracks(bbox=bbox, num_results=40)

## 0. Example
Below is an example of the dataframe you receive when requesting multiple tracks from the enviroCar API with the envirocar-py package.

In [3]:
# Preview the first five rows of the requested tracks.
track_df.head(n=5)

Unnamed: 0,id,time,geometry,Engine Load.value,Engine Load.unit,Calculated MAF.value,Calculated MAF.unit,Speed.value,Speed.unit,CO2.value,CO2.unit,Intake Pressure.value,Intake Pressure.unit,Rpm.value,Rpm.unit,Intake Temperature.value,Intake Temperature.unit,Consumption (GPS-based).value,Consumption (GPS-based).unit,GPS Altitude.value,GPS Altitude.unit,Throttle Position.value,Throttle Position.unit,GPS Bearing.value,GPS Bearing.unit,Consumption.value,Consumption.unit,GPS Accuracy.value,GPS Accuracy.unit,CO2 Emission (GPS-based).value,CO2 Emission (GPS-based).unit,GPS Speed.value,GPS Speed.unit,track.id,track.length,track.begin,track.end,sensor.type,sensor.engineDisplacement,sensor.model,sensor.id,sensor.fuelType,sensor.constructionYear,sensor.manufacturer,track.appVersion,track.touVersion,GPS HDOP.value,GPS HDOP.unit,GPS PDOP.value,GPS PDOP.unit,GPS VDOP.value,GPS VDOP.unit,MAF.value,MAF.unit,O2 Lambda Voltage ER.value,O2 Lambda Voltage ER.unit,O2 Lambda Voltage.value,O2 Lambda Voltage.unit
0,5f0ef89c00375c5a2641ef86,2020-07-15T12:37:03+00:00,POINT (7.57939 51.96766),30.459892,%,3.113889,g/s,15.97893,km/h,2.40547,kg/h,29.667201,kPa,748.952252,u/min,26.0,c,0.936199,l/h,115.671012,m,13.0,%,136.590329,deg,1.023604,l/h,6.0,%,2.200068,kg/h,17.50266,km/h,5f0ef89c00375c5a2641ef84,0.665466,2020-07-15T12:37:03Z,2020-07-15T12:38:25Z,car,1699,A 170,559e22c2e4b07207d8977998,gasoline,2004,Mercedes Benz,,,,,,,,,,,,,,
1,5f0ef89c00375c5a2641ef88,2020-07-15T12:37:09+00:00,POINT (7.57955 51.96757),49.230105,%,9.778811,g/s,11.134565,km/h,7.554102,kg/h,47.232322,kPa,1475.604745,u/min,25.652838,c,0.937457,l/h,115.280639,m,16.642229,%,134.479803,deg,3.214511,l/h,6.0,%,2.203025,kg/h,10.324164,km/h,5f0ef89c00375c5a2641ef84,0.665466,2020-07-15T12:37:03Z,2020-07-15T12:38:25Z,car,1699,A 170,559e22c2e4b07207d8977998,gasoline,2004,Mercedes Benz,,,,,,,,,,,,,,
2,5f0ef89c00375c5a2641ef89,2020-07-15T12:37:14+00:00,POINT (7.57988 51.96740),78.649652,%,25.066406,g/s,33.97633,km/h,19.363722,kg/h,74.040426,kPa,2397.395931,u/min,23.728013,c,5.102906,l/h,114.613231,m,23.862069,%,123.313954,deg,8.239881,l/h,6.193485,%,11.99183,kg/h,30.967132,km/h,5f0ef89c00375c5a2641ef84,0.665466,2020-07-15T12:37:03Z,2020-07-15T12:38:25Z,car,1699,A 170,559e22c2e4b07207d8977998,gasoline,2004,Mercedes Benz,,,,,,,,,,,,,,
3,5f0ef89c00375c5a2641ef8a,2020-07-15T12:37:19+00:00,POINT (7.58049 51.96715),31.2004,%,7.419664,g/s,39.0,km/h,5.731668,kg/h,30.075758,kPa,1748.565672,u/min,24.0,c,2.280491,l/h,113.835218,m,15.351261,%,125.03578,deg,2.439007,l/h,6.0,%,5.359154,kg/h,36.898346,km/h,5f0ef89c00375c5a2641ef84,0.665466,2020-07-15T12:37:03Z,2020-07-15T12:38:25Z,car,1699,A 170,559e22c2e4b07207d8977998,gasoline,2004,Mercedes Benz,,,,,,,,,,,,,,
4,5f0ef89c00375c5a2641ef8b,2020-07-15T12:37:24+00:00,POINT (7.58107 51.96682),29.735773,%,3.106679,g/s,34.321667,km/h,2.3999,kg/h,29.0,kPa,761.854074,u/min,25.0,c,0.946257,l/h,113.502384,m,13.0,%,133.482068,deg,1.021234,l/h,7.489919,%,2.223704,kg/h,34.447545,km/h,5f0ef89c00375c5a2641ef84,0.665466,2020-07-15T12:37:03Z,2020-07-15T12:38:25Z,car,1699,A 170,559e22c2e4b07207d8977998,gasoline,2004,Mercedes Benz,,,,,,,,,,,,,,


## 1. All variables which you receive when requesting data via the enviroCar API
Below you see all available variables and their data types. Not all tracks provide the full set of data; therefore, depending on the tracks you receive, the set of variables may be smaller.  
You will notice that there are variables which end with '.value' or with '.unit'. Variables ending with '.value' hold the actual values, whereas variables ending with '.unit' hold the unit of the corresponding '.value' variable.

In [4]:
# Collect every column label together with the dtype of that column.
columnname = list(track_df.columns)
datatype = [track_df[label].dtype for label in columnname]

# Show both lists side by side as a small overview table.
d = {'column name': columnname, 'data type': datatype}
pd.DataFrame(d)

Unnamed: 0,column name,data type
0,id,object
1,time,object
2,geometry,geometry
3,Engine Load.value,float64
4,Engine Load.unit,object
5,Calculated MAF.value,float64
6,Calculated MAF.unit,object
7,Speed.value,float64
8,Speed.unit,object
9,CO2.value,float64


### 1.1 Units of variables
Here you can inspect the units of the numerical variables.

In [5]:
print('VARIABLE [UNIT]')
print('---')
# Every column whose label contains '.unit'.
units = track_df.filter(like='.unit').columns
# The labels come from track_df itself, so each one can be looked up directly.
for unit_column in units:
    distinct_units = track_df[unit_column].dropna().unique()
    print(unit_column, distinct_units)

VARIABLE [UNIT]
---
Engine Load.unit ['%']
Calculated MAF.unit ['g/s']
Speed.unit ['km/h']
CO2.unit ['kg/h']
Intake Pressure.unit ['kPa']
Rpm.unit ['u/min']
Intake Temperature.unit ['c']
Consumption (GPS-based).unit ['l/h']
GPS Altitude.unit ['m']
Throttle Position.unit ['%']
GPS Bearing.unit ['deg']
Consumption.unit ['l/h']
GPS Accuracy.unit ['%']
CO2 Emission (GPS-based).unit ['kg/h']
GPS Speed.unit ['km/h']
GPS HDOP.unit ['precision']
GPS PDOP.unit ['precision']
GPS VDOP.unit ['precision']
MAF.unit ['l/s']
O2 Lambda Voltage ER.unit ['ratio']
O2 Lambda Voltage.unit ['V']


### 1.2 OBD based data
The following numeric variables are recorded via OBD:

In [6]:
# OBD-related value columns documented in this section.
obd_columns = ['Engine Load.value', 'Rpm.value',
               'Intake Pressure.value', 'Intake Temperature.value',
               'Speed.value', 'Throttle Position.value', 'MAF.value',
               'O2 Lambda Voltage ER.value', 'O2 Lambda Voltage.value']

# The columns of track_df depend on the tracks returned by the query, and
# selecting with a list that contains an absent label raises a KeyError.
# Keep only the columns that exist and report the ones that do not.
obd_present = [name for name in obd_columns if name in track_df.columns]
obd_missing = [name for name in obd_columns if name not in track_df.columns]
OBD = track_df[obd_present]

print('OBD BASED DATA')
print('---')
for column in OBD:
    print(column)
if obd_missing:
    print('(not present in this sample: ' + ', '.join(obd_missing) + ')')

OBD BASED DATA
---
Engine Load.value
Rpm.value
Intake Pressure.value
Intake Temperature.value
Speed.value
Throttle Position.value
MAF.value
O2 Lambda Voltage ER.value
O2 Lambda Voltage.value


### 1.3 GPS based data 
The following variables (the point geometry and several numeric variables) are recorded by the GPS sensor:

In [7]:
# GPS-related columns documented in this section.
gps_columns = ['geometry', 'GPS Accuracy.value', 'GPS Altitude.value',
               'GPS Bearing.value', 'GPS HDOP.value',
               'GPS PDOP.value', 'GPS Speed.value', 'GPS VDOP.value']

# The columns of track_df depend on the tracks returned by the query, and
# selecting with a list that contains an absent label raises a KeyError.
# Keep only the columns that exist and report the ones that do not.
gps_present = [name for name in gps_columns if name in track_df.columns]
gps_missing = [name for name in gps_columns if name not in track_df.columns]
GPS = track_df[gps_present]

print('GPS BASED DATA')
print('---')
for column in GPS:
    print(column)
if gps_missing:
    print('(not present in this sample: ' + ', '.join(gps_missing) + ')')

GPS BASED DATA
---
geometry
GPS Accuracy.value
GPS Altitude.value
GPS Bearing.value
GPS HDOP.value
GPS PDOP.value
GPS Speed.value
GPS VDOP.value


## 2. Geometry variable
As the data comes as a GeoDataFrame, it contains a geometry variable. In this case it holds one point geometry per measurement, consisting of longitude and latitude coordinates.

In [8]:
print('GEOMETRY VARIABLE')
print('---')
# Single-column selection holding only the geometry.
geomV = track_df[['geometry']]
# One label per line.
print(*geomV.columns, sep='\n')

GEOMETRY VARIABLE
---
geometry


See example point geometries below:

In [9]:
# First five point geometries.
geomV.head(n=5)

Unnamed: 0,geometry
0,POINT (7.57939 51.96766)
1,POINT (7.57955 51.96757)
2,POINT (7.57988 51.96740)
3,POINT (7.58049 51.96715)
4,POINT (7.58107 51.96682)


## 3. Numeric variables
There are two types of numeric variables: int64 and float64.

### 3.1 Variables from datatype:  float64
Most variables are of datatype float64.
Some variables are the result of calculations based on enviroCar data.  
  
The following variables are calculated based on enviroCar data:  
- <b>Calculated MAF.value</b>
- Consumption.value  
- CO2.value 
- CO2 Emission (GPS-based).value 
- Consumption (GPS-based).value 

For detailed information on the calculations please refer to the following two 52°North blog posts:  
https://blog.52north.org/2020/07/02/fuel-consumption-models-in-envirocar/  
https://blog.52north.org/2020/04/01/estimating-fuel-consumption-of-cars/


In [10]:
print('NUMERIC VARIABLES FLOAT64:')
print('---')
# Labels of all columns whose dtype is float64.
float_columns = track_df.select_dtypes(include=['float64']).columns
for name in float_columns:
    print(name)

NUMERIC VARIABLES FLOAT64:
---
Engine Load.value
Calculated MAF.value
Speed.value
CO2.value
Intake Pressure.value
Rpm.value
Intake Temperature.value
Consumption (GPS-based).value
GPS Altitude.value
Throttle Position.value
GPS Bearing.value
Consumption.value
GPS Accuracy.value
CO2 Emission (GPS-based).value
GPS Speed.value
track.length
GPS HDOP.value
GPS PDOP.value
GPS VDOP.value
MAF.value
O2 Lambda Voltage ER.value
O2 Lambda Voltage.value


### 3.2 Variables from datatype:  int64
The following variables are the only ones of datatype int64 in the data:

In [11]:
print('NUMERIC VARIABLES INT64:')
print('---')
# Restrict the frame to int64 columns, then list their labels.
int_frame = track_df.select_dtypes(include=['int64'])
for name in int_frame.columns:
    print(name)

NUMERIC VARIABLES INT64:
---
sensor.engineDisplacement
sensor.constructionYear


## 4. Object variables
The following variables are objects, i.e. strings (the '.unit' variables are left out here, see section 1.1):

In [16]:
print('OBJECT VARIABLES')
print('---')
# Unit columns are object-typed as well; they are skipped in the listing below.
units = track_df.filter(like='.unit').columns.to_list()
object_columns = track_df.select_dtypes(include=['object']).columns
for name in object_columns:
    if name in units:
        continue
    print(name)

OBJECT VARIABLES
---
id
time
track.id
track.begin
track.end
sensor.type
sensor.model
sensor.id
sensor.fuelType
sensor.manufacturer
track.appVersion
track.touVersion


### 4.1 Categorical variables
Categorical variables are defined here as object variables which are neither indices nor time variables. They are of nominal type, i.e. they provide unordered categories.

In [13]:
print('CATEGORICAL VARIABLES')
print('---')
# Categorical columns documented in this section.
categorical_columns = ['sensor.type', 'sensor.model', 'sensor.fuelType',
                       'sensor.manufacturer',
                       'track.appVersion', 'track.touVersion']

# The columns of track_df depend on the tracks returned by the query, and
# selecting with a list that contains an absent label raises a KeyError.
# Keep only the columns that exist and report the ones that do not.
categorical_present = [name for name in categorical_columns
                       if name in track_df.columns]
categorical_missing = [name for name in categorical_columns
                       if name not in track_df.columns]
catV = track_df[categorical_present]

for column in catV:
    print(column)
if categorical_missing:
    print('(not present in this sample: ' + ', '.join(categorical_missing) + ')')

CATEGORICAL VARIABLES
---
sensor.type
sensor.model
sensor.fuelType
sensor.manufacturer
track.appVersion
track.touVersion


### 4.2 Indices
There are two indices which are relevant for track and measurement identification:  
<b>track.id</b>: each track has a unique track.id  
<b>id</b>: each measurement within a track has a unique id. This id is also unique across the whole sample.

In [14]:
print('INDICES')
print('---')
# Measurement id and track id columns.
indicesV = track_df[['id', 'track.id']]
# One label per line.
print(*indicesV.columns, sep='\n')

INDICES
---
id
track.id


### 4.3 Time variables
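There are three object variables related to time, all stored as ISO 8601 strings (e.g. `2020-07-15T12:37:03Z`):  
<b>time</b>: timestamp of the individual measurement  
<b>track.begin</b>: timestamp of the first measurement of the track  
<b>track.end</b>: timestamp of the last measurement of the track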

In [15]:
print('TIME VARIABLES')
print('---')
# The three time-related columns.
timeV = track_df[['time', 'track.begin', 'track.end']]
# Join the labels so that each one ends up on its own line.
print('\n'.join(timeV.columns))

TIME VARIABLES
---
time
track.begin
track.end
