In [None]:
import pandas as pd
pd.set_option('display.max_columns', None)
import geopandas as gpd
import numpy as np
from envirocar import TrackAPI, DownloadClient, BboxSelector, ECConfig
# Build the track API on top of a download client that uses an ECConfig
# created with no arguments.
config = ECConfig()
download_client = DownloadClient(config=config)
track_api = TrackAPI(api_client=download_client)

# Bounding box corners, passed to BboxSelector as [min_x, min_y, max_x, max_y].
min_x, min_y = 7.554130554199218, 51.95590322041212
max_x, max_y = 7.590351104736328, 51.97874790276371
bbox = BboxSelector([min_x, min_y, max_x, max_y])

# issue a query with 40 tracks
track_df = track_api.get_tracks(bbox=bbox, num_results=40)

## 0. Example
Here you see an example of the dataframe you receive when using the envirocar-py package to request multiple tracks via the enviroCar API.

In [None]:
# Preview the first five rows (head()'s default row count, spelled out here).
track_df.head(n=5)

## 1. All variables which you receive when requesting data via the enviroCar API
Below you see all available variables and their data types. Not all tracks provide the full set of data; therefore, depending on the tracks you receive, the set of variables may be smaller.  
You will notice that there are variables which end with '.value' or with '.unit'. While variables which end with '.value' actually hold values, variables ending with '.unit' hold the unit information relevant for the variable.

In [None]:
# Tabulate every column of the track dataframe next to its dtype.
# `columns` and `dtypes` already hold this information in matching order,
# so no manual accumulation loop is needed (and duplicated column labels,
# which break `track_df[column].dtype`, are handled as well).
columnname = track_df.columns.to_list()
datatype = track_df.dtypes.to_list()
d = {'column name': columnname, 'data type': datatype}
pd.DataFrame(d)

### 1.1 Units of variables
Here you can inspect the units of the numerical variables.

In [None]:
print('VARIABLE [UNIT]')
print('---')
# Every column whose label contains '.unit'.
units = track_df.filter(like='.unit').columns
for unit in units:
    # `units` is taken from track_df's own columns, so each label is
    # guaranteed to exist; print it with the distinct non-missing unit values.
    print(unit, track_df[unit].dropna().unique())

### 1.2 OBD based data
The following numeric variables are recorded by the OBD:

In [None]:
# Numeric variables that originate from the OBD, in the order they are listed.
obd_columns = ['Engine Load.value', 'Rpm.value',
               'Intake Pressure.value', 'Intake Temperature.value',
               'Speed.value', 'Throttle Position.value', 'MAF.value',
               'O2 Lambda Voltage ER.value', 'O2 Lambda Voltage.value']

# Not every set of tracks carries every variable; selecting a missing label
# with track_df[[...]] raises a KeyError, so keep only the columns present.
OBD = track_df[[column for column in obd_columns if column in track_df.columns]]

print('OBD BASED DATA')
print('---')
for column in OBD:
    print(column)

### 1.3 GPS based data 
The following numeric variables are recorded by the GPS-Sensor

In [None]:
# Variables that originate from the GPS sensor, in the order they are listed.
gps_columns = ['geometry', 'GPS Accuracy.value', 'GPS Altitude.value',
               'GPS Bearing.value', 'GPS HDOP.value',
               'GPS PDOP.value', 'GPS Speed.value', 'GPS VDOP.value']

# Not every set of tracks carries every variable; selecting a missing label
# with track_df[[...]] raises a KeyError, so keep only the columns present.
GPS = track_df[[column for column in gps_columns if column in track_df.columns]]

print('GPS BASED DATA')
print('---')
for column in GPS:
    print(column)

## 2. Geometry variable
As the data comes as a GeoDataFrame, it contains a geometry variable. In this case it is a point geometry: each measurement has a single point that contains the latitude and longitude coordinates.

In [None]:
# Title and separator, one per line.
print('GEOMETRY VARIABLE', '---', sep='\n')

# Double brackets keep the geometry column as a single-column frame.
geomV = track_df[['geometry']]
for column in geomV.columns:
    print(column)

See example point geometries below:

In [None]:
# Preview the first five geometries (head()'s default row count, spelled out here).
geomV.head(n=5)

## 3. Numeric variables
There are two types of numeric variables: int64 and float64.

### 3.1 Variables from datatype:  float64
Most variables are of datatype float64.
Some variables are the result of calculations based on enviroCar data.  
  
The following variables are calculated based on enviroCar data:  
- <b>Calculated MAF.value</b>
- Consumption.value  
- CO2.value 
- CO2 Emission (GPS-based).value 
- Consumption (GPS-based).value 

For detailed information on the calculations please refer to the following two 52°North blog posts:  
https://blog.52north.org/2020/07/02/fuel-consumption-models-in-envirocar/  
https://blog.52north.org/2020/04/01/estimating-fuel-consumption-of-cars/


In [None]:
# Title and separator, one per line.
print('NUMERIC VARIABLES FLOAT64:', '---', sep='\n')

# Labels of all columns stored as float64.
float_columns = track_df.select_dtypes(include=['float64']).columns
for column in float_columns:
    print(column)

### 3.2 Variables from datatype:  Int64
The following variables are the only ones of datatype int64 in the data:

In [None]:
# Title and separator, one per line.
print('NUMERIC VARIABLES INT64:', '---', sep='\n')

# Labels of all columns stored as int64.
int_columns = track_df.select_dtypes(include=['int64']).columns
for column in int_columns:
    print(column)

## 4. Object variables
The following variables are objects, i.e. strings:

In [None]:
# Title and separator, one per line.
print('OBJECT VARIABLES', '---', sep='\n')

# Unit columns are object-typed too; collect them so they can be left out.
units = track_df.filter(like='.unit').columns.to_list()
object_columns = track_df.select_dtypes(include=['object']).columns
for column in object_columns:
    if column in units:
        continue
    print(column)

### 4.1 Categorical variables
Categorical variables are defined here as object variables which are not indices. Furthermore, they are of nominal type and provide, well, categories :)

In [None]:
print('CATEGORICAL VARIABLES')
print('---')
# Object variables treated as categories, in the order they are listed.
categorical_columns = ['sensor.type', 'sensor.model', 'sensor.fuelType',
                       'sensor.manufacturer',
                       'track.appVersion', 'track.touVersion']

# Not every set of tracks carries every variable; selecting a missing label
# with track_df[[...]] raises a KeyError, so keep only the columns present.
catV = track_df[[column for column in categorical_columns
                 if column in track_df.columns]]
for column in catV:
    print(column)

### 4.2 Indices
There are two indices which are relevant for the track and measurement identification:  
<b>track.id</b>: each track has a unique track.id  
<b>id</b>: each measurement within a certain track has a unique id. This id is also unique in the whole sample.

In [None]:
# Title and separator, one per line.
print('INDICES', '---', sep='\n')

# The two identifier columns: one per measurement, one per track.
index_columns = ['id', 'track.id']
indicesV = track_df[index_columns]
for column in indicesV.columns:
    print(column)

### 4.3 Time variables
There are three object variables related to time:  
<b>time</b>: timestamp of the data point  
<b>track.begin</b>: timestamp of the first measurement of the track   
<b>track.end</b>: timestamp of the last measurement of the track

In [None]:
# Title and separator, one per line.
print('TIME VARIABLES', '---', sep='\n')

# The three time-related object columns.
time_columns = ['time', 'track.begin', 'track.end']
timeV = track_df[time_columns]
for column in timeV.columns:
    print(column)