# 3-Exploring the Data
In this notebook we start exploring the database downloaded and created by the previous notebooks. 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
from sqlapi import VedDb

Start by creating the `db` variable of type `VedDb`. This is our API to the database. All the queries and modifications will happen through this object.

In [2]:
# Single database access object; every query in this notebook goes through it.
db = VedDb()

The `VedDb` class contains a number of convenience functions to help you explore the database through Pandas DataFrames. The `query` function executes an arbitrary SQL query against the database and returns the result as a Pandas DataFrame for further inspection or analysis.

The following command reads all the vehicles into a DataFrame:

In [3]:
# Load every row and column of the vehicle table; later cells treat the result as a DataFrame.
vehicles = db.query('select * from vehicle')

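We can now aggregate that DataFrame with Pandas. Grouping by `vehicle_type` and calling `count()` shows how many non-null values each column holds for every vehicle type: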

In [4]:
# count() tallies non-null entries, so each cell of the result shows how many
# vehicles of that type have a value in the given column.
vehicles.groupby("vehicle_type").count()

Unnamed: 0_level_0,vehicle_id,vehicle_class,engine,transmission,drive_wheels,weight
vehicle_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
EV,3,3,3,0,3,3
HEV,92,1,92,3,0,92
ICE,264,4,264,79,0,248
PHEV,24,24,24,12,24,24


In [6]:
# Preview the first rows (at most 20) of the vehicle table.
# NOTE(review): this re-reads the same table already held in `vehicles`, and uses
# `query_df` where the earlier cell used `query` -- confirm whether the two
# methods differ and whether `vehicles.head(20)` would serve here.
db.query_df("select * from vehicle").head(20)

Unnamed: 0,vehicle_id,vehicle_type,vehicle_class,engine,transmission,drive_wheels,weight
0,2,ICE,Car,4-FI 2.0L T/C,,,3500.0
1,5,HEV,Car,4-GAS/ELECTRIC 2.0L,,,3500.0
2,7,ICE,SUV,6-FI 3.6L,AUTOMATIC,,4500.0
3,8,ICE,Car,4-FI 1.5L,5-SP MANUAL,,2500.0
4,9,PHEV,Car,4-GAS/ELECTRIC 1.4L,,FWD,4000.0
5,10,EV,Car,ELECTRIC,,FWD,3500.0
6,11,PHEV,Car,4-GAS/ELECTRIC 2.0L,CVT,FWD,4000.0
7,12,ICE,Car,4-FI 1.8L,,,2500.0
8,108,ICE,,8-4V/FI 6.0L,6-SP AUTOMATIC,,6000.0
9,110,ICE,,6-FI 3.6L,,,4500.0


In [7]:
# Peek at the first signal rows recorded for vehicle 10.
# NOTE(review): `rows` presumably caps the number of rows returned -- confirm in VedDb.head.
db.head("select * from signal where vehicle_id = 10", rows=20)

Unnamed: 0,signal_id,day_num,vehicle_id,trip_id,time_stamp,latitude,longitude,speed,maf,rpm,...,ac_power_kw,ac_power_w,heater_power_w,hv_bat_current,hv_bat_soc,hv_bat_volt,st_ftb_1,st_ftb_2,lt_ftb_1,lt_ftb_2
0,18254768,1.719774,10,1558,0,42.277066,-83.763404,53.59,,,...,,0.0,2250.0,-21.5,96.341469,386.0,,,,
1,18254769,1.719774,10,1558,200,42.277066,-83.763404,51.98,,,...,,0.0,2250.0,-21.5,96.341469,386.0,,,,
2,18254770,1.719774,10,1558,1200,42.277066,-83.763404,50.369999,,,...,,0.0,2250.0,-21.5,96.341469,386.0,,,,
3,18254771,1.719774,10,1558,1500,42.277066,-83.763404,50.369999,,,...,,0.0,2250.0,23.5,96.341469,390.5,,,,
4,18254772,1.719774,10,1558,2300,42.277066,-83.763404,49.799999,,,...,,0.0,2250.0,23.5,96.341469,390.5,,,,
5,18254773,1.719774,10,1558,3300,42.277066,-83.763404,51.57,,,...,,0.0,2250.0,23.5,96.341469,390.5,,,,
6,18254774,1.719774,10,1558,4400,42.277066,-83.763404,52.939999,,,...,,0.0,2250.0,23.5,96.341469,390.5,,,,
7,18254775,1.719774,10,1558,5000,42.277254,-83.762512,52.939999,,,...,,0.0,2250.0,23.5,96.341469,390.5,,,,
8,18254776,1.719774,10,1558,5400,42.277254,-83.762512,54.299999,,,...,,0.0,2250.0,23.5,96.341469,390.5,,,,
9,18254777,1.719774,10,1558,6000,42.277254,-83.762512,54.299999,,,...,,0.0,2250.0,23.5,96.341469,390.5,,,,


In [8]:
# Peek at the last signal rows recorded for vehicle 10.
# NOTE(review): `rows` presumably caps the number of rows returned -- confirm in VedDb.tail.
db.tail("select * from signal where vehicle_id = 10", rows=20)

Unnamed: 0,signal_id,day_num,vehicle_id,trip_id,time_stamp,latitude,longitude,speed,maf,rpm,...,ac_power_kw,ac_power_w,heater_power_w,hv_bat_current,hv_bat_soc,hv_bat_volt,st_ftb_1,st_ftb_2,lt_ftb_1,lt_ftb_2
167237,1734624,374.499878,10,3271,412000,42.302582,-83.705806,63.289997,,,...,,400.0,500.0,9.5,96.341469,381.5,,,,
167238,1734625,374.499878,10,3271,412200,42.302582,-83.705806,63.289997,,,...,,400.0,500.0,39.0,96.341469,381.5,,,,
167239,1734626,374.499878,10,3271,413000,42.302582,-83.705806,59.759998,,,...,,400.0,500.0,39.0,96.341469,381.5,,,,
167240,1734627,374.499878,10,3271,414000,42.302582,-83.705806,56.16,,,...,,400.0,500.0,39.0,96.341469,381.5,,,,
167241,1734628,374.499878,10,3271,414300,42.302582,-83.705806,56.16,,,...,,400.0,500.0,38.5,96.341469,381.5,,,,
167242,1734629,374.499878,10,3271,414900,42.302604,-83.705229,56.16,,,...,,400.0,500.0,38.5,96.341469,381.5,,,,
167243,1734630,374.499878,10,3271,415100,42.302604,-83.705229,52.66,,,...,,400.0,500.0,38.5,96.341469,381.5,,,,
167244,1734631,374.499878,10,3271,415200,42.302604,-83.705229,52.66,,,...,,400.0,500.0,38.5,96.341469,390.5,,,,
167245,1734632,374.499878,10,3271,415300,42.302604,-83.705229,52.66,,,...,,400.0,500.0,39.0,96.341469,390.5,,,,
167246,1734633,374.499878,10,3271,416100,42.302604,-83.705229,46.399998,,,...,,400.0,500.0,39.0,96.341469,390.5,,,,


In [20]:
# Count the distinct (vehicle_id, day_num) pairs present in the signal table.
# NOTE(review): the subquery alias suggests each pair is treated as one trip --
# confirm that day_num identifies a single trip for a given vehicle.
sql = """
select count(0) from (
    select distinct vehicle_id, day_num
    from signal 
) as distinct_trips
"""
# The query yields a single row, displayed through the DataFrame preview helper.
db.head(sql)

Unnamed: 0,count(0)
0,32548


In [22]:
# Count the distinct trip_id values across the whole signal table.
# NOTE(review): the saved output (4153) is far below the number of distinct
# (vehicle_id, day_num) pairs (32548), so trip_id presumably repeats across
# vehicles and is not a unique trip key on its own -- confirm.
db.head("select count(distinct trip_id) from signal")

Unnamed: 0,count(distinct trip_id)
0,4153
