# Load database

In [None]:
# Install the notebook dependencies into the environment of the running kernel.
# NOTE(review): duckdb and duckdb-engine are pinned, but jupysql and jupysql-plugin are not;
# consider pinning them as well so a fresh run resolves the same versions.
%pip install duckdb==0.9.1 duckdb-engine==0.9.1 jupysql jupysql-plugin

In [1]:
# Load (or reload, if already loaded) the JupySQL extension that provides the SQL magics
%reload_ext sql

# Connect to the DuckDB database file 'car_data.duckdb'; later SQL cells run against it.
# The path in the URL is relative - presumably resolved against the kernel's working directory.
%sql duckdb:///../data/database/car_data.duckdb

In [2]:
# List the tables available in the connected database
%sqlcmd tables

Name
fuel
hybrid
electric
all_vehicles


In [3]:
# Open the JupySQL table explorer on all_vehicles to inspect its contents
%sqlcmd explore --table all_vehicles

In [4]:
# Open the JupySQL table explorer on electric to inspect its contents
%sqlcmd explore --table electric

## Store queries

### Tables for Plot 1

In [5]:
%%sql
-- Plot 1 source table. Counts hybrid and electric vehicles per model year and vehicle type,
-- largest groups first.
-- CREATE OR REPLACE keeps this cell re-runnable against the persistent database file,
-- where a plain CREATE TABLE fails once the table already exists.
-- GROUP BY already yields one row per (model_year, vehicle_type), so no DISTINCT is needed.
CREATE OR REPLACE TABLE q_1_hybrid_electric AS
    SELECT model_year, vehicle_type, COUNT(id) AS num_vehicles
    FROM all_vehicles
    WHERE vehicle_type IN ('hybrid', 'electric')
    GROUP BY model_year, vehicle_type
    ORDER BY num_vehicles DESC;

Count


In [6]:
%%sql
CREATE TABLE q_1_fuel AS
    SELECT DISTINCT model_year, vehicle_type, COUNT(id) AS num_vehicles
    FROM fuel
    GROUP BY model_year, vehicle_type
    ORDER BY model_year;

Count


#### Verify data

In [7]:
%%sql
-- Sanity check of the stored hybrid and electric counts
SELECT
    model_year,
    vehicle_type,
    num_vehicles
FROM q_1_hybrid_electric;

model_year,vehicle_type,num_vehicles
2023,electric,158
2022,electric,82
2021,electric,49
2022,hybrid,41
2020,electric,40
2021,hybrid,40
2019,electric,35
2023,hybrid,33
2020,hybrid,31
2016,electric,27


In [8]:
%%sql
-- Sanity check of the stored fuel vehicle counts
SELECT
    model_year,
    vehicle_type,
    num_vehicles
FROM q_1_fuel;

model_year,vehicle_type,num_vehicles
1995,fuel-only,841
1996,fuel-only,698
1997,fuel-only,658
1998,fuel-only,634
1999,fuel-only,688
2000,fuel-only,639
2001,fuel-only,679
2002,fuel-only,740
2003,fuel-only,820
2004,fuel-only,898


### Table for Plot 2

In [9]:
%%sql
-- Plot 2 source table. Keeps the city, highway and combined consumption columns
-- plus the CO2 emissions column for every row of all_vehicles.
-- CREATE OR REPLACE keeps this cell re-runnable against the persistent database file,
-- where a plain CREATE TABLE fails once the table already exists.
CREATE OR REPLACE TABLE boxplot_fuel_consum AS
    SELECT
        consumption_city_l_100_km,
        consumption_hwy_l_100_km,
        consumption_comb_l_100_km,
        co2_emissions_g_km
    FROM all_vehicles;

Count


#### Verify data

In [10]:
%%sql
-- Sanity check of the stored consumption and emissions columns
SELECT
    consumption_city_l_100_km,
    consumption_hwy_l_100_km,
    consumption_comb_l_100_km,
    co2_emissions_g_km
FROM boxplot_fuel_consum;

consumption_city_l_100_km,consumption_hwy_l_100_km,consumption_comb_l_100_km,co2_emissions_g_km
1.9,2.4,2.1,0.0
2.2,2.6,2.4,0.0
2.1,2.4,2.2,0.0
1.9,2.4,2.1,0.0
2.2,2.6,2.4,0.0
1.9,2.5,2.2,0.0
1.9,2.5,2.2,0.0
2.5,2.5,2.5,0.0
2.5,2.4,2.5,0.0
2.7,2.6,2.6,0.0


### Table for Plot 3

In [11]:
%%sql
-- Plot 3 source table. Keeps range, recharge time, vehicle class and model year
-- for every row of the electric table.
-- CREATE OR REPLACE keeps this cell re-runnable against the persistent database file,
-- where a plain CREATE TABLE fails once the table already exists.
CREATE OR REPLACE TABLE electric_range_charge AS
    SELECT range_km, recharge_time_h, vehicle_class, model_year
    FROM electric;

Count


#### Verify data

In [12]:
%%sql
-- Sanity check of the stored range and recharge data
SELECT
    range_km,
    recharge_time_h,
    vehicle_class,
    model_year
FROM electric_range_charge

range_km,recharge_time_h,vehicle_class,model_year
100.0,7.0,subcompact,2012
117.0,7.0,mid-size,2012
122.0,4.0,compact,2013
100.0,7.0,subcompact,2013
117.0,7.0,mid-size,2013
109.0,8.0,two-seater,2013
109.0,8.0,two-seater,2013
224.0,6.0,full-size,2013
335.0,10.0,full-size,2013
426.0,12.0,full-size,2013


### Table for Plot 4

In [13]:
%%sql
-- Plot 4 source table. Keeps vehicle type, fuel type and CO2 emissions for the rows
-- of all_vehicles that have an emissions value.
-- CREATE OR REPLACE keeps this cell re-runnable against the persistent database file,
-- where a plain CREATE TABLE fails once the table already exists.
CREATE OR REPLACE TABLE hist_co2 AS
    SELECT vehicle_type, fuel_type, co2_emissions_g_km
    FROM all_vehicles
    WHERE co2_emissions_g_km IS NOT NULL;

Count


#### Verify data

In [14]:
%%sql
-- Sanity check of the stored emissions rows
SELECT
    vehicle_type,
    fuel_type,
    co2_emissions_g_km
FROM hist_co2

vehicle_type,fuel_type,co2_emissions_g_km
electric,electricity,0.0
electric,electricity,0.0
electric,electricity,0.0
electric,electricity,0.0
electric,electricity,0.0
electric,electricity,0.0
electric,electricity,0.0
electric,electricity,0.0
electric,electricity,0.0
electric,electricity,0.0


### Tables for Plot 5

In [15]:
%%sql
-- Plot 5 source table. Keeps CO2 emissions and transmission type for fuel-only and hybrid
-- vehicles of the listed makes (presumably the US brands, going by the table name),
-- restricted to rows that have an emissions value.
-- CREATE OR REPLACE keeps this cell re-runnable against the persistent database file,
-- where a plain CREATE TABLE fails once the table already exists.
CREATE OR REPLACE TABLE co2_usa AS
    SELECT vehicle_type, make, co2_emissions_g_km, transmission_type
    FROM all_vehicles
    WHERE co2_emissions_g_km IS NOT NULL
      AND vehicle_type IN ('fuel-only', 'hybrid')
      AND make IN ('cadillac', 'chevrolet', 'chrysler', 'ford', 'jeep', 'lincoln');

Count


#### Verify data

In [16]:
%%sql
-- Sanity check of the stored emissions rows for the selected makes
SELECT
    vehicle_type,
    make,
    co2_emissions_g_km,
    transmission_type
FROM co2_usa;

vehicle_type,make,co2_emissions_g_km,transmission_type
fuel-only,cadillac,206.0,automatic with select Shift
fuel-only,cadillac,221.0,automatic with select Shift
fuel-only,cadillac,216.0,automatic with select Shift
fuel-only,cadillac,231.0,automatic with select Shift
fuel-only,cadillac,239.0,automatic with select Shift
fuel-only,cadillac,244.0,automatic with select Shift
fuel-only,cadillac,297.0,automatic with select Shift
fuel-only,cadillac,303.0,manual
fuel-only,cadillac,207.0,automatic with select Shift
fuel-only,cadillac,252.0,automatic with select Shift
