# Using Bokeh to make Visuals with our PostgreSQL data

### Bokeh is a powerful tool that allows us to display interactive visuals. Note that the graphs generated here will not render in the GitHub repository. They will be saved to the "Extraction/Graphs" directory.

### For more information on Bokeh, please visit their documentation here: 'https://docs.bokeh.org/en/latest/docs/user_guide.html'

In [1]:
# Import python libraries
import psycopg2
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.transform import factor_cmap
from bokeh.io import export_png

# Import postgres server info key
from postgres_info import user, password

### Use 'psycopg2' to extract all registered CYBERTRUCKS in Washington state using the SQL statement: 
SELECT county, model <br>
FROM vehicles <br>
JOIN location_info <br>
ON vehicles.postal_code = location_info.postal_code <br>
JOIN vehicle_types <br>
ON vehicles.vehicle_type_id = vehicle_types.vehicle_type_id <br>
WHERE model = 'CYBERTRUCK' <br>
AND state = 'WA'; <br>

In [2]:
# Connect to the database. If the connection fails, report the error and
# re-raise it: every step below needs a live connection, and continuing would
# only hide the real cause behind a NameError (or reuse a stale, closed 'conn'
# left over from an earlier cell).
try:
    conn = psycopg2.connect(
        host="127.0.0.1",
        port="5432",
        user=user,
        password=password,
        database="ev_db"
    )

# Report the connection error, then stop the cell with the original exception
except psycopg2.Error as e:
    print("Error connecting to the database:")
    print(e)
    raise

# Confirm the connection before running the query
else:
    print("Connection established successfully")

# SQL query to pull all registered Tesla CYBERTRUCKs in Washington state
cybertruck_query = "SELECT county, model FROM vehicles JOIN location_info ON vehicles.postal_code = location_info.postal_code JOIN vehicle_types ON vehicles.vehicle_type_id = vehicle_types.vehicle_type_id WHERE model = 'CYBERTRUCK' AND state = 'WA';"

# Run the query. The 'with' block closes the cursor and the 'finally' block
# closes the connection, even if executing or fetching raises an error.
try:
    with conn.cursor() as cursor:
        # Execute the above SQL query
        cursor.execute(cybertruck_query)

        # Fetch all rows from the result
        rows = cursor.fetchall()

        # Pull the column names from 'cursor.description'
        column_names = [desc[0] for desc in cursor.description]
finally:
    conn.close()

# Create a new dataframe using the extracted data
cybertruck_df = pd.DataFrame(rows, columns=column_names)

# Display the dataframe
cybertruck_df

Connection established successfully


Unnamed: 0,county,model
0,King,CYBERTRUCK
1,King,CYBERTRUCK
2,King,CYBERTRUCK
3,King,CYBERTRUCK
4,King,CYBERTRUCK
...,...,...
854,Franklin,CYBERTRUCK
855,Pierce,CYBERTRUCK
856,Whatcom,CYBERTRUCK
857,Grays Harbor,CYBERTRUCK


In [3]:
# Count the rows of 'cybertruck_df' per county, order the counties from the
# highest count to the lowest, and turn 'county' back into a regular column
cybertruck_count = (
    cybertruck_df
    .groupby('county')
    .agg(Count=('county', 'size'))
    .sort_values(by='Count', ascending=False)
    .reset_index()
)

# Preview the first rows of the counts
cybertruck_count.head()

Unnamed: 0,county,Count
0,King,398
1,Snohomish,94
2,Pierce,75
3,Clark,64
4,Kitsap,33


In [4]:
# Set up a figure whose x-axis categories are the counties in 'cybertruck_count'
cybertruck_bar = figure(
    x_range=cybertruck_count['county'],
    title="Cybertrucks by County",
    x_axis_label='County',
    y_axis_label='Count of Cybertrucks',
    frame_width=2000,
)

# Draw one vertical bar per county, with the bar height taken from 'Count'
cybertruck_bar.vbar(
    x='county',
    top='Count',
    width=0.9,
    source=cybertruck_count,
)

# Direct Bokeh output to the notebook
output_notebook()

# Save the chart as a png file in the "Graphs" directory
export_png(cybertruck_bar, filename="Graphs/cybertrucks_by_county.png")

# Render the chart
show(cybertruck_bar)

### Extract vehicles and their vehicle type data from all rows with an 'electric_range' value above '0' using this SQL statement:
SELECT vehicles.vehicle_type_id, electric_range, model_year, make, model <br>
FROM vehicles <br>
JOIN vehicle_types <br>
ON vehicles.vehicle_type_id = vehicle_types.vehicle_type_id <br>
WHERE electric_range > 0;

In [5]:
# Connect to the database. If the connection fails, report the error and
# re-raise it: every step below needs a live connection, and continuing would
# only hide the real cause behind a NameError (or reuse a stale, closed 'conn'
# left over from an earlier cell).
try:
    conn = psycopg2.connect(
        host="127.0.0.1",
        port="5432",
        user=user,
        password=password,
        database="ev_db"
    )

# Report the connection error, then stop the cell with the original exception
except psycopg2.Error as e:
    print("Error connecting to the database:")
    print(e)
    raise

# Confirm the connection before running the query
else:
    print("Connection established successfully")

# SQL query that pulls the vehicle and vehicle type for every record with an 'electric_range' value above 0
electric_range_query = "SELECT vehicles.vehicle_type_id, electric_range, model_year, make, model FROM vehicles JOIN vehicle_types ON vehicles.vehicle_type_id = vehicle_types.vehicle_type_id WHERE electric_range > 0;"

# Run the query. The 'with' block closes the cursor and the 'finally' block
# closes the connection, even if executing or fetching raises an error.
try:
    with conn.cursor() as cursor:
        # Execute the query
        cursor.execute(electric_range_query)

        # Fetch all rows from the result
        rows = cursor.fetchall()

        # Pull the column names from 'cursor.description'
        column_names = [desc[0] for desc in cursor.description]
finally:
    conn.close()

# Create a new dataframe using the extracted data
electric_range_df = pd.DataFrame(rows, columns=column_names)

# Show dataframe
electric_range_df.head()

Connection established successfully


Unnamed: 0,vehicle_type_id,electric_range,model_year,make,model
0,vm0027,75.0,2013,NISSAN,LEAF
1,vm0199,270.0,2019,TESLA,MODEL S
2,vm0129,25.0,2017,TOYOTA,PRIUS PRIME
3,vm0081,19.0,2016,FORD,C-MAX
4,vm0237,266.0,2020,TESLA,MODEL 3


In [6]:
# Build a 'Vehicle Name' column by joining 'model_year', 'make' and 'model'
# with single spaces between them
electric_range_df['Vehicle Name'] = (
    electric_range_df['model_year']
    + ' ' + electric_range_df['make']
    + ' ' + electric_range_df['model']
)

# Keep only the combined name and the electric range in a new dataframe
ev_range_df = electric_range_df[['Vehicle Name', 'electric_range']]

# Preview the first rows
ev_range_df.head()

Unnamed: 0,Vehicle Name,electric_range
0,2013 NISSAN LEAF,75.0
1,2019 TESLA MODEL S,270.0
2,2017 TOYOTA PRIUS PRIME,25.0
3,2016 FORD C-MAX,19.0
4,2020 TESLA MODEL 3,266.0


In [7]:
# Average the 'electric_range' for each 'Vehicle Name' and order the result
# from the highest average to the lowest
ev_mean = (
    ev_range_df
    .groupby('Vehicle Name')
    .agg(Average=('electric_range', 'mean'))
    .sort_values(by='Average', ascending=False)
    .reset_index()
)

# Keep the first 10 rows of the sorted averages
top_10_er = ev_mean.head(10)

# Display the dataframe
top_10_er

Unnamed: 0,Vehicle Name,Average
0,2020 TESLA MODEL S,331.423729
1,2020 TESLA MODEL 3,298.24381
2,2020 TESLA MODEL X,291.662841
3,2020 TESLA MODEL Y,291.0
4,2019 TESLA MODEL X,289.0
5,2019 TESLA MODEL S,270.0
6,2012 TESLA MODEL S,265.0
7,2020 CHEVROLET BOLT EV,259.0
8,2019 HYUNDAI KONA,258.0
9,2020 HYUNDAI KONA,258.0


In [8]:
# Set up a figure whose x-axis categories are the vehicle names in 'top_10_er'
top_10_range_graph = figure(
    x_range=top_10_er['Vehicle Name'],
    title="10 EVs with Highest Range (2024)",
    x_axis_label='Vehicle Model',
    y_axis_label='Average Electric Range',
    frame_width=1500,
)

# Draw one purple vertical bar per vehicle, with the bar height taken from 'Average'
top_10_range_graph.vbar(
    x='Vehicle Name',
    top='Average',
    width=0.7,
    source=top_10_er,
    color='purple',
)

# Direct Bokeh output to the notebook
output_notebook()

# Save the chart as a png file in the "Graphs" directory
export_png(top_10_range_graph, filename="Graphs/top_10_ev_range.png")

# Render the chart
show(top_10_range_graph)