In [1]:
import os

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
import altair as alt

In [2]:
# Select Altair's "mimetype" renderer for every chart shown below.
# NOTE(review): a frontend that cannot render this output falls back to the plain-text
# "<VegaLite 5 object>" notice — confirm the target frontend supports it.
# The trailing semicolon keeps the return value of enable() out of the cell output.
alt.renderers.enable("mimetype");

In [3]:
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Placeholder in the URL template -> environment variable that supplies it.
db_env_vars = {
    'user': 'DB_USER',
    'password': 'DB_PASSWORD',
    'host': 'DB_HOST',
    'port': 'DB_PORT',
    'db': 'DB_NAME',
}
db_settings = {field: os.getenv(env_name) for field, env_name in db_env_vars.items()}

# Fail fast: os.getenv returns None for an unset variable and str.format would
# render it as the literal text "None", producing a bogus URL that only fails
# later with a much less helpful error.
missing_vars = [db_env_vars[field] for field, value in db_settings.items() if value is None]
if missing_vars:
    raise RuntimeError(
        "Missing database environment variable(s): " + ", ".join(missing_vars)
    )

# NOTE(review): values are inserted verbatim, so credentials containing
# URL-reserved characters (e.g. '@', ':', '/') presumably need to be
# percent-encoded in the environment already — confirm.
connection_string = "postgresql://{user}:{password}@{host}:{port}/{db}".format(**db_settings)
engine = create_engine(connection_string)

### Basic stats

In [4]:
# All three headline figures come from the same table, so fetch them with a
# single aggregate query (one round trip) instead of three separate ones.
query = """
SELECT
    COUNT(*) AS cnt_flights,
    COUNT(DISTINCT pilot) AS cnt_pilots,
    COUNT(DISTINCT site) AS cnt_sites
FROM glideator_mart.fact_flights
"""
# The query has no GROUP BY, so the result is exactly one row.
basic_stats = pd.read_sql(query, engine).iloc[0]

# Convert to built-in ints so the values print as plain numbers.
cnt_flights = int(basic_stats["cnt_flights"])
cnt_pilots = int(basic_stats["cnt_pilots"])
cnt_sites = int(basic_stats["cnt_sites"])


print("Number of flights: ", cnt_flights)
print("Number of pilots: ", cnt_pilots)
print("Number of sites: ", cnt_sites)

Number of flights:  1211925
Number of pilots:  31311
Number of sites:  252


### Site popularity

In [5]:
# The 50 sites with the most flights, plus the number of distinct pilots at each.
query = """
SELECT
    site,
    COUNT(*) AS flights,
    COUNT(DISTINCT pilot) AS pilots
FROM glideator_mart.fact_flights
GROUP BY site
ORDER BY COUNT(*) DESC
LIMIT 50
"""
popularity = pd.read_sql(query, engine)


def _site_barchart(data, metric, label):
    """Build a bar chart of one metric per launch site.

    Parameters:
        data: DataFrame holding a `site` column and the `metric` column.
        metric: name of the quantitative column to plot; bars are sorted by
            it in descending order.
        label: y-axis title, also used as the prefix of the chart title.

    Returns:
        The configured Altair chart (not yet displayed).
    """
    x_axis = alt.X(
        'site:N',
        sort=alt.EncodingSortField(field=metric, order='descending'),
        title='Launch Site',
    )
    y_axis = alt.Y(f'{metric}:Q', title=label)
    return (
        alt.Chart(data)
        .mark_bar(color='steelblue')
        .encode(x=x_axis, y=y_axis, tooltip=['site:N', f'{metric}:Q'])
        .properties(width=800, height=400, title=f'{label} by Launch Site')
    )


# Both charts show the same 50 sites (selected by flight count in the query);
# only the plotted metric and the bar ordering differ.
for metric, label in (('flights', 'Total Flights'), ('pilots', 'Total Pilots')):
    barchart = _site_barchart(popularity, metric, label)
    barchart.show()

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


### How many sites has a pilot visited?

In [6]:
# Build the histogram in the database: the inner query counts distinct sites
# per pilot, the outer query counts how many pilots share each site count.
# Only the aggregated buckets are loaded into pandas.
query = """
SELECT
    cnt_sites,
    COUNT(*) AS num_pilots
FROM (
    SELECT
        pilot,
        COUNT(DISTINCT site) AS cnt_sites
    FROM glideator_mart.fact_flights
    GROUP BY pilot
) pilot_sites
GROUP BY cnt_sites
ORDER BY cnt_sites
"""
histogram_df = pd.read_sql(query, engine)

# Encodings are named separately so the chart definition itself stays short.
sites_axis = alt.X('cnt_sites:O', title='Number of Sites Visited by Pilot')
pilots_axis = alt.Y('num_pilots:Q', title='Number of Pilots')
hover_fields = [
    alt.Tooltip('num_pilots:Q', title='Number of Pilots'),
    alt.Tooltip('cnt_sites:O', title='Sites Visited'),
]

hist_chart = (
    alt.Chart(histogram_df)
    .mark_bar(color='teal')
    .encode(x=sites_axis, y=pilots_axis, tooltip=hover_fields)
    .properties(
        title='Distribution of Sites Visited per Pilot',
        width=600,
        height=400,
    )
)
hist_chart.show()

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [7]:
# Build the histogram in the database: the inner query counts flights per
# pilot, the outer query counts how many pilots share each flight count.
# The ascending ORDER BY combined with LIMIT 100 keeps only the 100 lowest
# distinct flight counts, so pilots with higher counts are not in the chart.
query = """
SELECT
    num_flights,
    COUNT(*) AS num_pilots
FROM (
    SELECT
        pilot,
        COUNT(*) AS num_flights
    FROM glideator_mart.fact_flights
    GROUP BY pilot
) pilot_flights
GROUP BY num_flights
ORDER BY num_flights
LIMIT 100
"""
flights_histogram_df = pd.read_sql(query, engine)

# Encodings are named separately so the chart definition itself stays short.
flights_axis = alt.X('num_flights:O', title='Number of Flights per Pilot')
pilot_count_axis = alt.Y('num_pilots:Q', title='Number of Pilots')
flights_hover_fields = [
    alt.Tooltip('num_pilots:Q', title='Number of Pilots'),
    alt.Tooltip('num_flights:O', title='Flights'),
]

flights_hist_chart = (
    alt.Chart(flights_histogram_df)
    .mark_bar(color='orange')
    .encode(x=flights_axis, y=pilot_count_axis, tooltip=flights_hover_fields)
    .properties(
        title='Distribution of Flights per Pilot',
        width=600,
        height=400,
    )
)
flights_hist_chart.show()

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting
