In [1]:
import pandas as pd
import plotly.express as px
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os

# Load variables from a .env file (if one is present) into the environment
# before the database settings are read.
load_dotenv()

# Database connection settings; each is None when the variable is not set.
DB_HOST = os.environ.get("DB_HOST")
DB_PORT = os.environ.get("DB_PORT")
DB_USER = os.environ.get("DB_USER")
DB_PASSWORD = os.environ.get("DB_PASSWORD")
DB_NAME = os.environ.get("DB_NAME")

# Schema name interpolated into the SQL queries in this notebook.
PRODUCTION_SCHEMA = "zookeepers_production"

# Use plotly as the pandas plotting backend.
pd.set_option("plotting.backend", "plotly")

def get_engine_connection():
    """Create a SQLAlchemy engine for the PostgreSQL DBMS on AWS Aurora.

    The connection URL is assembled from the module-level ``DB_USER``,
    ``DB_PASSWORD``, ``DB_HOST``, ``DB_PORT`` and ``DB_NAME`` settings and
    uses the ``postgresql+psycopg2`` dialect/driver.

    Returns:
        DB engine, as returned by ``create_engine``.

    Raises:
        ValueError: if any of the ``DB_*`` settings is unset (``None``).
    """
    from urllib.parse import quote_plus

    settings = {
        "DB_HOST": DB_HOST,
        "DB_PORT": DB_PORT,
        "DB_USER": DB_USER,
        "DB_PASSWORD": DB_PASSWORD,
        "DB_NAME": DB_NAME,
    }
    # Fail early with a readable message instead of building a URL that
    # contains the literal text "None".
    missing = [name for name, value in settings.items() if value is None]
    if missing:
        raise ValueError(
            "Missing database settings: " + ", ".join(missing)
        )

    # Credentials are percent-encoded so characters such as '@', ':' or '/'
    # in the user name or password cannot be mistaken for URL delimiters.
    conn_string = (
        f"postgresql+psycopg2://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}"
        f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
    )

    return create_engine(conn_string)

In [11]:
# Rides that began during the current day, joined to each rider's gender and
# age in whole years. The day window is half-open: [CURRENT_DATE,
# CURRENT_DATE + 1), so a ride starting at exactly midnight is included.
# The RIGHT JOIN keeps every ride even when no matching user row exists.
query = f"""
    WITH user_gender_dob AS (
        SELECT user_id, gender,
            DATE_PART('year', AGE(CURRENT_DATE, date_of_birth))
                AS age 
            FROM {PRODUCTION_SCHEMA}.users
    ),

    rides_before AS (
        SELECT *
            FROM {PRODUCTION_SCHEMA}.rides
            WHERE begin_timestamp >= (CURRENT_DATE) and begin_timestamp < (CURRENT_DATE + 1)
    )

    SELECT ugd.user_id, rb.ride_id, ugd.gender, ugd.age, rb.begin_timestamp,
        rb.total_duration_sec, rb.total_power, rb.mean_power, rb.mean_resistance,
            rb.mean_rpm, rb.mean_heart_rate

        FROM user_gender_dob AS ugd
        RIGHT JOIN rides_before AS rb
            ON ugd.user_id = rb.user_id
"""

# The engine is only needed for this one read, so release its connection
# pool afterwards instead of leaving it open for the life of the kernel.
engine = get_engine_connection()
try:
    df = pd.read_sql(query, con=engine)
finally:
    engine.dispose()

In [24]:
# Display the query result (one row per ride); long frames are shown truncated.
df

Unnamed: 0,user_id,ride_id,gender,age,begin_timestamp,total_duration_sec,total_power,mean_power,mean_resistance,mean_rpm,mean_heart_rate
0,4729,412,Male,36.0,2022-10-10 00:05:31,520,25094.693,48.259025,40.546154,48.828846,81.936538
1,4730,413,Male,62.0,2022-10-10 00:14:14,460,26529.998,57.673909,40.117391,45.152174,136.084783
2,4730,414,Male,62.0,2022-10-10 00:21:57,520,27644.600,53.162692,39.846154,49.542308,121.601923
3,4730,415,Male,62.0,2022-10-10 00:30:39,520,21844.442,42.008542,39.761538,48.482692,128.094231
4,4730,416,Male,62.0,2022-10-10 00:39:22,520,21291.876,40.945915,39.661538,46.976923,119.178846
...,...,...,...,...,...,...,...,...,...,...,...
91,4754,503,Female,49.0,2022-10-10 13:01:57,520,19744.258,37.969727,39.838462,48.525000,114.592308
92,4754,504,Female,49.0,2022-10-10 13:10:40,520,18586.284,35.742854,39.711538,48.550000,104.357692
93,4754,505,Female,49.0,2022-10-10 13:19:22,520,24305.368,46.741092,39.753846,51.061538,25.146154
94,4755,506,Female,54.0,2022-10-10 13:28:05,520,31331.125,60.252163,42.530769,48.328846,105.467308


### Daily Report Requirements

* Number of rides completed in the past day

* Gender split of riders of the past day

* Ages of the riders of the past day

* Average power and heart rate of riders of the past day

In [17]:
# Number of rows returned by the rides query.
row_count = df.shape[0]
row_count

96

In [30]:
# Gender split of riders. df holds one row per ride, so it is first reduced
# to one row per user_id; otherwise a rider with several rides in the day
# would be counted once per ride.
gender_df = df.drop_duplicates(subset="user_id")["gender"].value_counts()

# gender_df is a Series: index = gender, values = number of riders.
gender_fig = px.pie(
    gender_df,
    values=gender_df.values,
    names=gender_df.index,
    title="Gender split of riders",
)
gender_fig

In [34]:
# Age distribution of riders. df holds one row per ride, so it is reduced to
# one row per user_id before counting to avoid counting a rider once per ride.
age_bin = [0, 15, 30, 45, 60, 75, 90, 105]
age_df = (
    df.drop_duplicates(subset="user_id")["age"]
    .value_counts(bins=age_bin, sort=False)
)

# value_counts(bins=...) builds right-closed intervals such as (15, 30], and
# ages are whole years, so the second bin really covers 16-30. The labels are
# derived from the edges so they always match the bins: 0-15, 16-30, ...
age_range_list = [
    f"{low if position == 0 else low + 1}-{high}"
    for position, (low, high) in enumerate(zip(age_bin, age_bin[1:]))
]

# The interval index is turned into strings so it can be used as categorical
# x positions; the readable labels are applied as tick text below.
age_bin_ticks = age_df.index.astype(str)
age_fig = px.bar(x=age_bin_ticks, y=age_df.values)

age_fig.update_layout(
    title="Ages of riders",
    xaxis_title="Age range (years)",
    yaxis_title="Number of riders",
)
age_fig.update_xaxes(tickvals=age_bin_ticks, ticktext=age_range_list)
age_fig

