In [1]:
# %pip install xhtml2pdf
# %pip install plotly==5.10.0

In [2]:
# %pip install -U kaleido

In [3]:
import io
import os
from base64 import b64encode
from datetime import datetime, timedelta
from os import getenv
from urllib.parse import quote_plus

import pandas as pd
import plotly.express as px
from dotenv import load_dotenv
from IPython.display import display, HTML
from sqlalchemy import create_engine
from xhtml2pdf import pisa

In [4]:
load_dotenv()

True

In [5]:
db_host = getenv('DB_HOST')
db_port = getenv('DB_PORT')
db_user = getenv('DB_USER')
db_password = getenv('DB_PASSWORD')
db_name = getenv('DB_NAME')
group_user = getenv('GROUP_USER')
group_user_pass = getenv('GROUP_USER_PASS')

In [8]:
def get_current_date(days_ago: int = 1) -> str:
    """
    Return the reporting date as an ISO-formatted string ('YYYY-MM-DD').

    The default is yesterday's date rather than today's, because the
    report built in this notebook covers the past day.

    Parameters
    ----------
    days_ago : int, optional
        Number of days to subtract from today's date (local system clock).
        Defaults to 1 (yesterday); 0 returns today's date.

    Returns
    -------
    str
        The resulting date formatted as 'YYYY-MM-DD'.
    """
    report_date = datetime.now().date() - timedelta(days=days_ago)
    # date.isoformat() yields the same 'YYYY-MM-DD' text as str(date).
    return report_date.isoformat()


current_date = get_current_date()
current_date

'2022-10-08'

### Querying the database

#### Number of rides completed in the past day

In [34]:
def get_number_of_rides(current_date=None, con=None):
    """
    Count the rides whose start date equals the reporting date.

    Parameters
    ----------
    current_date : str, optional
        Reporting date as 'YYYY-MM-DD'. When omitted, the notebook-level
        ``current_date`` variable is used so that existing zero-argument
        calls keep working.
    con : optional
        Database connection handed to ``pd.read_sql_query``. When omitted,
        the notebook-level ``con`` variable is used.

    Returns
    -------
    int
        Number of rides that started on ``current_date``.

    Raises
    ------
    NameError
        If an argument is omitted and the matching notebook-level variable
        has not been defined yet.
    """
    # Prefer explicit arguments (same signature style as the figure helpers);
    # only fall back to notebook globals for backward compatibility.
    try:
        if current_date is None:
            current_date = globals()["current_date"]
        if con is None:
            con = globals()["con"]
    except KeyError as missing:
        raise NameError(
            f"name {missing} is not defined; pass it explicitly or run the setup cell first"
        ) from None

    query = f"""
    WITH rides AS (
        SELECT *, CAST(start_time AS DATE) AS start_date
        FROM yusra_stories_production.rides
        )
    SELECT COUNT(*) AS number_of_rides
    FROM rides
    WHERE start_date = '{current_date}';
    """
    ride_counts = pd.read_sql_query(query, con)
    # COUNT(*) always yields exactly one row; use the public .at accessor
    # instead of the private DataFrame._get_value.
    return int(ride_counts.at[0, "number_of_rides"])

7

#### Gender split of riders of the past day

In [37]:
def get_rider_gender_split_fig(current_date, con):
    """
    Build a pie chart of how the riders of a given day split by gender.

    A rider is a distinct user (from the users table) joined to at least
    one ride whose start date equals ``current_date``.

    Parameters
    ----------
    current_date : str
        Date interpolated into the SQL filter on the ride start date.
    con
        Database connection handed to ``pd.read_sql_query``.

    Returns
    -------
    The figure produced by ``px.pie``, one slice per gender.
    """
    gender_query = f"""
    WITH rides AS (  
        SELECT *, CAST(start_time AS DATE) AS start_date
        FROM yusra_stories_production.rides
        ),
    riders AS (
    SELECT DISTINCT (user_id), name, gender, age
    FROM yusra_stories_production.users
    JOIN rides
    USING (user_id)
    WHERE start_date = '{current_date}'
    )
    SELECT gender, COUNT(*) AS number_of_riders
    FROM riders
    GROUP BY gender;
    """
    gender_counts = pd.read_sql_query(gender_query, con)
    green_palette = px.colors.sequential.Greens_r
    return px.pie(
        gender_counts,
        values='number_of_riders',
        names='gender',
        title='Gender split of riders of the past day',
        color_discrete_sequence=green_palette,
    )

#### Ages of the riders of the past day

In [38]:
def get_age_of_riders_fig(current_date, con):
    """
    Build a pie chart of the ages of the riders of a given day.

    A rider is a distinct user (from the users table) joined to at least
    one ride whose start date equals ``current_date``; riders are counted
    per age value.

    Parameters
    ----------
    current_date : str
        Date interpolated into the SQL filter on the ride start date.
    con
        Database connection handed to ``pd.read_sql_query``.

    Returns
    -------
    The figure produced by ``px.pie``, one slice per age.
    """
    age_query = f"""
    WITH rides AS (  
        SELECT *, CAST(start_time AS DATE) AS start_date
        FROM yusra_stories_production.rides
        ),
    distinct_riders AS (
        SELECT DISTINCT (user_id), age
        FROM yusra_stories_production.users
        JOIN rides
        USING (user_id)
        WHERE start_date = '{current_date}'
        ORDER BY age ASC
        )
    SELECT age, COUNT(*) AS number_of_riders
    FROM distinct_riders
    GROUP BY age
    ORDER BY age
    """
    age_counts = pd.read_sql_query(age_query, con)
    green_palette = px.colors.sequential.Greens_r
    return px.pie(
        age_counts,
        values='number_of_riders',
        names='age',
        title='Age of riders',
        color_discrete_sequence=green_palette,
    )

#### Average power and heart rate of riders of the past day

In [32]:
def get_average_ride_stats_fig(current_date, con):
    """
    Build a bar chart of each rider's average power against average heart rate.

    For every user with rides starting on ``current_date``, the query
    averages (and rounds) the per-ride ``avg_heart_rate_bpm`` and
    ``total_power_kilojoules`` values.

    Parameters
    ----------
    current_date : str
        Date interpolated into the SQL filter on the ride start date.
    con
        Database connection handed to ``pd.read_sql_query``.

    Returns
    -------
    The figure produced by ``px.bar`` with average power on the x axis and
    average heart rate on the y axis.
    """
    # The aliases are spelled in lower case so they match the DataFrame
    # columns looked up below without relying on the database folding
    # unquoted identifiers to lower case.
    query = f"""
    WITH rides AS (
        SELECT *, CAST(start_time AS DATE) AS start_date
        FROM yusra_stories_production.rides
        )
    SELECT user_id, ROUND(AVG(avg_heart_rate_bpm)) AS average_heart_rate_bpm, ROUND(AVG(total_power_kilojoules)) AS average_power_kj
    FROM yusra_stories_production.users
    JOIN rides
    USING (user_id)
    WHERE start_date = '{current_date}'
    GROUP BY user_id
    """
    riders_average_power_and_heart_rate = pd.read_sql_query(query, con)
    riders_average_power_and_heart_rate_fig = px.bar(
        riders_average_power_and_heart_rate,
        x='average_power_kj',
        y='average_heart_rate_bpm',
        color_discrete_sequence=px.colors.sequential.Greens_r,
        # Axis captions shown on the chart (closing parenthesis restored on "bpm").
        labels=dict(
            average_power_kj="Average power (KJ)",
            average_heart_rate_bpm="Average heart rate (bpm)",
        ),
        title='Average power vs Average heart rate for each rider',
    )
    return riders_average_power_and_heart_rate_fig

### Saving fig as image

#### Option 2: saving to directory

In [39]:
def get_graphs_list(current_date, con):
    """
    Build every figure of the report for one day.

    Parameters
    ----------
    current_date : str
        Date forwarded unchanged to each figure builder.
    con
        Database connection forwarded unchanged to each figure builder.

    Returns
    -------
    list
        Three figures in this order: gender split, rider ages, average
        power vs heart rate.
    """
    figure_builders = (
        get_rider_gender_split_fig,
        get_age_of_riders_fig,
        get_average_ride_stats_fig,
    )
    # Builders run in the order listed, which is also the order of the result.
    return [build_figure(current_date, con) for build_figure in figure_builders]

In [14]:
def create_directory_for_images(directory="images"):
    """
    Make sure the directory that receives the exported chart images exists.

    Safe to call repeatedly: an existing directory is left untouched.

    Parameters
    ----------
    directory : str, optional
        Path of the directory to create. Defaults to "images", relative to
        the current working directory.

    Raises
    ------
    FileExistsError
        If ``directory`` already exists but is not a directory.
    """
    # exist_ok=True replaces the separate exists()/mkdir() pair, which could
    # fail if the directory appeared between the check and the creation.
    os.makedirs(directory, exist_ok=True)
create_directory_for_images()

In [40]:
def save_image_as_png(fig, fig_name, directory="images"):
    """
    Write a figure to ``<directory>/<fig_name>.png``.

    Parameters
    ----------
    fig
        Object exposing ``write_image(path)``; the figures built in this
        notebook come from plotly express.
    fig_name : str
        File name without the ".png" extension.
    directory : str, optional
        Target directory. Defaults to "images", relative to the current
        working directory.
    """
    # Create the target directory on demand so the export does not depend on
    # another cell having been run first.
    os.makedirs(directory, exist_ok=True)
    fig.write_image(f"{directory}/{fig_name}.png")

### Create the HTML Template

In [16]:
# def report_block_template(fig_name, caption=''):

#     graph_block =  (''
            
#                 '<img style="height: 400px;" src="images/{fig_name}.png">'
#            )

#     report_block = ('' +
#         graph_block +
#         '{caption}' + 
#         '<br>'      + 
#         '</a>' +
#         '<br>' +
#         '<hr>')                       
#     report_layout = (
#        '<h2>Deloton Exercise Bikes Daily Report</h2>'
#        + '<hr>'
#        + report_block
#     )
#     return report_layout.format(fig_name=fig_name, caption=caption)


# report = report_block_template('riders_gender_split_fig', caption='Gender split')


In [17]:
# display(HTML(report))

### Convert the HTML to PDF

In [18]:

# def convert_html_to_pdf(source_html, output_filename):

#     result_file = open(output_filename, "w+b")

#     pisa_status = pisa.CreatePDF(
#             source_html,           
#             dest=result_file)           

#     result_file.close()           

#     return pisa_status.err

In [19]:
# convert_html_to_pdf(report, 'report.pdf')

In [20]:
# ! open report.pdf

### Function calls

In [None]:
# Fail early with a readable message if a database setting is absent from the
# environment, instead of building a URL that contains the text "None".
required_settings = {
    "DB_HOST": db_host,
    "DB_PORT": db_port,
    "DB_USER": db_user,
    "DB_PASSWORD": db_password,
    "DB_NAME": db_name,
}
missing_settings = [name for name, value in required_settings.items() if value is None]
if missing_settings:
    raise RuntimeError(
        "Missing database settings in the environment: " + ", ".join(missing_settings)
    )

# Percent-encode the credentials: characters such as "@", ":" or "/" in a
# user name or password would otherwise corrupt the connection URL.
database_url = (
    f"postgresql://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}:{db_port}/{db_name}"
)
engine = create_engine(database_url)
# The connection stays open at notebook level because other cells use `con`.
con = engine.connect()
current_date = get_current_date()
graphs = get_graphs_list(current_date, con)
create_directory_for_images()