In [None]:
%pip install xhtml2pdf
%pip install plotly==5.10.0

In [66]:
from IPython.display import display, HTML
from xhtml2pdf import pisa 
from dotenv import load_dotenv
from sqlalchemy import create_engine
from datetime import datetime, timedelta
from os import getenv
import pandas as pd
import plotly.express as px
import io
from base64 import b64encode
import os

In [3]:
load_dotenv()

True

In [4]:
db_host = getenv('DB_HOST')
db_port = getenv('DB_PORT')
db_user = getenv('DB_USER')
db_password = getenv('DB_PASSWORD')
db_name = getenv('DB_NAME')
group_user = getenv('GROUP_USER')
group_user_pass = getenv('GROUP_USER_PASS')

In [5]:
engine = create_engine(f'postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}')

In [6]:
con = engine.connect()

In [7]:
def get_current_date() -> str:
   """
   Returns the current date (note :currently set as yesterdays date)
   """
   current_date = str(datetime.now().date())
   return current_date
current_date = get_current_date()
current_date

'2022-10-08'

In [8]:
query = """
SELECT * FROM yusra_stories_production.rides
"""
pd.read_sql_query(query, con)

Unnamed: 0,ride_id,user_id,start_time,end_time,total_duration,max_heart_rate_bpm,min_heart_rate_bpm,avg_heart_rate_bpm,avg_resistance,avg_rpm,total_power_kilojoules
0,127,4674,2022-10-08 09:06:31,2022-10-08 09:15:12,0:08:40,170,55,107,38,50,13.72
1,127,4674,2022-10-08 09:06:31,2022-10-08 09:15:12,0:08:40,170,55,107,38,50,13.72
2,127,4674,2022-10-08 09:06:31,2022-10-08 09:15:12,0:08:40,170,55,107,38,50,13.72
3,127,4674,2022-10-08 09:06:31,2022-10-08 09:15:12,0:08:40,170,55,107,38,50,13.72
4,127,4674,2022-10-08 09:06:31,2022-10-08 09:15:12,0:08:40,170,55,107,38,50,13.72
5,127,4674,2022-10-08 09:06:31,2022-10-08 09:15:12,0:08:40,170,55,107,38,50,13.72
6,127,4674,2022-10-08 09:06:31,2022-10-08 09:15:12,0:08:40,170,55,107,38,50,13.72
7,127,4674,2022-10-08 09:06:31,2022-10-08 09:15:12,0:08:40,170,55,107,38,50,13.72
8,127,4674,2022-10-08 09:06:31,2022-10-08 09:15:12,0:08:40,170,55,107,38,50,13.72
9,127,4674,2022-10-08 09:06:31,2022-10-08 09:15:12,0:08:40,170,55,107,38,50,13.72


### Quering database

#### Number of rides completed in the past day

In [9]:
query = f"""
WITH rides AS (  
    SELECT *, CAST(start_time AS DATE) AS start_date
    FROM yusra_stories_production.rides
    )
SELECT COUNT(*) AS number_of_rides 
FROM rides 
WHERE start_date = '{current_date}';
"""
number_of_rides = pd.read_sql_query(query, con)
number_of_rides

Unnamed: 0,number_of_rides
0,11


#### Gender split of riders of the past day

In [22]:
query = f"""
WITH rides AS (  
    SELECT *, CAST(start_time AS DATE) AS start_date
    FROM yusra_stories_production.rides
    ),
riders AS (
SELECT DISTINCT (user_id), name, gender, age
FROM yusra_stories_production.users
JOIN rides
USING (user_id)
WHERE start_date = '{current_date}'
)
SELECT gender, COUNT(*) AS number_of_riders
FROM riders
GROUP BY gender;
"""
riders_gender_split = pd.read_sql_query(query, con)
riders_gender_split_fig = px.pie(riders_gender_split, values='number_of_riders', names='gender', title=f'Gender split of riders of the past day', color_discrete_sequence=px.colors.sequential.Greens_r)

#### Ages of the riders of the past day

In [11]:
query = f"""
WITH rides AS (  
    SELECT *, CAST(start_time AS DATE) AS start_date
    FROM yusra_stories_production.rides
    )
SELECT DISTINCT (user_id), name, gender, age
FROM yusra_stories_production.users
JOIN rides
USING (user_id)
WHERE start_date = '{current_date}'
ORDER BY age ASC
"""
ages_of_riders = pd.read_sql_query(query, con)
px.bar()

Unnamed: 0,user_id,name,gender,age
0,4674,Frank Sutton,male,45.0


#### Average power and heart rate of riders of past day

In [12]:
query = f"""
WITH rides AS (  
    SELECT *, CAST(start_time AS DATE) AS start_date
    FROM yusra_stories_production.rides
    )
SELECT user_id, AVG(avg_heart_rate_bpm) AS average_heart_rate, AVG(total_power_kilojoules) AS average_power
FROM yusra_stories_production.users
JOIN rides
USING (user_id)
WHERE start_date = '{current_date}'
GROUP BY user_id
"""
riders_average_power_and_heart_rate = pd.read_sql_query(query, con)
riders_average_power_and_heart_rate

Unnamed: 0,user_id,average_heart_rate,average_power
0,4674,107.0,13.72


### Saving fig as image

#### Option 1: encoding (currently not working properly)

In [62]:
def fig_to_base64(fig):
    buffer = io.StringIO()
    fig.write_html(buffer)
    html_bytes = buffer.getvalue().encode()
    encoded = b64encode(html_bytes).decode('utf8')

    return encoded

In [63]:
encoded = fig_to_base64(riders_gender_split_fig)

In [64]:
my_html = '<img src="data:image/html;base64, {}">'.format(encoded)

#### Option 2: saving to directory

In [None]:
def create_directory_for_images():
    if not os.path.exists("images"):
        os.mkdir("images")

### Create the HTML Template

In [65]:
def report_block_template(my_html, caption=''):

    graph_block = (''
               + my_html 
            )

    report_block = ('' +
        graph_block +
        '{caption}' + 
        '<br>'      + 
        '</a>' +
        '<br>' +
        '<hr>')                       
    report_layout = (
       '<h2>Deloton Exercise Bikes Daily Report</h2>'
       + '<hr>'
       + report_block
    )
    return report_layout.format(my_html=my_html, caption=caption)


# graph_relative_path = 'age_distr.png'

report = report_block_template(my_html, caption='Dis a graph')


In [47]:
display(HTML(report))

### Convert the HTML to PDF

In [15]:

def convert_html_to_pdf(source_html, output_filename):

    result_file = open(output_filename, "w+b")

    pisa_status = pisa.CreatePDF(
            source_html,           
            dest=result_file)           

    result_file.close()           

    return pisa_status.err

In [16]:
convert_html_to_pdf(report, 'report.pdf')

0

In [17]:
! open report.pdf