In [16]:
from psycopg2 import connect, sql
import os
from dotenv import load_dotenv #pip install python-dotenv
from os import environ as env
import pandas as pd


# python-dotenv: make the variables from a .env file available to os.getenv.
load_dotenv()

# Read each connection setting; os.getenv yields None for any that is unset.
host, dbname, user, password = (
    os.getenv(setting) for setting in ('host', 'dbname', 'user', 'password')
)

# Echo the configured database user when one is present in the environment.
if 'user' in env:
    print(env['user'])


# Keyword arguments that fetch_data unpacks into psycopg2.connect().
conn_params = dict(host=host, dbname=dbname, user=user, password=password)

readonly_user


The function below takes a SQL query, connects to the source database using psycopg2, and executes the query.

It returns the rows produced by the query together with the column names.

In [22]:
import psycopg2


def fetch_data(query, params=None):
    """Run a row-returning SQL query and return its rows and column names.

    Connection settings are taken from the module-level ``conn_params`` dict.

    Args:
        query: SQL text to execute (e.g. a SELECT statement).
        params: Optional sequence or mapping of values bound to the
            placeholders in ``query``. Defaults to None (no parameters), which
            matches the original single-argument behaviour.

    Returns:
        A ``(data, colnames)`` tuple. ``data`` is the list of row tuples from
        ``fetchall()``; ``colnames`` is the first element of every entry in
        ``cursor.description``, in result order.
    """
    conn = psycopg2.connect(**conn_params)
    try:
        # psycopg2's connection context manager only ends the transaction
        # (commit on success, rollback on error); it does NOT close the
        # connection, so the close has to be explicit.
        with conn:
            with conn.cursor() as cur:
                cur.execute(query, params)
                data = cur.fetchall()
                colnames = [desc[0] for desc in cur.description]
    finally:
        conn.close()
    return data, colnames


Here we are looking at booking data for a club. We are specifically interested in the facid (facility id) and slots (number of time slots booked) columns.
Using the fetch_data function above, we select the facid and slots columns from the bookings table and create a new DataFrame from the query results.

In [23]:
# Extract: pull the facid and slots columns from the bookings table.
query = "SELECT facid, slots FROM bookings;"
bookings_data, colnames = fetch_data(query)

# Wrap the raw row tuples in a DataFrame labelled with the query's column names.
bookings_df = pd.DataFrame(data=bookings_data, columns=colnames)

# Preview the first rows.
bookings_df.head()

Unnamed: 0,facid,slots
0,3,2
1,4,2
2,6,2
3,7,2
4,8,1


Create a new column 'booking_duration' equal to the value of the slots column multiplied by 30 (each slot is assumed to represent 30 minutes).

In [24]:
# Transform: derive each booking's duration in minutes from its slot count.
# Assumption: one slot represents 30 minutes of booking time.
bookings_df['booking_duration'] = bookings_df['slots'].mul(30)
bookings_df


Unnamed: 0,facid,slots,booking_duration
0,3,2,60
1,4,2,60
2,6,2,60
3,7,2,60
4,8,1,30
...,...,...,...
4039,8,1,30
4040,8,1,30
4041,8,1,30
4042,8,1,30


Group the rows by facid and sum the booking durations for each facility id.

In [25]:
# Aggregate: total booked minutes for each facility id.
total_duration_per_facility = (
    bookings_df
    .groupby('facid')['booking_duration']
    .sum()
    .reset_index()  # turn the facid group index back into an ordinary column
)
total_duration_per_facility


Unnamed: 0,facid,booking_duration
0,0,39600
1,1,38340
2,2,36270
3,3,24900
4,4,42120
5,5,6840
6,6,33120
7,7,27240
8,8,27330


Rename the columns

In [26]:
# Give the two aggregate columns the names used by the destination table.
total_duration_per_facility = total_duration_per_facility.rename(
    columns={
        'facid': 'FacilityID',
        'booking_duration': 'TotalBookingDurationInMinutes',
    }
)
total_duration_per_facility

Unnamed: 0,FacilityID,TotalBookingDurationInMinutes
0,0,39600
1,1,38340
2,2,36270
3,3,24900
4,4,42120
5,5,6840
6,6,33120
7,7,27240
8,8,27330


In [29]:
# Keyword/value connection string for the local etl_bites database, passed
# to psycopg2.connect() by the write helpers.
etl_bites_conn_string = (
    "host='localhost' port='5432' "
    "dbname='etl_bites' user='joemiller' password=''"
)

In [31]:
def execute_query_postgresql(conn_string, query):
    """Execute one SQL statement against a database and commit it.

    Args:
        conn_string: Connection string handed to ``psycopg2.connect``.
        query: SQL text to execute. No result rows are fetched or returned.

    Returns:
        None.
    """
    conn = psycopg2.connect(conn_string)
    try:
        # ``with conn`` commits when the block succeeds and rolls back when it
        # raises, but it leaves the connection open; close it explicitly so
        # repeated calls do not leak connections.
        with conn:
            with conn.cursor() as cur:
                cur.execute(query)
    finally:
        conn.close()

# SQL query to create the table that stores total booking duration per facility.
# IF NOT EXISTS keeps this cell re-runnable: a plain CREATE TABLE raises an
# error as soon as the table already exists (e.g. on Restart & Run All).
create_total_duration_table = '''
CREATE TABLE IF NOT EXISTS total_booking_duration (
    FacilityID INTEGER NOT NULL,
    TotalBookingDurationInMinutes INTEGER NOT NULL
);
'''

# Execute the query to create the table
execute_query_postgresql(etl_bites_conn_string, create_total_duration_table)

In [32]:
def insert_data(conn_string, table_name, data, columns):
    """Insert every row of a DataFrame into a database table.

    Args:
        conn_string: Connection string handed to ``psycopg2.connect``.
        table_name: Name of the destination table. Interpolated directly into
            the SQL text, so it must come from trusted code, never user input.
        data: pandas DataFrame; each row (without its index) becomes one
            INSERT. Its column order must match ``columns``.
        columns: List of destination column names, also interpolated directly
            into the SQL text (trusted input only).

    Returns:
        None. All rows are committed together, or none if any insert fails.
    """
    # The statement text is the same for every row, so build it once.
    # Identifiers are deliberately left unquoted: the destination table in
    # this notebook is created with unquoted mixed-case names, which
    # PostgreSQL folds to lowercase; quoting them here would stop matching.
    column_list = ', '.join(columns)
    placeholders = ', '.join(['%s'] * len(columns))
    insert_query = f"INSERT INTO {table_name} ({column_list}) VALUES ({placeholders});"

    conn = psycopg2.connect(conn_string)
    try:
        # ``with conn`` commits on success and rolls back on error but does
        # not close the connection, hence the explicit close below.
        with conn:
            with conn.cursor() as cur:
                cur.executemany(insert_query, data.itertuples(index=False))
    finally:
        conn.close()

# Insert the transformed data into the analytical database.
# The table is emptied first so that re-running this cell replaces its rows
# instead of appending a second copy of every facility total.
execute_query_postgresql(etl_bites_conn_string, 'TRUNCATE TABLE total_booking_duration;')
insert_data(etl_bites_conn_string, 'total_booking_duration', total_duration_per_facility, ['FacilityID', 'TotalBookingDurationInMinutes'])

In [33]:
# Load the IPython extension named `sql`, which supplies the %sql / %%sql
# magics used in the cells below.
%load_ext sql

In [34]:
# Point the %sql magic at the local etl_bites database (same host, port,
# database and user as etl_bites_conn_string).
%sql postgresql+psycopg2://joemiller:@localhost:5432/etl_bites

In [35]:
%%sql

-- Read back every row of the loaded table to verify the insert.
SELECT *
FROM total_booking_duration;

 * postgresql+psycopg2://joemiller:***@localhost:5432/etl_bites
9 rows affected.


facilityid,totalbookingdurationinminutes
0,39600
1,38340
2,36270
3,24900
4,42120
5,6840
6,33120
7,27240
8,27330
