Fetch data and import necessary libraries

In [50]:
import pandas as pd
import logging
import os

donation_facilities_url = "https://raw.githubusercontent.com/MoH-Malaysia/data-darah-public/main/donations_facility.csv"
donation_state_url = "https://raw.githubusercontent.com/MoH-Malaysia/data-darah-public/main/donations_state.csv"
newdonors_facility_url = "https://raw.githubusercontent.com/MoH-Malaysia/data-darah-public/main/newdonors_facility.csv"
newdonors_state_url = "https://raw.githubusercontent.com/MoH-Malaysia/data-darah-public/main/newdonors_state.csv"

def get_data_url(url_link):
    try:
        data = pd.read_csv(url_link)
        return data
    except Exception as e:
        logging.error(f"Unexpected error fetching data from {url_link}: {e}")
        return None

# We have to get the raw url to the dataset
donation_facilities = get_data_url(donation_facilities_url)
donation_state = get_data_url(donation_state_url)
newdonors_facility = get_data_url(newdonors_facility_url)
newdonors_state = get_data_url(newdonors_state_url)

blood_donation_retention = pd.read_parquet(r"C:\Users\Irfan Syafi\Desktop\Portfolio Github\blood_donation_folder\malaysia_blood_donation\blood_donation_retention_2024.parquet")



Store the data into a csv and append the data daily. This will act as staging phase before we transform the data and load into a database.

In [51]:
donation_facilities.to_csv("malaysia_blood_donation/donation_facilities.csv",mode='a',index =False, header=not os.path.exists("malaysia_blood_donation/donation_facilities.csv"))
donation_state.to_csv("malaysia_blood_donation/donation_state.csv",mode='a',index =False, header=not os.path.exists("malaysia_blood_donation/donation_state.csv"))
newdonors_facility.to_csv("malaysia_blood_donation/newdonors_facility.csv",mode='a',index =False, header=not os.path.exists("malaysia_blood_donation/newdonors_facility.csv"))
newdonors_state.to_csv("malaysia_blood_donation/newdonors_state.csv",mode='a',index =False, header=not os.path.exists("malaysia_blood_donation/newdonors_state.csv"))
blood_donation_retention.to_csv("malaysia_blood_donation/newdonors_state.csv",mode='a',index =False, header=not os.path.exists("malaysia_blood_donation/blood_donation_retention.csv"))


Extract the data from the csv file that will contain daily updates to transform it for pushing in a database.

In [53]:
donation_facilities_df = pd.read_csv("malaysia_blood_donation/donation_facilities.csv")
donation_facilities_df.head()
print(donation_facilities_df.dtypes)

date                       object
hospital                   object
daily                       int64
blood_a                     int64
blood_b                     int64
blood_o                     int64
blood_ab                    int64
location_centre             int64
location_mobile             int64
type_wholeblood             int64
type_apheresis_platelet     int64
type_apheresis_plasma       int64
type_other                  int64
social_civilian             int64
social_student              int64
social_policearmy           int64
donations_new               int64
donations_regular           int64
donations_irregular         int64
dtype: object


In [48]:
import duckdb

db_file_path = "malaysia_blood_donation/malaysia_blood_donation.db"

db_connection = duckdb.connect(database=db_file_path, read_only=False)


def store_db(data,table_name,db_connection):
    try:
        db_connection.sql(f"CREATE TABLE {table_name} AS {data.to_sql(None, '', index=False)}")
        logging.info(f"Data saved to {table_name} table in the database.")
    except Exception as e:
        print(f"Unexpected error when storing to a database : {e}")

store_db(donation_facilities,"Donation_Facilities",db_connection=db_connection)
store_db(donation_state,"Donation_State",db_connection=db_connection)
store_db(newdonors_facility,"NewDonors_Facility",db_connection=db_connection)
store_db(newdonors_state,"NewDonors_State",db_connection=db_connection)
store_db(blood_donation_retention,"BloodDonor_Retention",db_connection=db_connection)

db_connection.close()

Unexpected error when storing to a database : Could not parse rfc1738 URL from string ''
Unexpected error when storing to a database : Could not parse rfc1738 URL from string ''
Unexpected error when storing to a database : Could not parse rfc1738 URL from string ''
Unexpected error when storing to a database : Could not parse rfc1738 URL from string ''
Unexpected error when storing to a database : Could not parse rfc1738 URL from string ''


Extract all data from the staging to transform the data.

In [38]:

def extract_data(table_name, db_connection):
    try:
        query = f"SELECT * FROM {table_name}"
        extracted_data = pd.read_sql(query, db_connection)
        logging.info(f"Data extracted from {table_name} table in the database.")
        return extracted_data
    except Exception as e:
        print(f"Unexpected error when extracting data: {e}")
        return None

# Example usage:
extracted_data = extract_data("BloodDonor_Retention", db_connection=db_connection)


Unexpected error when extracting data: Execution failed on sql: SELECT * FROM BloodDonor_Retention
Catalog Error: Table with name BloodDonor_Retention does not exist!
Did you mean "pg_settings"?
LINE 1: SELECT * FROM BloodDonor_Retention
                      ^
unable to rollback


  extracted_data = pd.read_sql(query, db_connection)


In [39]:
donation_facilities.head()

Unnamed: 0,date,hospital,daily,blood_a,blood_b,blood_o,blood_ab,location_centre,location_mobile,type_wholeblood,type_apheresis_platelet,type_apheresis_plasma,type_other,social_civilian,social_student,social_policearmy,donations_new,donations_regular,donations_irregular
0,2006-01-01,Hospital Sultanah Nora Ismail,87,19,20,45,3,87,0,87,0,0,0,86,1,0,36,49,2
1,2006-01-01,Hospital Sultanah Aminah,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2006-01-01,Hospital Sultanah Bahiyah,208,67,62,61,18,208,0,208,0,0,0,197,8,3,1,207,0
3,2006-01-01,Hospital Raja Perempuan Zainab II,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,2006-01-01,Hospital Melaka,1,0,0,1,0,1,0,1,0,0,0,1,0,0,0,1,0


In [40]:
donation_state.head()

Unnamed: 0,date,state,daily,blood_a,blood_b,blood_o,blood_ab,location_centre,location_mobile,type_wholeblood,type_apheresis_platelet,type_apheresis_plasma,type_other,social_civilian,social_student,social_policearmy,donations_new,donations_regular,donations_irregular
0,2006-01-01,Malaysia,525,152,139,194,40,308,217,525,0,0,0,496,18,11,243,277,5
1,2006-01-02,Malaysia,227,53,43,112,19,162,65,217,6,4,0,224,2,1,83,143,1
2,2006-01-03,Malaysia,112,29,21,56,6,112,0,89,10,13,0,105,2,5,8,101,3
3,2006-01-04,Malaysia,391,92,98,165,36,145,246,371,4,16,0,316,71,4,286,102,3
4,2006-01-05,Malaysia,582,149,198,193,42,371,211,548,17,17,0,555,19,8,328,250,4


In [41]:
newdonors_facility.head()

Unnamed: 0,date,hospital,17-24,25-29,30-34,35-39,40-44,45-49,50-54,55-59,60-64,other,total
0,2006-01-01,Hospital Sultanah Nora Ismail,21,2,2,3,6,1,0,0,1,0,36
1,2006-01-01,Hospital Sultanah Aminah,0,0,0,0,0,0,0,0,0,0,0
2,2006-01-01,Hospital Sultanah Bahiyah,0,0,0,1,0,0,0,0,0,0,1
3,2006-01-01,Hospital Raja Perempuan Zainab II,0,0,0,0,0,0,0,0,0,0,0
4,2006-01-01,Hospital Melaka,0,0,0,0,0,0,0,0,0,0,0


In [42]:
newdonors_state.head()

Unnamed: 0,date,state,17-24,25-29,30-34,35-39,40-44,45-49,50-54,55-59,60-64,other,total
0,2006-01-01,Malaysia,124,31,31,18,24,7,5,2,1,0,243
1,2006-01-02,Malaysia,16,15,11,8,19,6,6,2,0,0,83
2,2006-01-03,Malaysia,1,2,1,2,1,0,0,1,0,0,8
3,2006-01-04,Malaysia,171,35,27,13,16,16,6,1,0,1,286
4,2006-01-05,Malaysia,219,41,22,13,16,8,8,1,0,0,328
