In [None]:
import logging
import os

import pandas as pd

In [128]:
def creating_logger(log_dir: str = "logs", log_file: str = "ingest.log") -> logging.Logger:
    """
    Configure root logging (console + file) and return the "ingest" logger.

    Parameters
    ----------
    log_dir : str
        Directory that receives the log file; created if it does not exist.
    log_file : str
        Name of the log file inside ``log_dir``. The file is opened in append
        mode with UTF-8 encoding.

    Returns
    -------
    logging.Logger
        The logger named "ingest". It has no handlers of its own and relies on
        the root configuration installed here.
    """
    os.makedirs(log_dir, exist_ok=True)

    # basicConfig() does nothing when the root logger already has handlers.
    # Building the handlers only when they will actually be installed avoids
    # leaking an open FileHandler every time this cell is re-run.
    if not logging.getLogger().handlers:
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s | %(levelname)s | %(message)s",
            handlers=[
                logging.StreamHandler(),  # show in console
                logging.FileHandler(
                    os.path.join(log_dir, log_file), mode="a", encoding="utf-8"
                ),  # save to file
            ],
        )

    # Return the logger so the caller can bind it to a name; previously it was
    # assigned to a local variable and lost when the function returned.
    return logging.getLogger("ingest")


In [129]:
# Directory holding the source workbooks. The default is the original
# developer location; set PIPELINE_DATA_DIR to run on another machine.
DATA_DIR = os.environ.get(
    "PIPELINE_DATA_DIR",
    "/Users/youngeddieb/PycharmProjects/BI-Analytics/BI-Analytics/Pipeline/datasets",
)

# NOTE: "Enginner's" is misspelled, but the captured run loaded the file under
# this exact name, so it is kept as-is. Rename the file and this literal together.
doctors_path = os.path.join(DATA_DIR, "Data Enginner's Doctors Excel - VIP Medical Group.xlsx")
appointments_path = os.path.join(DATA_DIR, "Data Engineer's Appointments Excel - VIP Medical Group.xlsx")

In [130]:
def extract_data(doctors_path: str, appointments_path: str):
    """
    Read both Excel workbooks and return them as DataFrames.

    No transformations are applied and nothing is uploaded to a database.

    Parameters
    ----------
    doctors_path : str
        Path to the doctors workbook.
    appointments_path : str
        Path to the appointments workbook.

    Returns
    -------
    tuple
        ``(doctors_df, appointments_df)`` as parsed by ``pandas.read_excel``
        with the ``openpyxl`` engine.

    Notes
    -----
    Errors raised by ``pandas.read_excel`` propagate unchanged. Progress is
    logged at INFO level on the "ingest" logger; whether those records are
    shown depends on the logging configuration in effect when this is called.
    """
    # Resolve the logger by name instead of relying on a module-level
    # ``logger`` variable, which no cell defines (NameError on a fresh kernel).
    logger = logging.getLogger("ingest")

    logger.info("=== START: INGEST ===")

    # Read doctors.xlsx
    logger.info(f"Reading doctors file from: {doctors_path}")
    doctors_df = pd.read_excel(doctors_path, engine="openpyxl")
    logger.info(f"Doctors file loaded: {len(doctors_df)} rows")

    # Read appointments.xlsx
    logger.info(f"Reading appointments file from: {appointments_path}")
    appointments_df = pd.read_excel(appointments_path, engine="openpyxl")
    logger.info(f"Appointments file loaded: {len(appointments_df)} rows")

    logger.info("=== END: INGEST ===")

    return doctors_df, appointments_df

In [131]:
if __name__ == "__main__":
    # Configure logging before the extract runs. The captured output uses the
    # basicConfig format, but no cell in the notebook ever made this call, so
    # a fresh kernel would otherwise emit no INFO records.
    creating_logger()

    doctors_df, appointments_df = extract_data(doctors_path, appointments_path)

    # Show the first rows of each frame as a quick sanity check of the load.
    print("\n=== Doctors sample ===")
    print(doctors_df.head())
    print("\n=== Appointments sample ===")
    print(appointments_df.head())

2025-11-01 21:00:06,602 | INFO | === START: INGEST ===
2025-11-01 21:00:06,603 | INFO | Reading doctors file from: /Users/youngeddieb/PycharmProjects/BI-Analytics/BI-Analytics/Pipeline/datasets/Data Enginner's Doctors Excel - VIP Medical Group.xlsx
2025-11-01 21:00:06,616 | INFO | Doctors file loaded: 5 rows
2025-11-01 21:00:06,616 | INFO | Reading appointments file from: /Users/youngeddieb/PycharmProjects/BI-Analytics/BI-Analytics/Pipeline/datasets/Data Engineer's Appointments Excel - VIP Medical Group.xlsx
2025-11-01 21:00:06,643 | INFO | Appointments file loaded: 1025 rows
2025-11-01 21:00:06,644 | INFO | === END: INGEST ===



=== Doctors sample ===
   doctor_id           name specialty
0        100      Dr. Pérez      Vein
1        101      Dr. Gómez      Pain
2        102    Dr. Sánchez      Vein
3        103  Dr. Rodríguez      Pain
4        104   Dr. Martínez      Vein

=== Appoiments sample ===
  booking_id  patient_id  doctor_id booking_date     status
0          1        10.0      100.0   10/20/2025  confirmed
1          2        11.0      101.0   10/21/2025  cancelled
2          3        12.0      100.0   10/20/2025  confirmed
3          4        13.0      101.0   10/20/2025  confirmed
4          5        14.0      102.0   10/20/2025  cancelled
