In [1]:
%pip install watchdog

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import time
import os

class CSVHandler(FileSystemEventHandler):
    def on_modified(self, event):
        # Check if the modified file is one of the raw CSV files
        if event.src_path.endswith('.csv'):
            print(f'{event.src_path} has been modified. Preparing data...')
            preparingData()

def preparingData():
    users_df = pd.read_csv('../raw/RAW_Users.csv')
    courses_df = pd.read_csv('../raw/RAW_Courses.csv')
    feedbacks_df = pd.read_csv('../raw/RAW_Feedbacks.csv')
    training_performance_df = pd.read_csv('../raw/RAW_TrainingPerformances.csv')

    # Cleaning data (example)
    users_df.drop(columns=['createdAt', 'updatedAt'], inplace=True)
    feedbacks_df.drop(columns=['createdAt', 'updatedAt'], inplace=True)
    courses_df.drop(columns=['createdAt', 'updatedAt'], inplace=True)
    
    courses_df['startCourseDate'] = pd.to_datetime(courses_df['startCourseDate'])
    courses_df['courseExpireDate'] = pd.to_datetime(courses_df['courseExpireDate'])

    # Filling NaNs in reportingManager with 'None'
    users_df['reportingManager'].fillna('None', inplace=True)

    # Save prepped data
    users_df.to_csv('PREP_Users.csv', index=False)
    courses_df.to_csv('PREP_Courses.csv', index=False)
    feedbacks_df.to_csv('PREP_Feedbacks.csv', index=False)
    training_performance_df.to_csv('PREP_TrainingPerformances.csv', index=False)

if __name__ == "__main__":
    preparingData()
    path = '../RAW'  # Path to the folder you want to monitor
    event_handler = CSVHandler()
    observer = Observer()
    observer.schedule(event_handler, path, recursive=False)

    print("Monitoring folder for changes...")
    observer.start()
    
    try:
        while True:
            time.sleep(1)  # Keep the script running
    except KeyboardInterrupt:
        observer.stop()
    observer.join()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  users_df['reportingManager'].fillna('None', inplace=True)


Monitoring folder for changes...
../RAW\RAW_Users.csv has been modified. Preparing data...
../RAW\RAW_Users.csv has been modified. Preparing data...
../RAW\RAW_Courses.csv has been modified. Preparing data...
../RAW\RAW_Feedbacks.csv has been modified. Preparing data...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  users_df['reportingManager'].fillna('None', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  users_df['reportingManager'].fillna('None', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which

../RAW\RAW_TrainingPerformances.csv has been modified. Preparing data...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  users_df['reportingManager'].fillna('None', inplace=True)
