In [8]:
import os
import pandas as pd
from glob import glob

In [10]:
import logging

# Set up root logging once for the notebook: INFO and above, with each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Named logger used by the helper functions defined in later cells.
logger = logging.getLogger(__name__)

In [15]:
def load_data(file_type: str, data_dir: str = "data") -> pd.DataFrame:
    '''
    Load the most recently modified CSV of a given type from a data folder.

    Candidate files are found with the glob pattern
    ``<data_dir>/*<file_type>*.csv``.

    :param file_type (str): Substring identifying the dataset in the file
        name, e.g. 'recent' or 'historical'.
    :param data_dir (str): Folder to search. Defaults to 'data', resolved
        relative to the current working directory.

    :returns df (pd.DataFrame): Contents of the newest matching CSV file.

    :raises ValueError: If no file in ``data_dir`` matches the pattern.
    '''
    # Any CSV inside data_dir whose name contains file_type.
    path_pattern = f"{data_dir}/*{file_type}*.csv"
    files = glob(path_pattern)

    if not files:
        raise ValueError(f"No files found for type '{file_type}'")

    # Rank by modification time. os.path.getctime is only the creation time on
    # Windows; on Unix it is the last metadata change (chmod, rename, ...), so
    # it does not reliably identify the newest data. The path is used as a
    # tie-breaker so the choice does not depend on glob's listing order.
    latest_file = max(files, key=lambda path: (os.path.getmtime(path), path))

    # Lazy %-style arguments: the message is only built if INFO is enabled.
    logger.info("Loading the latest %s file: %s", file_type, latest_file)

    return pd.read_csv(latest_file)

In [16]:
# Load the newest matching CSV for each dataset type; each call logs the
# file path it picked (see the output below).
recent_df = load_data('recent')
historical_df = load_data('historical')

2023-11-06 21:34:43,979 - INFO - Loading the latest recent file: data/lsoa-recent.csv
2023-11-06 21:34:44,375 - INFO - Loading the latest historical file: data/lsoa-historical.csv
