Script for Processing GPS Data Files

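This script processes GPS data files: for each file it builds a continuous datetime
range (one entry per day) covering the file's records and maps the GPS data onto that
range, so dates without a record appear as empty rows. Each result is saved as a CSV file.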

In [1]:
import os
import warnings
from glob import glob

import pandas as pd
from tqdm import tqdm

warnings.filterwarnings("ignore")

In [2]:
def create_directory(directory):
    """
    Create a directory (including missing parents) if it does not exist.

    The creation is done in a single call instead of "check, then create",
    so a directory that appears between the check and the creation (e.g.
    made by another process) no longer causes an error.

    Parameters:
    directory (str): The path of the directory to create.

    Raises:
    FileExistsError: If the path exists but is not a directory.
    """
    # exist_ok=True makes the call a no-op when the directory is already there
    os.makedirs(directory, exist_ok=True)


def process_gps_file(filepath, savefolder):
    """
    Process a single GPS data file and save the processed data.

    The file is read with its second column parsed as dates. A datetime
    index is then built from the earliest to the latest value of the
    "Converted" column (pandas' default daily step), and the selected data
    columns are looked up on that index. Dates without a matching record are
    left empty in the output CSV.

    Any exception raised while processing is caught and reported with
    ``print`` so that one bad file does not abort a batch run; the function
    always returns None.

    Parameters:
    filepath (str): The path to the GPS data file.
    savefolder (str): The folder to save the processed CSV files.
    """
    try:
        # Output name: the part of the file name before the first ".",
        # upper-cased (e.g. "stn1.v2.csv" -> "STN1")
        basename = os.path.basename(filepath)
        filename = basename.split(".")[0].upper()

        # Read the GPS data file; the column at position 1 is parsed as dates
        # (presumably the "Converted" column used below -- confirm against
        # the input schema)
        gpsdata = pd.read_csv(filepath, parse_dates=[1])
        if gpsdata.empty:
            # Give a readable message instead of a bare KeyError further down
            raise ValueError("file contains no data rows")

        # Determine the full datetime range from the earliest and latest
        # timestamps, not from the first and last rows: an unsorted file would
        # otherwise produce an empty or truncated range.
        # to_pydatetime() also fails fast if the column was not parsed as
        # datetimes (plain strings have no such method).
        timestamps = gpsdata["Converted"]
        first_day = timestamps.min().to_pydatetime()
        last_day = timestamps.max().to_pydatetime()
        fulldatetime = pd.date_range(first_day, last_day)

        # Create a DataFrame with the full datetime range as its index
        fulltime_gpsdata = pd.DataFrame(data={"Datetime": fulldatetime})
        fulltime_gpsdata = fulltime_gpsdata.set_index("Datetime")

        # Set the GPS data index to 'Converted' so values can be looked up by
        # timestamp
        gpsdata = gpsdata.set_index("Converted")

        # Map the GPS data columns to the full datetime range. The slice skips
        # the first remaining column and the last three; timestamps missing
        # from the source yield NaN.
        for col in gpsdata.columns[1:-3]:
            fulltime_gpsdata[col] = fulltime_gpsdata.index.map(gpsdata[col])

        # Save the processed data to a CSV file
        save_path = os.path.join(savefolder, f"{filename}.csv")
        fulltime_gpsdata.to_csv(save_path)

    except Exception as e:
        # Best-effort batch processing: report the failure and move on
        print(f"Error processing file {filepath}: {e}")


def process_all_files(filefolder, savefolder):
    """
    Run process_gps_file on every "*.csv" file found directly in a folder.

    The output folder is created first if needed; a tqdm progress bar is
    shown while the files are processed one by one.

    Parameters:
    filefolder (str): The folder containing the GPS data files.
    savefolder (str): The folder to save the processed CSV files.
    """
    # Make sure there is somewhere to write the results
    create_directory(savefolder)

    # Collect the input CSV files and process them with a progress bar
    csv_pattern = os.path.join(filefolder, "*.csv")
    for csv_path in tqdm(glob(csv_pattern)):
        process_gps_file(csv_path, savefolder)

In [3]:
if __name__ == "__main__":
    # Input folder: GPS CSV files to be read
    FILE_FOLDER = (
        r"E:\SUBSIDENCE_PROJECT_DATA\GPS_2024_HsuYaru\1__Raw\B__DatetimeConverted_Data"
    )
    # Output folder: one processed CSV is written here per input file
    SAVE_FOLDER = r"E:\SUBSIDENCE_PROJECT_DATA\GPS_2024_HsuYaru\2__Processed\A__Fulltime"

    # Run the batch over every CSV in the input folder
    process_all_files(FILE_FOLDER, SAVE_FOLDER)

100%|████████████████████████████████████████████████████████████████████████████████| 106/106 [00:09<00:00, 11.48it/s]
