The previous version of the GPS datetime converter was written in 2020 and used a very simple approach to the problem. However, the basic package was not enough to handle some of the more complicated cases, so I updated the algorithm using code adapted from Stack Overflow.

The output of this version is saved as CSV files, which can be opened in `Excel`

However, I ran into an issue with duplicated entries

The reason is that some data points were recorded on the same day (as compensation for previously missing data)

So I upgraded the script to support my manual work of fixing the duplicates

In [1]:
from my_packages import *

In [2]:
def create_directory(directory):
    """
    Create a directory (and any missing parent directories) if it does not exist.

    Calling this on a directory that already exists is a no-op.

    Parameters:
    directory (str): The path of the directory to create.

    Raises:
    FileExistsError: If the path already exists but is not a directory.
    """
    # exist_ok=True folds the existence check into the creation call itself,
    # so the folder appearing between "check" and "create" (e.g. made by
    # another process) can no longer cause a failure.
    os.makedirs(directory, exist_ok=True)


def datetime_converter(frac_year):
    """
    Convert fractional year dates to datetime objects.

    The integer part of each value selects 1 January of that year. The
    fractional part is scaled by the length of that particular year
    (366 days for leap years, 365 days otherwise) and added as an offset.

    Parameters:
    frac_year (list or pd.Series): List or Series of fractional year dates,
        e.g. 2020.5. Values must not be NaN, because the integer year is
        obtained with an integer cast.

    Returns:
    list: List of converted datetime objects (pandas Timestamps), one per
        input value and in the same order.
    """
    # Normalise the input so that plain lists work too; a Series keeps its
    # own index, and every intermediate below is derived from it so that all
    # element-wise operations stay aligned row by row.
    values = pd.Series(frac_year, dtype="float64")

    # Extract the integer part (year) and turn it into 1 January of that year.
    # Parsing the year as a 4-digit string avoids feeding a float such as
    # 2020.5 to a "%Y" format.
    whole_year = values.astype(int)
    year_start = pd.to_datetime(whole_year.astype(str), format="%Y")

    # Number of days in each year: 365, plus one for leap years
    days_in_year = 365 + year_start.dt.is_leap_year.astype(int)

    # Scale the fractional part of the year to days and add it as a timedelta
    fraction = values - whole_year
    output = year_start + pd.to_timedelta(days_in_year * fraction, unit="D")

    return output.tolist()


def process_files(read_folder, write_folder, extension):
    """
    Process all files with the given extension in the read folder, convert dates,
    and save the output as CSV files in the write folder.

    Each input file is read as a whitespace-separated table without a header
    row and must contain exactly seven columns. The output CSV holds the
    original fractional-year value ("Original"), the converted date truncated
    to the day ("Converted"), and the six remaining columns unchanged.

    A file that cannot be processed is reported on stdout and skipped, so one
    bad file does not abort the whole batch.

    Parameters:
    read_folder (str): Path to the folder containing the input files.
    write_folder (str): Path to the folder where the output CSV files will be saved.
    extension (str): File extension of the input files to process.
    """
    column_names = ["Datetime", "N(m)", "E(m)", "U(m)", "sN(m)", "sE(m)", "sU(m)"]

    # Ensure the write folder exists
    create_directory(write_folder)

    # Get a list of files to process
    file_list = [f for f in os.listdir(read_folder) if f.endswith(extension)]

    for file_name in tqdm(file_list):
        file_path = os.path.join(read_folder, file_name)

        # Swap only the trailing extension for ".csv". A plain str.replace
        # would also rewrite any earlier occurrence of the extension inside
        # the file name.
        stem = file_name[: len(file_name) - len(extension)]
        save_path = os.path.join(write_folder, stem + ".csv")

        try:
            # Read the data file
            data = pd.read_table(file_path, header=None, sep=r"\s+")

            # Assign column names (raises if the file does not have 7 columns)
            data.columns = column_names

            # Convert fractional year dates to datetime, aligned with the rows
            converted = pd.to_datetime(
                pd.Series(datetime_converter(data["Datetime"]), index=data.index)
            )

            # Build the output table: keep the original value, then place the
            # converted date (time of day dropped) right after it
            dataframe = data.rename(columns={"Datetime": "Original"})
            dataframe.insert(1, "Converted", converted.dt.normalize())

            # Save the DataFrame to a CSV file
            dataframe.to_csv(save_path, header=True, index=False)

        except Exception as e:
            # Deliberate per-file best effort: report and continue with the next file
            print(f"Error processing file {file_name}: {e}")

In [3]:
if __name__ == "__main__":
    # File extension that identifies the input files to convert
    EXTENSION = ".neu"

    # Folder holding the original files and folder receiving the converted output
    READ_FOLDER = r"E:\SUBSIDENCE_PROJECT_DATA\GPS_2024_HsuYaru\1__Raw\Original_Data"
    WRITE_FOLDER = r"E:\SUBSIDENCE_PROJECT_DATA\GPS_2024_HsuYaru\1__Raw\DatetimeConverted_Data"

    # Run the conversion over every matching file
    process_files(
        read_folder=READ_FOLDER,
        write_folder=WRITE_FOLDER,
        extension=EXTENSION,
    )

100%|████████████████████████████████████████████████████████████████████████████████| 106/106 [00:11<00:00,  9.22it/s]
