In [None]:
from appgeopy import *
from my_packages import *
import multiprocessing

In [None]:
def process_dataframe_chunk(chunk, date_converter, trend_calculator):
    """
    Process a chunk of a DataFrame to calculate cumulative displacement velocities.

    The first three columns of ``chunk`` are carried through as metadata, the
    last column is ignored, and every column in between is treated as one
    displacement acquisition whose column label encodes its date.

    Parameters:
    - chunk (pd.DataFrame): A chunk of the original DataFrame. Its index may be
      arbitrary (e.g. 1000..1999 for the second chunk of a split); the result
      keeps that same index.
    - date_converter (function): Function to convert column names to datetime.
    - trend_calculator (function): Function to calculate the linear trend and
      velocity. It is called once per row with a Series laid out on a gap-free
      daily date range (NaN on days without a column) and must return an
      indexable whose element ``[1]`` is the per-day velocity.

    Returns:
    - pd.DataFrame: The three metadata columns plus ``velocity_mm_per_day`` and
      ``velocity_mm_per_year``, one row per input row, indexed like ``chunk``.
    """
    # Separate metadata and cumulative displacement data.
    metadata_columns = chunk.iloc[:, :3]
    # Copy so that relabelling the columns below never touches the caller's frame.
    displacement_data = chunk.iloc[:, 3:-1].copy()

    # Convert column names to datetime.
    datetime_indices = displacement_data.columns.map(date_converter)
    displacement_data.columns = datetime_indices

    # Generate the full daily range spanned by the acquisitions.
    full_date_range = pd.date_range(datetime_indices.min(), datetime_indices.max())

    # Insert NaN columns for the missing dates. reindex works on columns only and
    # leaves the row index untouched; the previous approach concatenated a
    # filler frame indexed 0..n-1, which outer-joined against the chunk's real
    # index and produced spurious all-NaN rows for every chunk not starting at 0.
    combined_df = displacement_data.reindex(columns=full_date_range)

    # Calculate average velocity using the trend calculator function.
    velocities = combined_df.apply(trend_calculator, axis=1)
    velocity_df = pd.DataFrame(
        {"velocity_mm_per_day": [velocity[1] for velocity in velocities]},
        # Reuse the chunk's index so the concat below aligns row-for-row.
        index=combined_df.index,
    )
    velocity_df["velocity_mm_per_year"] = velocity_df["velocity_mm_per_day"] * 365.25

    # Combine metadata with calculated velocities.
    result_df = pd.concat([metadata_columns, velocity_df], axis=1)
    return result_df

In [None]:
# Zipped pickle holding the input table (judging by the file name, cumulative
# LOS displacement in mm — TODO confirm).
input_fpath = r"E:\030_CHOUSHUI_2024\000_INSCALDEFO_2_INSSTACKPSI\PROCESS_003\14_POST-PROCESSING\ras2pnt_MODIFIED_oChoushui_CUMDISP_LOS_mm.pkl"
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df = pd.read_pickle(filepath_or_buffer=input_fpath, compression="zip")

# Keep only the first 10,000 rows for the steps that follow.
subdf = df.head(10_000)



In [None]:
# Upper bound on the number of rows in any one chunk.
max_rows_per_split = 1_000

# Number of chunks needed so that none exceeds max_rows_per_split. Cast to int
# because np.ceil returns a float, and clamp to 1 so an empty frame yields one
# empty chunk instead of a "number sections must be larger than 0" error.
n_splits = max(1, int(np.ceil(len(subdf) / max_rows_per_split)))

# Split integer row positions rather than the DataFrame itself: passing a
# DataFrame to np.array_split goes through DataFrame.swapaxes, which pandas
# has deprecated. Chunk sizes are the same near-equal sizes as before.
subdf_split = [
    subdf.iloc[row_positions]
    for row_positions in np.array_split(np.arange(len(subdf)), n_splits)
]

In [None]:
# Step-by-step inspection of the gap-filling stage on the second chunk.
chunk = subdf_split[1]

# Separate metadata and cumulative displacement data.
metadata_columns = chunk.iloc[:, :3]
# Copy so that relabelling the columns below never touches the chunk itself.
displacement_data = chunk.iloc[:, 3:-1].copy()

# Convert column names to datetime.
datetime_indices = displacement_data.columns.map(datetime_handle.convert_to_datetime)
displacement_data.columns = datetime_indices

# Generate the full daily range spanned by the acquisitions.
full_date_range = pd.date_range(datetime_indices.min(), datetime_indices.max())

# All-NaN frame for the dates that have no acquisition. It must share the
# chunk's row index: this chunk does not start at 0, so an index of 0..n-1
# would make the concat below outer-join the two indexes and double the rows.
full_time_df = pd.DataFrame(
    np.nan,
    index=displacement_data.index,
    columns=[date for date in full_date_range if date not in datetime_indices],
)

# Combine the original and the filler columns, then put them in date order.
combined_df = pd.concat([full_time_df, displacement_data], axis=1)
combined_df = combined_df[full_date_range]

combined_df