In [None]:
from deep_translator import GoogleTranslator
from my_packages import *
import pinyin

In [None]:
def differ_to_ref(series, convert_to="milimeter"):
    """
    Calculate differential values relative to the first measurement in the series.

    Parameters:
    - series (pd.Series): A pandas Series containing the measurements.
    - convert_to (str): Unit to convert the differential values to.
                        Accepts 'milimeter' (legacy spelling, still the default),
                        'millimeter' (correct spelling, same effect) or 'centimeter'.

    Returns:
    - np.ndarray: float16 array holding (series - first element) multiplied by
                  1000 for the millimeter names or 100 for 'centimeter'.

    Raises:
    - ValueError: If 'convert_to' is not one of the accepted unit names.
    """
    # Both spellings map to the same factor so existing callers that pass the
    # historical 'milimeter' keep working while the correct spelling is accepted.
    # The factors presumably assume the input is in meters -- TODO confirm.
    if convert_to in ("milimeter", "millimeter"):
        multiplier = 1000
    elif convert_to == "centimeter":
        multiplier = 100
    else:
        raise ValueError(
            "Invalid 'convert_to' value. Must be 'milimeter', 'millimeter' or 'centimeter'."
        )

    # Calculate differential values relative to the first measurement and apply conversion.
    # NOTE(review): float16 keeps roughly 3 significant decimal digits and overflows
    # above 65504; the dtype is left unchanged because callers may depend on it.
    return np.array((series - series.iloc[0]) * multiplier, dtype=np.float16)


def cdisp_from_base(input_array):
    """
    Accumulate values starting from the last element and working backwards.

    The input is reversed, a NaN-ignoring cumulative sum is taken in float64,
    and the running totals are reversed again. For 1-D input, element i of the
    result is therefore the sum of input_array[i:] with NaNs counted as zero.
    """
    reversed_values = input_array[::-1]
    running_total = np.nancumsum(reversed_values, dtype=np.float64)
    return running_total[::-1]


def columns_to_numbers(input_array):
    """
    Parse the number embedded in each label and round it to 3 decimals.

    For every label, the text after the last "_" is taken and cut at the first
    space; that token is parsed as a Python literal.

    Parameters:
    - input_array (iterable of str): Labels such as "name_12.5 m".

    Returns:
    - np.ndarray: The parsed numbers rounded to 3 decimal places.

    Raises:
    - ValueError or SyntaxError: If a token is not a valid Python literal.
    """
    # Function-scope import so this block does not depend on the notebook's
    # import cell already providing `ast`.
    import ast

    # literal_eval only accepts literals, so a label can no longer run arbitrary
    # code the way eval() allowed; numeric tokens keep their int/float type.
    parsed = [
        ast.literal_eval(label.split("_")[-1].split(" ")[0])
        for label in input_array
    ]
    return np.round(parsed, 3)


def process_first_row(df):
    """
    Return a copy of `df` whose first-row NaNs are filled from the column labels.

    Each NaN in row 0 is replaced by the number that columns_to_numbers parses
    from that column's label. The input frame is not modified.

    Parameters:
    - df (pd.DataFrame): Frame whose column labels are parseable by
                         columns_to_numbers and whose first row is numeric
                         (np.isnan is applied to it).

    Returns:
    - pd.DataFrame: A copy of `df` with the first row filled.
    """
    subdf = df.copy()
    label_values = columns_to_numbers(subdf.columns)

    # Take an explicit writable copy of the row: with pandas Copy-on-Write,
    # `.values` returns a read-only array and the masked assignment below
    # would raise.
    first_row = subdf.iloc[0, :].to_numpy(copy=True)
    missing = np.isnan(first_row)
    first_row[missing] = label_values[missing]

    subdf.iloc[0, :] = first_row
    return subdf


def datetime2string(input_array):
    """
    Turn date-like objects into labels of the form "F" + YYYYMMDD.

    Every element must provide a strftime method; the result is a list with
    one string per element, in the same order.
    """
    labels = []
    for stamp in input_array:
        day_code = stamp.strftime("%Y%m%d")
        labels.append("F" + day_code)
    return labels


def get_savepath(savefolder, fpath, suffix):
    """
    Build the .xlsx output path that corresponds to an input file.

    The stem is the input's base name up to its first ".", so every extension
    is dropped. The file name is "<PREFIX>_<stem>_<suffix>.xlsx", where PREFIX
    is the upper-cased result of pinyin.get(stem, format="strip") (presumably
    the romanized form of a Chinese stem), and it is joined onto `savefolder`.
    """
    stem = os.path.basename(fpath).partition(".")[0]
    prefix = pinyin.get(stem, format="strip").upper()
    return os.path.join(savefolder, f"{prefix}_{stem}_{suffix}.xlsx")

# Error reporting helper: writes a detailed record to the log and echoes a short notice.
def log_error(message, select_fpath, error):
    """
    Report a processing failure for one file.

    The full context (message, file path, error) goes to the root logger at
    ERROR level; a shorter line is printed so the failure is visible right away.
    """
    log_line = f"{message} | File: {select_fpath} | Error: {error}"
    console_line = f"Error processing {select_fpath}: {error}"
    logging.error(log_line)
    print(console_line)  # immediate feedback alongside the log record

In [None]:
# Main directory containing well measurement data
data_dir = r"E:\SUBSIDENCE_PROJECT_DATA\地陷資料整理\地陷井\監測井_資料清理結果"

# Retrieve all well data file paths
data_fpath = glob(os.path.join(data_dir, "*.xz"))

# Load well metadata
well_metadata_file = r"E:\SUBSIDENCE_PROJECT_DATA\地陷資料整理\地陷井\select_well.xlsx"
well_metadata = pd.read_excel(well_metadata_file)

# Directory for storing processed data
processed_data_dir = "Deformation_Differencing"
os.makedirs(processed_data_dir, exist_ok=True)

# Only the first matched file is processed in this cell. Fail with an
# actionable message instead of a bare IndexError when nothing matched.
if not data_fpath:
    raise FileNotFoundError(f"No '*.xz' files found in {data_dir}")
select_fpath = data_fpath[0]

# NOTE(review): read_pickle can execute arbitrary code while loading; only
# open files from a trusted source.
well_df = pd.read_pickle(select_fpath)
well_df.index = pd.to_datetime(well_df.index)

# Calculate differences between subsequent columns to find deformation.
# The first column has no left-hand neighbour, so it keeps its original values.
column_diffs = well_df.diff(axis=1)
column_diffs.iloc[:, 0] = well_df.iloc[:, 0]

# Forward-fill gaps down each column. DataFrame.ffill replaces
# fillna(method="ffill"), which is deprecated since pandas 2.1.
filled_diffs = column_diffs.ffill(axis=0)

# Differential values with reference to first measurement
diffs_ref2first_transposed = filled_diffs.apply(differ_to_ref, axis=0).transpose()

# Convert dates in columns to string format
formatted_columns = datetime2string(diffs_ref2first_transposed.columns)
diffs_ref2first_transposed.columns = formatted_columns

# Prepare DataFrame with Depth as a column
diffs_ref2first_df = diffs_ref2first_transposed.reset_index().rename({"index": "Depth"}, axis=1)
filled_diffs_ref2first_df = diffs_ref2first_df.fillna(0)

# Determine output file path for saving
save_fpath = get_savepath(processed_data_dir, select_fpath, suffix="diff")
save_fpath

# Save the processed DataFrame to Excel
# filled_diffs_ref2first_df.to_excel(save_fpath, index=False)