In [10]:
import os
import pandas as pd
import numpy as np

def NaN_the_zeros(log_file, quant_file, output_dir="5. Normalized"):
    """Replace cells of ``quant_file`` with NaN where ``log_file`` holds 0.

    Both CSV files are read without a header row. The first two rows and the
    first column are treated as labels and are carried over from
    ``quant_file`` unchanged. Every other cell is parsed as a number (cells
    that cannot be parsed become NaN). Wherever the cell in ``log_file``
    equals 0, the cell at the same position in ``quant_file`` is set to NaN.

    The result is written, without index or header, to
    ``<output_dir>/<prefix>_normalized.csv``, where ``<prefix>`` is made of
    the first three underscore-separated parts of the base name of
    ``quant_file`` (or the whole name when it has fewer than three parts).
    The output path is printed after saving.

    Args:
        log_file: Path of the CSV whose zeros decide which cells are masked.
        quant_file: Path of the CSV that is masked and saved.
        output_dir: Directory for the result; created if it does not exist.

    Raises:
        ValueError: If the numeric regions of the two files differ in shape.
    """
    log_table = pd.read_csv(log_file, header=None)
    # Cast to object so numeric results can be written back into columns that
    # also hold label text, whatever dtype the reader inferred for them.
    quant_table = pd.read_csv(quant_file, header=None).astype(object)

    # Numeric region: everything below the two label rows, right of column 0.
    log_values = log_table.iloc[2:, 1:].apply(pd.to_numeric, errors="coerce")
    quant_values = quant_table.iloc[2:, 1:].apply(pd.to_numeric, errors="coerce")

    # The mask is applied cell by cell, so both regions must line up.
    if log_values.shape != quant_values.shape:
        raise ValueError("The numeric portions of the two files must have the same shape!")

    # NaN wherever the log table is exactly 0; other cells keep their value.
    quant_table.iloc[2:, 1:] = quant_values.mask(log_values == 0)

    os.makedirs(output_dir, exist_ok=True)

    # Slicing copes with names shorter than three parts, so the prefix is
    # always a string (never the raw list of parts).
    stem = os.path.splitext(os.path.basename(quant_file))[0]
    name_trimmed = "_".join(stem.split("_")[:3])
    output_file = os.path.join(output_dir, f"{name_trimmed}_normalized.csv")

    quant_table.to_csv(output_file, index=False, header=False)
    print(f"Saved {output_file}")


# ------------- Run -------------
# One (log_file, quant_file) pair per dataset; each pair is processed in order.
_FILE_PAIRS = (
    ("./3. Logarithm/pls_DSS_CTRL_filtered_log_x+1.csv",
     "./4. Quantile/pls_DSS_CTRL_filtered_log_x+1_quantile.csv"),
    ("./3. Logarithm/pls_LPS_CTRL_filtered_log_x+1.csv",
     "./4. Quantile/pls_LPS_CTRL_filtered_log_x+1_quantile.csv"),
    ("./3. Logarithm/pls_VECPAC_CTRL_filtered_log_x+1.csv",
     "./4. Quantile/pls_VECPAC_CTRL_filtered_log_x+1_quantile.csv"),
)
for _log_path, _quant_path in _FILE_PAIRS:
    NaN_the_zeros(_log_path, _quant_path)

Saved 5. Normalized/pls_DSS_CTRL_normalized.csv
Saved 5. Normalized/pls_LPS_CTRL_normalized.csv
Saved 5. Normalized/pls_VECPAC_CTRL_normalized.csv
