In [None]:
# PYTHON FUNCTION TO REFORMAT A .XLSX FILE INTO A CSV COMPATIBLE WITH THE R MIXED MODEL SCRIPT
import os
import pandas as pd

def _unique_output_path(base_path):
    """Return a path that does not exist yet, derived from *base_path*.

    If *base_path* is free it is returned unchanged. Otherwise a counter is
    inserted before the extension -- ``name (1).ext``, ``name (2).ext``, ... --
    and the first candidate that does not exist on disk is returned.
    """
    stem, suffix = os.path.splitext(base_path)
    candidate = base_path
    counter = 0
    # Keep bumping the counter until we land on a name nobody has taken.
    while os.path.exists(candidate):
        counter += 1
        candidate = f"{stem} ({counter}){suffix}"
    return candidate

def split_excel(input_excel_path, sheet_name=0, filename_col=0, value_col=1, overwrite=False, biological_rep=None):
    """Reformat an image-measurement Excel sheet into a CSV for the R mixed-model script.

    Every non-blank cell in ``filename_col`` is treated as an image filename.
    Its stem (directory and extension removed) is split on ``_``: field 1 is
    the treatment, field 2 the well and field 3 the image, e.g.
    ``Tri_10ugLPS_01_2_C_0.tiff`` -> treatment ``10ugLPS``, well ``01``,
    image ``2``. Fields the name does not contain become ``""``.

    The ``Well`` column is then replaced by a 1-based integer ID that is unique
    per (treatment, well) pair, numbered in order of first appearance, so wells
    with the same label under different treatments stay distinct.

    Args:
        input_excel_path: Path of the workbook to read.
        sheet_name: Sheet passed straight to ``pandas.read_excel``.
        filename_col: Positional index of the column holding the filenames.
        value_col: Positional index of the column holding the measurement.
            If it is past the last column, ``Results`` is filled with ``None``.
        overwrite: When False (default) an existing output file is kept and a
            ``" (n)"`` suffix is added to the new file's name instead.
        biological_rep: Replicate number written to every row. When ``None``
            the user is prompted on stdin until a valid integer is entered.

    Returns:
        ``(out_df, out_path)``: the reformatted DataFrame and the path of the
        CSV that was written next to the input workbook.

    Raises:
        IndexError: If the sheet has rows but ``filename_col`` (or a negative
            ``value_col``) is outside the sheet's columns.
    """
    df = pd.read_excel(input_excel_path, sheet_name=sheet_name)

    # Ask for biological replicate number if not provided
    if biological_rep is None:
        while True:
            try:
                biological_rep = int(input("Enter the Biological Replicate number for this dataset (e.g., 1, 2, 3): "))
                break
            except ValueError:
                print("Please enter a valid integer number.")

    out_rows = []
    # Guard on row count so an empty sheet (which may have no columns at all)
    # still yields an empty CSV rather than an out-of-bounds column lookup.
    if len(df):
        names = df.iloc[:, filename_col]
        # The bounds check does not depend on the row, so resolve it once.
        if value_col < df.shape[1]:
            values = df.iloc[:, value_col]
        else:
            values = [None] * len(df)

        for raw_name, result_val in zip(names, values):
            if pd.isna(raw_name):
                continue
            name = str(raw_name).strip()
            if not name:
                # A whitespace-only cell is as blank as an empty one; emitting
                # it would create a junk row and a bogus well ID.
                continue

            stem = os.path.splitext(os.path.basename(name))[0]
            # Take fields 1..3 and pad with "" so short names still unpack.
            treatment, well, image = (stem.split("_")[1:4] + ["", "", ""])[:3]

            out_rows.append({
                "Filename": name,
                "Treatment": treatment,
                "Biological_Rep": biological_rep,
                "Well": well,
                "Image": image,
                "Results": result_val,
            })

    out_df = pd.DataFrame(out_rows, columns=["Filename", "Treatment", "Biological_Rep", "Well", "Image", "Results"])

    # Re-number wells so the ID is unique across treatments (1-based, in order
    # of first appearance).
    out_df["Well"] = (
        pd.factorize(out_df["Treatment"].astype(str) + "|" + out_df["Well"].astype(str), sort=False)[0] + 1
    ).astype(int)

    # The CSV is written beside the input workbook.
    in_dir = os.path.dirname(os.path.abspath(input_excel_path))
    in_stem = os.path.splitext(os.path.basename(input_excel_path))[0]
    out_path = os.path.join(in_dir, f"{in_stem}_RMixedFormated.csv")
    if not overwrite:
        out_path = _unique_output_path(out_path)

    out_df.to_csv(out_path, index=False)
    print(f"Saved: {out_path}")
    print(f"Biological Replicate: {biological_rep}")
    print(f"Total rows: {len(out_df)}")
    return out_df, out_path


# Convert the PD microglia-area workbook. Passing biological_rep skips the
# interactive prompt and stamps that replicate number on every output row.
excel_path = '/Users/allisonpickle/Desktop/PD_Microglia Area.xlsx'
split_excel(excel_path, biological_rep=1)

Saved: /Users/allisonpickle/Desktop/PD_Microglia Area_RMixedFormated (1).csv
Biological Replicate: 1
Total rows: 146


(                      Filename Treatment  Biological_Rep  Well Image  \
 0    Tri_10ugLPS_01_1_C_0.tiff   10ugLPS               1     1     1   
 1    Tri_10ugLPS_01_2_C_0.tiff   10ugLPS               1     1     2   
 2    Tri_10ugLPS_02_1_C_0.tiff   10ugLPS               1     2     1   
 3    Tri_10ugLPS_02_2_C_0.tiff   10ugLPS               1     2     2   
 4    Tri_10ugLPS_03_1_C_0.tiff   10ugLPS               1     3     1   
 ..                         ...       ...             ...   ...   ...   
 141       Tri_PD_09_1_C_0.tiff        PD               1    79     1   
 142       Tri_PD_09_2_C_0.tiff        PD               1    79     2   
 143       Tri_PD_10_1_C_0.tiff        PD               1    80     1   
 144       Tri_PD_10_2_C_0.tiff        PD               1    80     2   
 145       Tri_PD_10_3_C_0.tiff        PD               1    80     3   
 
         Results  
 0    563.593950  
 1    350.590900  
 2    213.564175  
 3    319.841250  
 4    520.275100  
 ..     

Add data visualization
