## version 1

In [1]:
"""
synthesize_vf_effect_per_neuron.py
Python ≥3.9   pandas ≥2.0   numpy ≥1.25
"""

from pathlib import Path
from datetime import datetime

import numpy as np
import pandas as pd

# ── USER SETTINGS ──────────────────────────────────────────────────────
# Folder scanned for "*_cluster_firing_rates_windowed.xlsx" workbooks and
# their "*_average_vf_voltage_windowed.xlsx" partners.  synthesize_all()
# writes each synthetic workbook next to the workbook it was derived from.
tables_dir = Path(
    r"D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables"
)

# Baseline firing-rate multipliers per neuron.
#   Keys are neuron column labels as strings (apply_effect looks them up via
#   str(label)); any neuron not listed uses the "default" entry.
baseline_mult = {
    "24": 10.0,        # neuron 24 fires 10× nominal
    "27": 3.0,        # neuron 27 fires 3× nominal
    "default": 1.0,
}

# Voltage → firing-rate scaling: nested dict {group → {neuron → factor}}.
#   apply_effect multiplies the row's avg_voltage by the factor and adds the
#   product to the scaled firing rate; unlisted neurons use "default".
coeff = {
    "pre-stim":  {"24": 2e-5, "default": 2e-5},
    "post-stim": {"24": 4e-5, "default": 6e-5},
}

noise_sd = 0.2                          # Gaussian noise σ
# "group" values treated as measurement rows; rows with any other group value
# are copied to the output unchanged.
valid_groups = {"pre-stim", "post-stim"}
# NOTE(review): not referenced by any function in this cell.
summary_rows = {"correlation", "is_correlated"}
# Timestamp (yymmdd_HHMMSS) taken once when the cell runs; embedded in every
# output filename written by synthesize_all().
stamp = datetime.now().strftime("%y%m%d_%H%M%S")
# ───────────────────────────────────────────────────────────────────────


def find_trials(root: Path):
    """Yield ``(key, firing_rate_path, voltage_path)`` for each paired trial in *root*.

    A trial is a ``<key>_cluster_firing_rates_windowed.xlsx`` workbook that has
    a sibling ``<key>_average_vf_voltage_windowed.xlsx`` in the same folder.
    Firing-rate workbooks without that sibling are skipped silently.

    Trials are yielded in sorted path order so repeated runs visit the files in
    the same sequence regardless of how the filesystem lists them.
    """
    suffix = "_cluster_firing_rates_windowed"
    for fr_fp in sorted(root.glob(f"*{suffix}.xlsx")):
        # Excel keeps a "~$<name>.xlsx" lock file next to an open workbook; it
        # matches the pattern but is not a readable spreadsheet.
        if fr_fp.name.startswith("~$"):
            continue
        # Strip only the trailing suffix: str.replace would also delete the
        # same text if it happened to occur inside the key itself.
        key = fr_fp.stem.removesuffix(suffix)
        vf_fp = root / f"{key}_average_vf_voltage_windowed.xlsx"
        if vf_fp.exists():
            yield key, fr_fp, vf_fp


def split_sheet(df: pd.DataFrame):
    """Partition *df* by its ``group`` column.

    Returns ``(meas, summ)``: independent copies of the rows whose group is in
    ``valid_groups`` and of all remaining rows, each in original row order.
    """
    is_measurement = df["group"].isin(valid_groups)
    measurement_rows = df.loc[is_measurement].copy()
    other_rows = df.loc[~is_measurement].copy()
    return measurement_rows, other_rows


def apply_effect(
    fr_meas: pd.DataFrame,
    vf_meas: pd.DataFrame,
    rng: "np.random.Generator | None" = None,
) -> pd.DataFrame:
    """Return a copy of *fr_meas* with a synthetic voltage effect applied.

    For every row whose ``group`` is in ``valid_groups`` each neuron value is
    replaced by ``rate * baseline_mult + avg_voltage * coeff[group] + noise``,
    where ``noise`` is Gaussian with standard deviation ``noise_sd``.
    Per-neuron factors are looked up by ``str(column label)`` and fall back to
    the ``"default"`` entry of the respective dict.

    Parameters
    ----------
    fr_meas : measurement rows of a firing-rate sheet; every column except
        ``"group"`` is treated as a neuron.
    vf_meas : voltage rows with an ``"avg_voltage"`` column, matched to
        *fr_meas* row by row (by position, not by index label).
    rng : optional ``numpy.random.Generator`` for reproducible noise.  When
        omitted, NumPy's global random state is used.

    Returns
    -------
    A new frame with the same columns, in the same order, as *fr_meas*; neuron
    columns are float.

    Raises
    ------
    ValueError
        If the two frames do not have the same number of rows.
    """
    if len(vf_meas) != len(fr_meas):
        raise ValueError(
            f"row count mismatch: {len(fr_meas)} firing-rate rows vs "
            f"{len(vf_meas)} voltage rows"
        )

    neuron_cols = fr_meas.columns.difference(["group"])
    # Column labels may not be strings; the settings dicts use string keys.
    col_str = [str(c) for c in neuron_cols]

    # Cast up front so writing float results into part of an integer column
    # never relies on pandas' deprecated implicit upcast.
    new = fr_meas.astype({c: float for c in neuron_cols})

    base_vec = np.array(
        [baseline_mult.get(name, baseline_mult["default"]) for name in col_str],
        dtype=float,
    )
    voltage = vf_meas["avg_voltage"].to_numpy(dtype=float)
    groups = fr_meas["group"].to_numpy()
    draw = np.random.standard_normal if rng is None else rng.standard_normal

    # Sorted iteration: set order of strings varies between interpreter
    # sessions, which would change the noise assigned to each group even
    # with a seeded generator.
    for g in sorted(valid_groups):
        mask = groups == g
        k_vec = np.array(
            [coeff[g].get(name, coeff[g]["default"]) for name in col_str],
            dtype=float,
        )

        # Plain arrays throughout: positional matching, no index alignment.
        baseline = new.loc[mask, neuron_cols].to_numpy() * base_vec
        volt = voltage[mask].reshape(-1, 1)
        noise = noise_sd * draw(baseline.shape)

        new.loc[mask, neuron_cols] = baseline + volt * k_vec + noise

    return new[fr_meas.columns]          # preserve original column order



def synthesize_all():
    """Create a time-stamped synthetic firing-rate workbook for each trial.

    For every trial found in ``tables_dir`` the measurement rows are passed
    through ``apply_effect``, the remaining rows are appended unchanged, and
    the result is written next to the source workbook with
    ``_cluster_firing_rates_windowed`` replaced by
    ``_cluster_firing_rates_synthetic_<stamp>`` in the filename.
    """
    for _key, rates_file, voltage_file in find_trials(tables_dir):
        rates_sheet = pd.read_excel(rates_file, index_col=0)  # first column is the index
        voltage_sheet = pd.read_excel(voltage_file)           # no index column

        rates_meas, rates_other = split_sheet(rates_sheet)
        voltage_meas = split_sheet(voltage_sheet)[0]

        # Give the voltage rows the firing-rate row labels so both line up.
        voltage_meas.index = rates_meas.index

        combined = pd.concat([apply_effect(rates_meas, voltage_meas), rates_other])

        new_stem = rates_file.stem.replace(
            "_cluster_firing_rates_windowed",
            f"_cluster_firing_rates_synthetic_{stamp}",
        )
        outfile = rates_file.with_stem(new_stem)
        combined.to_excel(outfile, index=True)
        print(f"✓  {outfile}")


# Run the batch only when this code is executed directly, not when imported.
if __name__ == "__main__":
    synthesize_all()


✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_1_240918_143256_cluster_firing_rates_synthetic_250422_104020.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_2_240918_143936_cluster_firing_rates_synthetic_250422_104020.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_3_240918_144658_cluster_firing_rates_synthetic_250422_104020.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_4_240918_145638_cluster_firing_rates_synthetic_250422_104020.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_5_240918_150137_cluster_firing_rates_synthetic_250422_104020.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_6_240918_150811_cluster_firing_rates_synthetic_250422_104020.xlsx
✓  D:\SynologyDrive\CM

## version 2 - point to a real folder, save to a new folder, keep same names

1) Read the input files from a folder that I specify.
2) Save the new spreadsheets into a separate folder that I specify.
3) Keep the filenames of the new spreadsheets the same as the old ones.

In [2]:
"""
synthesize_vf_effect_per_neuron.py
Python ≥3.9   pandas ≥2.0   numpy ≥1.25
"""

from pathlib import Path
import numpy as np
import pandas as pd

# ── USER SETTINGS ──────────────────────────────────────────────────────
# raw_dir is scanned for "*_cluster_firing_rates_windowed.xlsx" workbooks and
# their "*_average_vf_voltage_windowed.xlsx" partners; synthesize_all() writes
# each result into synthetic_dir under the source workbook's filename.
raw_dir = Path(                       # originals live here
    r"D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.013 Multi_Rat_Linear_Mixed_Effects_flex_probe\DW322\tables"
)
synthetic_dir = Path(                 # new files go here
    r"D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables"
)

# Per-neuron firing-rate multipliers; keys are column labels as strings,
# neurons not listed use "default".
baseline_mult = {"24": 10.0, "27": 3.0, "default": 1.0}
# {group → {neuron → factor}}: apply_effect multiplies avg_voltage by the
# factor and adds the product to the scaled firing rate.
coeff = {
    "pre-stim":  {"24": 2e-5, "default": 2e-5},
    "post-stim": {"24": 4e-5, "default": 6e-5},
}
# Standard deviation of the Gaussian noise added to every synthetic value.
noise_sd = 0.2
# "group" values treated as measurement rows; rows with any other group value
# are copied to the output unchanged.
valid_groups = {"pre-stim", "post-stim"}
# NOTE(review): not referenced by any function in this cell.
summary_rows = {"correlation", "is_correlated"}
# ───────────────────────────────────────────────────────────────────────


# Create the output folder (and any missing parents) if it does not exist.
# This runs as soon as the cell/module is executed, before any function call.
synthetic_dir.mkdir(parents=True, exist_ok=True)


def find_trials(root: Path):
    """Yield ``(key, firing_rate_path, voltage_path)`` for each paired trial in *root*.

    A trial is a ``<key>_cluster_firing_rates_windowed.xlsx`` workbook that has
    a sibling ``<key>_average_vf_voltage_windowed.xlsx`` in the same folder.
    Firing-rate workbooks without that sibling are skipped silently.

    Trials are yielded in sorted path order so repeated runs visit the files in
    the same sequence regardless of how the filesystem lists them.
    """
    suffix = "_cluster_firing_rates_windowed"
    for fr_fp in sorted(root.glob(f"*{suffix}.xlsx")):
        # Excel keeps a "~$<name>.xlsx" lock file next to an open workbook; it
        # matches the pattern but is not a readable spreadsheet.
        if fr_fp.name.startswith("~$"):
            continue
        # Strip only the trailing suffix: str.replace would also delete the
        # same text if it happened to occur inside the key itself.
        key = fr_fp.stem.removesuffix(suffix)
        vf_fp = root / f"{key}_average_vf_voltage_windowed.xlsx"
        if vf_fp.exists():
            yield key, fr_fp, vf_fp


def split_sheet(df: pd.DataFrame):
    """Partition *df* by its ``group`` column.

    Returns ``(meas, summ)``: independent copies of the rows whose group is in
    ``valid_groups`` and of all remaining rows, each in original row order.
    """
    is_measurement = df["group"].isin(valid_groups)
    measurement_rows = df.loc[is_measurement].copy()
    other_rows = df.loc[~is_measurement].copy()
    return measurement_rows, other_rows


def apply_effect(
    fr_meas: pd.DataFrame,
    vf_meas: pd.DataFrame,
    rng: "np.random.Generator | None" = None,
) -> pd.DataFrame:
    """Return a copy of *fr_meas* with a synthetic voltage effect applied.

    For every row whose ``group`` is in ``valid_groups`` each neuron value is
    replaced by ``rate * baseline_mult + avg_voltage * coeff[group] + noise``,
    where ``noise`` is Gaussian with standard deviation ``noise_sd``.
    Per-neuron factors are looked up by ``str(column label)`` and fall back to
    the ``"default"`` entry of the respective dict.

    Parameters
    ----------
    fr_meas : measurement rows of a firing-rate sheet; every column except
        ``"group"`` is treated as a neuron.
    vf_meas : voltage rows with an ``"avg_voltage"`` column, matched to
        *fr_meas* row by row (by position, not by index label).
    rng : optional ``numpy.random.Generator`` for reproducible noise.  When
        omitted, NumPy's global random state is used.

    Returns
    -------
    A new frame with the same columns, in the same order, as *fr_meas*; neuron
    columns are float.

    Raises
    ------
    ValueError
        If the two frames do not have the same number of rows.
    """
    if len(vf_meas) != len(fr_meas):
        raise ValueError(
            f"row count mismatch: {len(fr_meas)} firing-rate rows vs "
            f"{len(vf_meas)} voltage rows"
        )

    neuron_cols = fr_meas.columns.difference(["group"])
    # Column labels may not be strings; the settings dicts use string keys.
    col_str = [str(c) for c in neuron_cols]

    # Cast up front so writing float results into part of an integer column
    # never relies on pandas' deprecated implicit upcast.
    new = fr_meas.astype({c: float for c in neuron_cols})

    base_vec = np.array(
        [baseline_mult.get(name, baseline_mult["default"]) for name in col_str],
        dtype=float,
    )
    voltage = vf_meas["avg_voltage"].to_numpy(dtype=float)
    groups = fr_meas["group"].to_numpy()
    draw = np.random.standard_normal if rng is None else rng.standard_normal

    # Sorted iteration: set order of strings varies between interpreter
    # sessions, which would change the noise assigned to each group even
    # with a seeded generator.
    for g in sorted(valid_groups):
        mask = groups == g
        k_vec = np.array(
            [coeff[g].get(name, coeff[g]["default"]) for name in col_str],
            dtype=float,
        )

        # Plain arrays throughout: positional matching, no index alignment.
        baseline = new.loc[mask, neuron_cols].to_numpy() * base_vec
        volt = voltage[mask].reshape(-1, 1)
        noise = noise_sd * draw(baseline.shape)

        new.loc[mask, neuron_cols] = baseline + volt * k_vec + noise

    return new[fr_meas.columns]          # preserve original column order


def synthesize_all():
    """Write a synthetic firing-rate workbook for every trial in ``raw_dir``.

    The measurement rows of each firing-rate sheet are passed through
    ``apply_effect``; the remaining rows are appended unchanged.  The result
    is saved in ``synthetic_dir`` under the source workbook's own filename.
    """
    for _key, rates_file, voltage_file in find_trials(raw_dir):
        rates_sheet = pd.read_excel(rates_file, index_col=0)
        voltage_sheet = pd.read_excel(voltage_file)

        rates_meas, rates_other = split_sheet(rates_sheet)
        voltage_meas = split_sheet(voltage_sheet)[0]

        # Give the voltage rows the firing-rate row labels so both line up.
        voltage_meas.index = rates_meas.index

        combined = pd.concat([apply_effect(rates_meas, voltage_meas), rates_other])

        outfile = synthetic_dir.joinpath(rates_file.name)   # identical filename
        combined.to_excel(outfile, index=True)
        print(f"✓  {outfile}")


# Run the batch only when this code is executed directly, not when imported.
if __name__ == "__main__":
    synthesize_all()


✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_1_240918_143256_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_2_240918_143936_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_3_240918_144658_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_4_240918_145638_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_5_240918_150137_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_6_240918_150811_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_7_240918_151516

### DW322

In [3]:
"""
synthesize_vf_effect_per_neuron.py
Python ≥3.9   pandas ≥2.0   numpy ≥1.25
"""

from pathlib import Path
import numpy as np
import pandas as pd

# ── USER SETTINGS ──────────────────────────────────────────────────────
# raw_dir is scanned for "*_cluster_firing_rates_windowed.xlsx" workbooks and
# their "*_average_vf_voltage_windowed.xlsx" partners; synthesize_all() writes
# each result into synthetic_dir under the source workbook's filename.
raw_dir = Path(                       # originals live here
    r"D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.013 Multi_Rat_Linear_Mixed_Effects_flex_probe\DW322\tables"
)
synthetic_dir = Path(                 # new files go here
    r"D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables"
)

# Per-neuron firing-rate multipliers; keys are column labels as strings,
# neurons not listed use "default".
baseline_mult = {"24": 10.0, "27": 3.0, "default": 1.0}
# {group → {neuron → factor}}: apply_effect multiplies avg_voltage by the
# factor and adds the product to the scaled firing rate.
coeff = {
    "pre-stim":  {"24": 2e-5, "default": 2e-5},
    "post-stim": {"24": 4e-5, "default": 6e-5},
}
# Standard deviation of the Gaussian noise added to every synthetic value.
noise_sd = 0.2
# "group" values treated as measurement rows; rows with any other group value
# are copied to the output unchanged.
valid_groups = {"pre-stim", "post-stim"}
# NOTE(review): not referenced by any function in this cell.
summary_rows = {"correlation", "is_correlated"}
# ───────────────────────────────────────────────────────────────────────


# Create the output folder (and any missing parents) if it does not exist.
# This runs as soon as the cell/module is executed, before any function call.
synthetic_dir.mkdir(parents=True, exist_ok=True)


def find_trials(root: Path):
    """Yield ``(key, firing_rate_path, voltage_path)`` for each paired trial in *root*.

    A trial is a ``<key>_cluster_firing_rates_windowed.xlsx`` workbook that has
    a sibling ``<key>_average_vf_voltage_windowed.xlsx`` in the same folder.
    Firing-rate workbooks without that sibling are skipped silently.

    Trials are yielded in sorted path order so repeated runs visit the files in
    the same sequence regardless of how the filesystem lists them.
    """
    suffix = "_cluster_firing_rates_windowed"
    for fr_fp in sorted(root.glob(f"*{suffix}.xlsx")):
        # Excel keeps a "~$<name>.xlsx" lock file next to an open workbook; it
        # matches the pattern but is not a readable spreadsheet.
        if fr_fp.name.startswith("~$"):
            continue
        # Strip only the trailing suffix: str.replace would also delete the
        # same text if it happened to occur inside the key itself.
        key = fr_fp.stem.removesuffix(suffix)
        vf_fp = root / f"{key}_average_vf_voltage_windowed.xlsx"
        if vf_fp.exists():
            yield key, fr_fp, vf_fp


def split_sheet(df: pd.DataFrame):
    """Partition *df* by its ``group`` column.

    Returns ``(meas, summ)``: independent copies of the rows whose group is in
    ``valid_groups`` and of all remaining rows, each in original row order.
    """
    is_measurement = df["group"].isin(valid_groups)
    measurement_rows = df.loc[is_measurement].copy()
    other_rows = df.loc[~is_measurement].copy()
    return measurement_rows, other_rows


def apply_effect(
    fr_meas: pd.DataFrame,
    vf_meas: pd.DataFrame,
    rng: "np.random.Generator | None" = None,
) -> pd.DataFrame:
    """Return a copy of *fr_meas* with a synthetic voltage effect applied.

    For every row whose ``group`` is in ``valid_groups`` each neuron value is
    replaced by ``rate * baseline_mult + avg_voltage * coeff[group] + noise``,
    where ``noise`` is Gaussian with standard deviation ``noise_sd``.
    Per-neuron factors are looked up by ``str(column label)`` and fall back to
    the ``"default"`` entry of the respective dict.

    Parameters
    ----------
    fr_meas : measurement rows of a firing-rate sheet; every column except
        ``"group"`` is treated as a neuron.
    vf_meas : voltage rows with an ``"avg_voltage"`` column, matched to
        *fr_meas* row by row (by position, not by index label).
    rng : optional ``numpy.random.Generator`` for reproducible noise.  When
        omitted, NumPy's global random state is used.

    Returns
    -------
    A new frame with the same columns, in the same order, as *fr_meas*; neuron
    columns are float.

    Raises
    ------
    ValueError
        If the two frames do not have the same number of rows.
    """
    if len(vf_meas) != len(fr_meas):
        raise ValueError(
            f"row count mismatch: {len(fr_meas)} firing-rate rows vs "
            f"{len(vf_meas)} voltage rows"
        )

    neuron_cols = fr_meas.columns.difference(["group"])
    # Column labels may not be strings; the settings dicts use string keys.
    col_str = [str(c) for c in neuron_cols]

    # Cast up front so writing float results into part of an integer column
    # never relies on pandas' deprecated implicit upcast.
    new = fr_meas.astype({c: float for c in neuron_cols})

    base_vec = np.array(
        [baseline_mult.get(name, baseline_mult["default"]) for name in col_str],
        dtype=float,
    )
    voltage = vf_meas["avg_voltage"].to_numpy(dtype=float)
    groups = fr_meas["group"].to_numpy()
    draw = np.random.standard_normal if rng is None else rng.standard_normal

    # Sorted iteration: set order of strings varies between interpreter
    # sessions, which would change the noise assigned to each group even
    # with a seeded generator.
    for g in sorted(valid_groups):
        mask = groups == g
        k_vec = np.array(
            [coeff[g].get(name, coeff[g]["default"]) for name in col_str],
            dtype=float,
        )

        # Plain arrays throughout: positional matching, no index alignment.
        baseline = new.loc[mask, neuron_cols].to_numpy() * base_vec
        volt = voltage[mask].reshape(-1, 1)
        noise = noise_sd * draw(baseline.shape)

        new.loc[mask, neuron_cols] = baseline + volt * k_vec + noise

    return new[fr_meas.columns]          # preserve original column order


def synthesize_all():
    """Write a synthetic firing-rate workbook for every trial in ``raw_dir``.

    The measurement rows of each firing-rate sheet are passed through
    ``apply_effect``; the remaining rows are appended unchanged.  The result
    is saved in ``synthetic_dir`` under the source workbook's own filename.
    """
    for _key, rates_file, voltage_file in find_trials(raw_dir):
        rates_sheet = pd.read_excel(rates_file, index_col=0)
        voltage_sheet = pd.read_excel(voltage_file)

        rates_meas, rates_other = split_sheet(rates_sheet)
        voltage_meas = split_sheet(voltage_sheet)[0]

        # Give the voltage rows the firing-rate row labels so both line up.
        voltage_meas.index = rates_meas.index

        combined = pd.concat([apply_effect(rates_meas, voltage_meas), rates_other])

        outfile = synthetic_dir.joinpath(rates_file.name)   # identical filename
        combined.to_excel(outfile, index=True)
        print(f"✓  {outfile}")


# Run the batch only when this code is executed directly, not when imported.
if __name__ == "__main__":
    synthesize_all()


✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_1_240918_143256_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_2_240918_143936_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_3_240918_144658_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_4_240918_145638_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_5_240918_150137_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_6_240918_150811_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW322\tables\VF_7_240918_151516

### DW323

In [4]:
"""
synthesize_vf_effect_per_neuron.py
Python ≥3.9   pandas ≥2.0   numpy ≥1.25
"""

from pathlib import Path
import numpy as np
import pandas as pd

# ── USER SETTINGS ──────────────────────────────────────────────────────
# raw_dir is scanned for "*_cluster_firing_rates_windowed.xlsx" workbooks and
# their "*_average_vf_voltage_windowed.xlsx" partners; synthesize_all() writes
# each result into synthetic_dir under the source workbook's filename.
raw_dir = Path(                       # originals live here
    r"D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.013 Multi_Rat_Linear_Mixed_Effects_flex_probe\DW323\tables"
)
synthetic_dir = Path(                 # new files go here
    r"D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW323\tables"
)

# Per-neuron firing-rate multipliers; keys are column labels as strings,
# neurons not listed use "default".
baseline_mult = {"24": 10.0, "27": 3.0, "default": 1.0}
# {group → {neuron → factor}}: apply_effect multiplies avg_voltage by the
# factor and adds the product to the scaled firing rate.
coeff = {
    "pre-stim":  {"24": 2e-5, "default": 2e-5},
    "post-stim": {"24": 4e-5, "default": 6e-5},
}
# Standard deviation of the Gaussian noise added to every synthetic value.
noise_sd = 0.2
# "group" values treated as measurement rows; rows with any other group value
# are copied to the output unchanged.
valid_groups = {"pre-stim", "post-stim"}
# NOTE(review): not referenced by any function in this cell.
summary_rows = {"correlation", "is_correlated"}
# ───────────────────────────────────────────────────────────────────────


# Create the output folder (and any missing parents) if it does not exist.
# This runs as soon as the cell/module is executed, before any function call.
synthetic_dir.mkdir(parents=True, exist_ok=True)


def find_trials(root: Path):
    """Yield ``(key, firing_rate_path, voltage_path)`` for each paired trial in *root*.

    A trial is a ``<key>_cluster_firing_rates_windowed.xlsx`` workbook that has
    a sibling ``<key>_average_vf_voltage_windowed.xlsx`` in the same folder.
    Firing-rate workbooks without that sibling are skipped silently.

    Trials are yielded in sorted path order so repeated runs visit the files in
    the same sequence regardless of how the filesystem lists them.
    """
    suffix = "_cluster_firing_rates_windowed"
    for fr_fp in sorted(root.glob(f"*{suffix}.xlsx")):
        # Excel keeps a "~$<name>.xlsx" lock file next to an open workbook; it
        # matches the pattern but is not a readable spreadsheet.
        if fr_fp.name.startswith("~$"):
            continue
        # Strip only the trailing suffix: str.replace would also delete the
        # same text if it happened to occur inside the key itself.
        key = fr_fp.stem.removesuffix(suffix)
        vf_fp = root / f"{key}_average_vf_voltage_windowed.xlsx"
        if vf_fp.exists():
            yield key, fr_fp, vf_fp


def split_sheet(df: pd.DataFrame):
    """Partition *df* by its ``group`` column.

    Returns ``(meas, summ)``: independent copies of the rows whose group is in
    ``valid_groups`` and of all remaining rows, each in original row order.
    """
    is_measurement = df["group"].isin(valid_groups)
    measurement_rows = df.loc[is_measurement].copy()
    other_rows = df.loc[~is_measurement].copy()
    return measurement_rows, other_rows


def apply_effect(
    fr_meas: pd.DataFrame,
    vf_meas: pd.DataFrame,
    rng: "np.random.Generator | None" = None,
) -> pd.DataFrame:
    """Return a copy of *fr_meas* with a synthetic voltage effect applied.

    For every row whose ``group`` is in ``valid_groups`` each neuron value is
    replaced by ``rate * baseline_mult + avg_voltage * coeff[group] + noise``,
    where ``noise`` is Gaussian with standard deviation ``noise_sd``.
    Per-neuron factors are looked up by ``str(column label)`` and fall back to
    the ``"default"`` entry of the respective dict.

    Parameters
    ----------
    fr_meas : measurement rows of a firing-rate sheet; every column except
        ``"group"`` is treated as a neuron.
    vf_meas : voltage rows with an ``"avg_voltage"`` column, matched to
        *fr_meas* row by row (by position, not by index label).
    rng : optional ``numpy.random.Generator`` for reproducible noise.  When
        omitted, NumPy's global random state is used.

    Returns
    -------
    A new frame with the same columns, in the same order, as *fr_meas*; neuron
    columns are float.

    Raises
    ------
    ValueError
        If the two frames do not have the same number of rows.
    """
    if len(vf_meas) != len(fr_meas):
        raise ValueError(
            f"row count mismatch: {len(fr_meas)} firing-rate rows vs "
            f"{len(vf_meas)} voltage rows"
        )

    neuron_cols = fr_meas.columns.difference(["group"])
    # Column labels may not be strings; the settings dicts use string keys.
    col_str = [str(c) for c in neuron_cols]

    # Cast up front so writing float results into part of an integer column
    # never relies on pandas' deprecated implicit upcast.
    new = fr_meas.astype({c: float for c in neuron_cols})

    base_vec = np.array(
        [baseline_mult.get(name, baseline_mult["default"]) for name in col_str],
        dtype=float,
    )
    voltage = vf_meas["avg_voltage"].to_numpy(dtype=float)
    groups = fr_meas["group"].to_numpy()
    draw = np.random.standard_normal if rng is None else rng.standard_normal

    # Sorted iteration: set order of strings varies between interpreter
    # sessions, which would change the noise assigned to each group even
    # with a seeded generator.
    for g in sorted(valid_groups):
        mask = groups == g
        k_vec = np.array(
            [coeff[g].get(name, coeff[g]["default"]) for name in col_str],
            dtype=float,
        )

        # Plain arrays throughout: positional matching, no index alignment.
        baseline = new.loc[mask, neuron_cols].to_numpy() * base_vec
        volt = voltage[mask].reshape(-1, 1)
        noise = noise_sd * draw(baseline.shape)

        new.loc[mask, neuron_cols] = baseline + volt * k_vec + noise

    return new[fr_meas.columns]          # preserve original column order


def synthesize_all():
    """Write a synthetic firing-rate workbook for every trial in ``raw_dir``.

    The measurement rows of each firing-rate sheet are passed through
    ``apply_effect``; the remaining rows are appended unchanged.  The result
    is saved in ``synthetic_dir`` under the source workbook's own filename.
    """
    for _key, rates_file, voltage_file in find_trials(raw_dir):
        rates_sheet = pd.read_excel(rates_file, index_col=0)
        voltage_sheet = pd.read_excel(voltage_file)

        rates_meas, rates_other = split_sheet(rates_sheet)
        voltage_meas = split_sheet(voltage_sheet)[0]

        # Give the voltage rows the firing-rate row labels so both line up.
        voltage_meas.index = rates_meas.index

        combined = pd.concat([apply_effect(rates_meas, voltage_meas), rates_other])

        outfile = synthetic_dir.joinpath(rates_file.name)   # identical filename
        combined.to_excel(outfile, index=True)
        print(f"✓  {outfile}")


# Run the batch only when this code is executed directly, not when imported.
if __name__ == "__main__":
    synthesize_all()


✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW323\tables\VF_1_240911_164342_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW323\tables\VF_2_240911_165039_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW323\tables\VF_3_240911_165617_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW323\tables\VF_4_240911_170446_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW323\tables\VF_5_240911_171014_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW323\tables\VF_6_240911_171505_cluster_firing_rates_windowed.xlsx


### DW327

In [5]:
"""
synthesize_vf_effect_per_neuron.py
Python ≥3.9   pandas ≥2.0   numpy ≥1.25
"""

from pathlib import Path
import numpy as np
import pandas as pd

# ── USER SETTINGS ──────────────────────────────────────────────────────
# raw_dir is scanned for "*_cluster_firing_rates_windowed.xlsx" workbooks and
# their "*_average_vf_voltage_windowed.xlsx" partners; synthesize_all() writes
# each result into synthetic_dir under the source workbook's filename.
raw_dir = Path(                       # originals live here
    r"D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.013 Multi_Rat_Linear_Mixed_Effects_flex_probe\DW327\tables"
)
synthetic_dir = Path(                 # new files go here
    r"D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW327\tables"
)

# Per-neuron firing-rate multipliers; keys are column labels as strings,
# neurons not listed use "default".
baseline_mult = {"24": 10.0, "27": 3.0, "default": 1.0}
# {group → {neuron → factor}}: apply_effect multiplies avg_voltage by the
# factor and adds the product to the scaled firing rate.
coeff = {
    "pre-stim":  {"24": 2e-5, "default": 2e-5},
    "post-stim": {"24": 4e-5, "default": 6e-5},
}
# Standard deviation of the Gaussian noise added to every synthetic value.
noise_sd = 0.2
# "group" values treated as measurement rows; rows with any other group value
# are copied to the output unchanged.
valid_groups = {"pre-stim", "post-stim"}
# NOTE(review): not referenced by any function in this cell.
summary_rows = {"correlation", "is_correlated"}
# ───────────────────────────────────────────────────────────────────────


# Create the output folder (and any missing parents) if it does not exist.
# This runs as soon as the cell/module is executed, before any function call.
synthetic_dir.mkdir(parents=True, exist_ok=True)


def find_trials(root: Path):
    """Yield ``(key, firing_rate_path, voltage_path)`` for each paired trial in *root*.

    A trial is a ``<key>_cluster_firing_rates_windowed.xlsx`` workbook that has
    a sibling ``<key>_average_vf_voltage_windowed.xlsx`` in the same folder.
    Firing-rate workbooks without that sibling are skipped silently.

    Trials are yielded in sorted path order so repeated runs visit the files in
    the same sequence regardless of how the filesystem lists them.
    """
    suffix = "_cluster_firing_rates_windowed"
    for fr_fp in sorted(root.glob(f"*{suffix}.xlsx")):
        # Excel keeps a "~$<name>.xlsx" lock file next to an open workbook; it
        # matches the pattern but is not a readable spreadsheet.
        if fr_fp.name.startswith("~$"):
            continue
        # Strip only the trailing suffix: str.replace would also delete the
        # same text if it happened to occur inside the key itself.
        key = fr_fp.stem.removesuffix(suffix)
        vf_fp = root / f"{key}_average_vf_voltage_windowed.xlsx"
        if vf_fp.exists():
            yield key, fr_fp, vf_fp


def split_sheet(df: pd.DataFrame):
    """Partition *df* by its ``group`` column.

    Returns ``(meas, summ)``: independent copies of the rows whose group is in
    ``valid_groups`` and of all remaining rows, each in original row order.
    """
    is_measurement = df["group"].isin(valid_groups)
    measurement_rows = df.loc[is_measurement].copy()
    other_rows = df.loc[~is_measurement].copy()
    return measurement_rows, other_rows


def apply_effect(
    fr_meas: pd.DataFrame,
    vf_meas: pd.DataFrame,
    rng: "np.random.Generator | None" = None,
) -> pd.DataFrame:
    """Return a copy of *fr_meas* with a synthetic voltage effect applied.

    For every row whose ``group`` is in ``valid_groups`` each neuron value is
    replaced by ``rate * baseline_mult + avg_voltage * coeff[group] + noise``,
    where ``noise`` is Gaussian with standard deviation ``noise_sd``.
    Per-neuron factors are looked up by ``str(column label)`` and fall back to
    the ``"default"`` entry of the respective dict.

    Parameters
    ----------
    fr_meas : measurement rows of a firing-rate sheet; every column except
        ``"group"`` is treated as a neuron.
    vf_meas : voltage rows with an ``"avg_voltage"`` column, matched to
        *fr_meas* row by row (by position, not by index label).
    rng : optional ``numpy.random.Generator`` for reproducible noise.  When
        omitted, NumPy's global random state is used.

    Returns
    -------
    A new frame with the same columns, in the same order, as *fr_meas*; neuron
    columns are float.

    Raises
    ------
    ValueError
        If the two frames do not have the same number of rows.
    """
    if len(vf_meas) != len(fr_meas):
        raise ValueError(
            f"row count mismatch: {len(fr_meas)} firing-rate rows vs "
            f"{len(vf_meas)} voltage rows"
        )

    neuron_cols = fr_meas.columns.difference(["group"])
    # Column labels may not be strings; the settings dicts use string keys.
    col_str = [str(c) for c in neuron_cols]

    # Cast up front so writing float results into part of an integer column
    # never relies on pandas' deprecated implicit upcast.
    new = fr_meas.astype({c: float for c in neuron_cols})

    base_vec = np.array(
        [baseline_mult.get(name, baseline_mult["default"]) for name in col_str],
        dtype=float,
    )
    voltage = vf_meas["avg_voltage"].to_numpy(dtype=float)
    groups = fr_meas["group"].to_numpy()
    draw = np.random.standard_normal if rng is None else rng.standard_normal

    # Sorted iteration: set order of strings varies between interpreter
    # sessions, which would change the noise assigned to each group even
    # with a seeded generator.
    for g in sorted(valid_groups):
        mask = groups == g
        k_vec = np.array(
            [coeff[g].get(name, coeff[g]["default"]) for name in col_str],
            dtype=float,
        )

        # Plain arrays throughout: positional matching, no index alignment.
        baseline = new.loc[mask, neuron_cols].to_numpy() * base_vec
        volt = voltage[mask].reshape(-1, 1)
        noise = noise_sd * draw(baseline.shape)

        new.loc[mask, neuron_cols] = baseline + volt * k_vec + noise

    return new[fr_meas.columns]          # preserve original column order


def synthesize_all():
    """Write a synthetic firing-rate workbook for every trial in ``raw_dir``.

    The measurement rows of each firing-rate sheet are passed through
    ``apply_effect``; the remaining rows are appended unchanged.  The result
    is saved in ``synthetic_dir`` under the source workbook's own filename.
    """
    for _key, rates_file, voltage_file in find_trials(raw_dir):
        rates_sheet = pd.read_excel(rates_file, index_col=0)
        voltage_sheet = pd.read_excel(voltage_file)

        rates_meas, rates_other = split_sheet(rates_sheet)
        voltage_meas = split_sheet(voltage_sheet)[0]

        # Give the voltage rows the firing-rate row labels so both line up.
        voltage_meas.index = rates_meas.index

        combined = pd.concat([apply_effect(rates_meas, voltage_meas), rates_other])

        outfile = synthetic_dir.joinpath(rates_file.name)   # identical filename
        combined.to_excel(outfile, index=True)
        print(f"✓  {outfile}")


# Run the batch only when this code is executed directly, not when imported.
if __name__ == "__main__":
    synthesize_all()


✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW327\tables\VF_01_241125_153746_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW327\tables\VF_02_241125_154307_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW327\tables\VF_03_241125_154841_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW327\tables\VF_04_241125_155417_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW327\tables\VF_05_241125_155941_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW327\tables\VF_06_241125_160515_cluster_firing_rates_windowed.xlsx
✓  D:\SynologyDrive\CMU.80 Data\88 Analyzed Data\88.014 Multi_Rat_synthetic_data\DW327\tables\VF_07_241125