In [8]:
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib
import seaborn as sns
from scipy.signal import savgol_filter


In [9]:
# Load the raw specificity table; the first CSV column becomes the row index.
# NOTE(review): the original note here said only "GFP" — presumably these are
# GFP fluorescence readings; confirm against the data source.
spec_df = pd.read_csv(filepath_or_buffer='./specificity.csv', index_col=0)

In [11]:
# Min–max scale each strain's response to [0, 1] so that strains with different
# absolute signal ranges can be compared without bias.

strains = [
    "merR_MG", "merR_ALE1", "merR_ALE2", "merR_ALE3", "merR_ALE4", "merR_ALE5",
    "arsR_MG", "arsR_ALE1", "arsR_ALE2", "arsR_ALE3", "arsR_ALE4", "arsR_ALE5",
    "cusC_MG", "cusC_ALE1", "cusC_ALE2", "cusC_ALE3", "cusC_ALE4", "cusC_ALE5",
    "zntA_MG", "zntA_ALE1", "zntA_ALE2", "zntA_ALE3", "zntA_ALE4", "zntA_ALE5",
    "cadC_MG", "cadC_ALE1", "cadC_ALE2", "cadC_ALE3", "cadC_ALE4", "cadC_ALE5",
]

# --- Min–max scaling per strain ---
# Scaled blocks are collected in a list and concatenated once at the end,
# instead of growing a DataFrame with pd.concat on every iteration (which
# re-copies all previously accumulated columns each time).
scaled_blocks = []
for strain in strains:
    # filter(like=...) is a substring match on column labels, so this selects
    # every column whose name contains the strain key (e.g. "arsR_ALE2").
    strain_columns = spec_df.filter(like=strain)

    # One global min / max per strain: across all matching columns and all rows,
    # so the columns of a strain stay comparable with each other after scaling.
    strain_min = strain_columns.min().min()
    strain_max = strain_columns.max().max()

    # NOTE: if a strain's values are all identical (strain_max == strain_min)
    # the division is 0/0 and every value of that strain becomes NaN; a strain
    # key that matches no column contributes an empty block.
    scaled_blocks.append((strain_columns - strain_min) / (strain_max - strain_min))

# Column order follows the order of `strains`, then the column order in spec_df.
minmaxscaled_df = pd.concat(scaled_blocks, axis=1)

# Every block was derived from spec_df, so the index already matches; the
# assignment keeps that guarantee explicit (it raises on a length mismatch).
minmaxscaled_df.index = spec_df.index


# Metals / inducer identifiers used to compute the per-strain response summary
hm = ["Hg", "As", "Cd", "Pb", "Cu"]

# Savitzky–Golay smoothing parameters (window length, polynomial order).
SAVGOL_WINDOW = 5
SAVGOL_POLYORDER = 3

# Row windows, hoisted out of the loops because they do not depend on the
# strain or metal: positional rows 0:9 are used as the baseline region and
# rows 9:20 as the response region.
# NOTE(review): what the rows represent (e.g. time points or doses) is not
# visible in this notebook section — confirm against the input file.
baseline_rows = minmaxscaled_df.iloc[0:9]
response_rows = minmaxscaled_df.iloc[9:20]

# Collect one record per (strain, metal) and build the long-form table once,
# rather than inserting rows one at a time and re-concatenating per strain.
records = []
for strain in strains:
    for metal in hm:
        # Column key for this strain + metal (e.g. "arsR_ALE2_As");
        # a missing column raises KeyError here.
        column = strain + "_" + metal

        # Baseline: minimum of the smoothed trace over the baseline rows
        baseline = savgol_filter(
            baseline_rows[column], SAVGOL_WINDOW, SAVGOL_POLYORDER
        ).min()

        # Response: maximum of the smoothed trace over the response rows
        peak = savgol_filter(
            response_rows[column], SAVGOL_WINDOW, SAVGOL_POLYORDER
        ).max()

        # The "foldchange" column stores the difference (peak - baseline) on the
        # min–max scale, not a ratio; the "concentration" column holds the metal
        # identifier. Both column names are kept as-is for downstream consumers.
        records.append(
            {"concentration": metal, "strain": strain, "foldchange": peak - baseline}
        )

# Long-form dataframe: one row per (strain, metal), strains in list order,
# default 0..n-1 integer index.
concat_df = pd.DataFrame(records, columns=["concentration", "strain", "foldchange"])

# UNCOMMENT TO SAVE AGAIN
# concat_df.to_csv("spec.csv")