In [2]:
import pandas as pd
import re
import numpy as np

# Tap water

In [3]:
# Tap-water run: background-correct each sensor trace, repair bad frames, reference
# against the promoterless control, then compute a fold change per sensor and induction.
# CSV column 0 becomes the index; every row number used below (45, ind_starts, ...)
# is a row POSITION, not an index label.
data = pd.read_csv('./tap.csv', index_col=0)  # Specify directory

# ---- Background subtraction ----
# Base sensor name = column name up to the first '.', skipping columns that end in 'BG'.
# Assumes replicates are named '<sensor>', '<sensor>.1', ... (what pandas produces for
# repeated CSV headers) — TODO confirm against the file.
sensors = sorted({
    col.split('.')[0]
    for col in data.columns
    if not col.endswith('BG')
})

for sensor in sensors:
    bg_col = f"{sensor}BG"

    # No matching background column: warn and leave this sensor's traces uncorrected.
    if bg_col not in data.columns:
        print(f"Warning: {bg_col} not found — skipping {sensor}")
        continue

    # Remove the same background trace from every replicate column of this sensor.
    sensor_cols = [col for col in data.columns if col.split('.')[0] == sensor]
    data[sensor_cols] = data[sensor_cols].sub(data[bg_col], axis=0)

# Background columns are no longer needed once subtracted.
data = data.loc[:, ~data.columns.str.endswith('BG')]

# ---- Repair bad frames ----
# Cells that are exactly 0 after background subtraction are treated as bad frames.
# They are replaced by linear interpolation between the nearest usable rows above and
# below, purely by row position (independent of the index labels). For an isolated bad
# frame this is the mean of its two neighbours; for a run of consecutive bad frames it
# avoids averaging against a neighbour that is itself still 0.
bad = data.eq(0)
if len(bad):
    # The first and last rows lack a neighbour on one side; leave them untouched.
    bad.iloc[[0, -1], :] = False
interpolated = data.mask(bad).interpolate(method='linear', axis=0, limit_area='inside')
df_filled = data.where(~bad, interpolated)  # only the flagged cells are replaced

# ---- Subtract mean of promoterless ----
data = df_filled
promoterless_cols = [c for c in data.columns if c.startswith('U139') and 'BG' not in c]
if not promoterless_cols:
    # An empty selection would silently turn every value below into NaN.
    raise ValueError("No promoterless 'U139' columns found in ./tap.csv")
promoterless_mean = data[promoterless_cols].mean(axis=1)
# Row-wise reference subtraction, then a constant offset of 50.
# NOTE(review): the offset presumably keeps the traces positive — confirm. The fold
# change below is a ratio, so its value depends on this constant.
data = data.sub(promoterless_mean, axis=0) + 50

# ---- Signal to noise ----
# Noise is defined as the per-column standard deviation of the first 45 rows
# (pre-induction signal). For a finite, non-zero std this per-column scale cancels in
# the max/baseline ratio computed below.
SNR = data / data.iloc[0:45, :].std(axis=0)

# Row positions at which each fold-change window starts (one per label in hm_labels).
ind_starts = [61, 196, 338, 500, 644, 776]

# Sensor blocks (column index ranges into the BG-free frame; relies on column order).
sensor_blocks = {
    'arsR':  slice(0, 4),
    'zntA':  slice(4, 8),
    'zraP':  slice(8, 12),
    'cusC':  slice(12, 16),
    'merR':  slice(16, 20),
}

# Column labels of the result, in the same order as ind_starts.
hm_labels = ['As', 'Cd', 'Hg', 'Pb', 'Cu', '0']

# Result table: one row per sensor, one column per label.
foldchange = pd.DataFrame(
    index=list(sensor_blocks),
    columns=hm_labels,
    dtype=float
)

# ---- Compute fold change ----
# For each window: per-replicate maximum over 24 rows divided by the value in the
# window's first row, averaged over the sensor's replicate columns.
for hm, start in enumerate(ind_starts):
    hm_name = hm_labels[hm]

    for sensor, cols in sensor_blocks.items():
        window = SNR.iloc[start:start + 24, cols]
        baseline = SNR.iloc[start, cols]

        foldchange.loc[sensor, hm_name] = (
            window.max(axis=0) / baseline
        ).mean()

# UNCOMMENT TO SAVE AGAIN 
#foldchange.to_csv('tap_foldchange.csv')

# Treatment

In [40]:
# Treatment run: background-correct each sensor trace, repair bad frames, reference
# against the promoterless control, then compute a fold change per sensor and induction.
# CSV column 0 becomes the index; every row number used below (45, ind_starts, ...)
# is a row POSITION, not an index label.
data = pd.read_csv('./treatment.csv', index_col=0)  # Specify directory

# ---- Background subtraction ----
# Base sensor name = column name up to the first '.', skipping columns that end in 'BG'.
# Assumes replicates are named '<sensor>', '<sensor>.1', ... (what pandas produces for
# repeated CSV headers) — TODO confirm against the file.
sensors = sorted({
    col.split('.')[0]
    for col in data.columns
    if not col.endswith('BG')
})

for sensor in sensors:
    bg_col = f"{sensor}BG"

    # No matching background column: warn and leave this sensor's traces uncorrected.
    if bg_col not in data.columns:
        print(f"Warning: {bg_col} not found — skipping {sensor}")
        continue

    # Remove the same background trace from every replicate column of this sensor.
    sensor_cols = [col for col in data.columns if col.split('.')[0] == sensor]
    data[sensor_cols] = data[sensor_cols].sub(data[bg_col], axis=0)

# Background columns are no longer needed once subtracted.
data = data.loc[:, ~data.columns.str.endswith('BG')]

# ---- Repair bad frames ----
# Cells that are exactly 0 after background subtraction are treated as bad frames.
# They are replaced by linear interpolation between the nearest usable rows above and
# below, purely by row position (independent of the index labels). For an isolated bad
# frame this is the mean of its two neighbours; for a run of consecutive bad frames it
# avoids averaging against a neighbour that is itself still 0.
bad = data.eq(0)
if len(bad):
    # The first and last rows lack a neighbour on one side; leave them untouched.
    bad.iloc[[0, -1], :] = False
interpolated = data.mask(bad).interpolate(method='linear', axis=0, limit_area='inside')
df_filled = data.where(~bad, interpolated)  # only the flagged cells are replaced

# ---- Subtract mean of promoterless ----
data = df_filled
promoterless_cols = [c for c in data.columns if c.startswith('U139') and 'BG' not in c]
if not promoterless_cols:
    # An empty selection would silently turn every value below into NaN.
    raise ValueError("No promoterless 'U139' columns found in ./treatment.csv")
promoterless_mean = data[promoterless_cols].mean(axis=1)
# Row-wise reference subtraction, then a constant offset of 50.
# NOTE(review): the offset presumably keeps the traces positive — confirm. The fold
# change below is a ratio, so its value depends on this constant.
data = data.sub(promoterless_mean, axis=0) + 50

# ---- Signal to noise ----
# Noise is defined as the per-column standard deviation of the first 45 rows
# (pre-induction signal). For a finite, non-zero std this per-column scale cancels in
# the max/baseline ratio computed below.
SNR = data / data.iloc[0:45, :].std(axis=0)

# Row positions at which each fold-change window starts (one per label in hm_labels).
ind_starts = [61, 196, 338, 500, 644, 776]

# Sensor blocks (column index ranges into the BG-free frame; relies on column order).
sensor_blocks = {
    'arsR':  slice(0, 4),
    'zntA':  slice(4, 8),
    'zraP':  slice(8, 12),
    'cusC':  slice(12, 16),
    'merR':  slice(16, 20),
}

# Column labels of the result, in the same order as ind_starts.
hm_labels = ['As', 'Cd', 'Hg', 'Pb', 'Cu', '0']

# Result table: one row per sensor, one column per label.
foldchange = pd.DataFrame(
    index=list(sensor_blocks),
    columns=hm_labels,
    dtype=float
)

# ---- Compute fold change ----
# For each window: per-replicate maximum over 24 rows divided by the value in the
# window's first row, averaged over the sensor's replicate columns.
for hm, start in enumerate(ind_starts):
    hm_name = hm_labels[hm]

    for sensor, cols in sensor_blocks.items():
        window = SNR.iloc[start:start + 24, cols]
        baseline = SNR.iloc[start, cols]

        foldchange.loc[sensor, hm_name] = (
            window.max(axis=0) / baseline
        ).mean()

# UNCOMMENT TO SAVE AGAIN 
#foldchange.to_csv('treatment_foldchange.csv')

# Seawater

In [42]:
# Seawater run: background-correct each sensor trace, repair bad frames, reference
# against the promoterless control, then compute a fold change per sensor and induction.
# CSV column 0 becomes the index; every row number used below (45, ind_starts, ...)
# is a row POSITION, not an index label.
data = pd.read_csv('./sea.csv', index_col=0)  # Specify directory

# ---- Background subtraction ----
# Base sensor name = column name up to the first '.', skipping columns that end in 'BG'.
# Assumes replicates are named '<sensor>', '<sensor>.1', ... (what pandas produces for
# repeated CSV headers) — TODO confirm against the file.
sensors = sorted({
    col.split('.')[0]
    for col in data.columns
    if not col.endswith('BG')
})

for sensor in sensors:
    bg_col = f"{sensor}BG"

    # No matching background column: warn and leave this sensor's traces uncorrected.
    if bg_col not in data.columns:
        print(f"Warning: {bg_col} not found — skipping {sensor}")
        continue

    # Remove the same background trace from every replicate column of this sensor.
    sensor_cols = [col for col in data.columns if col.split('.')[0] == sensor]
    data[sensor_cols] = data[sensor_cols].sub(data[bg_col], axis=0)

# Background columns are no longer needed once subtracted.
data = data.loc[:, ~data.columns.str.endswith('BG')]

# ---- Repair bad frames ----
# Cells that are exactly 0 after background subtraction are treated as bad frames.
# They are replaced by linear interpolation between the nearest usable rows above and
# below, purely by row position (independent of the index labels). For an isolated bad
# frame this is the mean of its two neighbours; for a run of consecutive bad frames it
# avoids averaging against a neighbour that is itself still 0.
bad = data.eq(0)
if len(bad):
    # The first and last rows lack a neighbour on one side; leave them untouched.
    bad.iloc[[0, -1], :] = False
interpolated = data.mask(bad).interpolate(method='linear', axis=0, limit_area='inside')
df_filled = data.where(~bad, interpolated)  # only the flagged cells are replaced

# ---- Subtract mean of promoterless ----
data = df_filled
promoterless_cols = [c for c in data.columns if c.startswith('U139') and 'BG' not in c]
if not promoterless_cols:
    # An empty selection would silently turn every value below into NaN.
    raise ValueError("No promoterless 'U139' columns found in ./sea.csv")
promoterless_mean = data[promoterless_cols].mean(axis=1)
# Row-wise reference subtraction, then a constant offset of 50.
# NOTE(review): the offset presumably keeps the traces positive — confirm. The fold
# change below is a ratio, so its value depends on this constant.
data = data.sub(promoterless_mean, axis=0) + 50

# ---- Signal to noise ----
# Noise is defined as the per-column standard deviation of the first 45 rows
# (pre-induction signal). For a finite, non-zero std this per-column scale cancels in
# the max/baseline ratio computed below.
SNR = data / data.iloc[0:45, :].std(axis=0)

# Row positions at which each fold-change window starts (one per label in hm_labels).
ind_starts = [61, 196, 338, 500, 644, 776]

# Sensor blocks (column index ranges into the BG-free frame; relies on column order).
sensor_blocks = {
    'arsR':  slice(0, 4),
    'zntA':  slice(4, 8),
    'zraP':  slice(8, 12),
    'cusC':  slice(12, 16),
    'merR':  slice(16, 20),
}

# Column labels of the result, in the same order as ind_starts.
hm_labels = ['As', 'Cd', 'Hg', 'Pb', 'Cu', '0']

# Result table: one row per sensor, one column per label.
foldchange = pd.DataFrame(
    index=list(sensor_blocks),
    columns=hm_labels,
    dtype=float
)

# ---- Compute fold change ----
# For each window: per-replicate maximum over 24 rows divided by the value in the
# window's first row, averaged over the sensor's replicate columns.
for hm, start in enumerate(ind_starts):
    hm_name = hm_labels[hm]

    for sensor, cols in sensor_blocks.items():
        window = SNR.iloc[start:start + 24, cols]
        baseline = SNR.iloc[start, cols]

        foldchange.loc[sensor, hm_name] = (
            window.max(axis=0) / baseline
        ).mean()

# UNCOMMENT TO SAVE AGAIN 
#foldchange.to_csv('sea_foldchange.csv')

# Apple juice

In [15]:
# Apple-juice run: background-correct each sensor trace, repair bad frames, reference
# against the promoterless control, then compute a fold change per sensor and induction.
# CSV column 0 becomes the index; every row number used below (45, ind_starts, ...)
# is a row POSITION, not an index label.
data = pd.read_csv('./apple.csv', index_col=0)  # Specify directory

# ---- Background subtraction ----
# Base sensor name = column name up to the first '.', skipping columns that end in 'BG'.
# Assumes replicates are named '<sensor>', '<sensor>.1', ... (what pandas produces for
# repeated CSV headers) — TODO confirm against the file.
sensors = sorted({
    col.split('.')[0]
    for col in data.columns
    if not col.endswith('BG')
})

for sensor in sensors:
    bg_col = f"{sensor}BG"

    # No matching background column: warn and leave this sensor's traces uncorrected.
    if bg_col not in data.columns:
        print(f"Warning: {bg_col} not found — skipping {sensor}")
        continue

    # Remove the same background trace from every replicate column of this sensor.
    sensor_cols = [col for col in data.columns if col.split('.')[0] == sensor]
    data[sensor_cols] = data[sensor_cols].sub(data[bg_col], axis=0)

# Background columns are no longer needed once subtracted.
data = data.loc[:, ~data.columns.str.endswith('BG')]

# ---- Repair bad frames ----
# Cells that are exactly 0 after background subtraction are treated as bad frames.
# They are replaced by linear interpolation between the nearest usable rows above and
# below, purely by row position (independent of the index labels). For an isolated bad
# frame this is the mean of its two neighbours; for a run of consecutive bad frames it
# avoids averaging against a neighbour that is itself still 0.
bad = data.eq(0)
if len(bad):
    # The first and last rows lack a neighbour on one side; leave them untouched.
    bad.iloc[[0, -1], :] = False
interpolated = data.mask(bad).interpolate(method='linear', axis=0, limit_area='inside')
df_filled = data.where(~bad, interpolated)  # only the flagged cells are replaced

# ---- Subtract mean of promoterless ----
data = df_filled
promoterless_cols = [c for c in data.columns if c.startswith('U139') and 'BG' not in c]
if not promoterless_cols:
    # An empty selection would silently turn every value below into NaN.
    raise ValueError("No promoterless 'U139' columns found in ./apple.csv")
promoterless_mean = data[promoterless_cols].mean(axis=1)
# Row-wise reference subtraction, then a constant offset of 70.
# NOTE(review): this cell adds 70 whereas the tap, treatment and seawater cells add 50.
# The fold change below is a ratio, so its value depends on this constant — confirm the
# difference is intentional before comparing results across matrices.
data = data.sub(promoterless_mean, axis=0) + 70

# ---- Signal to noise ----
# Noise is defined as the per-column standard deviation of the first 45 rows
# (pre-induction signal). For a finite, non-zero std this per-column scale cancels in
# the max/baseline ratio computed below.
SNR = data / data.iloc[0:45, :].std(axis=0)

# Row positions at which each fold-change window starts (one per label in hm_labels).
ind_starts = [61, 196, 338, 500, 644, 776]

# Sensor blocks (column index ranges into the BG-free frame; relies on column order).
sensor_blocks = {
    'arsR':  slice(0, 4),
    'zntA':  slice(4, 8),
    'zraP':  slice(8, 12),
    'cusC':  slice(12, 16),
    'merR':  slice(16, 20),
}

# Column labels of the result, in the same order as ind_starts.
hm_labels = ['As', 'Cd', 'Hg', 'Pb', 'Cu', '0']

# Result table: one row per sensor, one column per label.
foldchange = pd.DataFrame(
    index=list(sensor_blocks),
    columns=hm_labels,
    dtype=float
)

# ---- Compute fold change ----
# For each window: per-replicate maximum over 24 rows divided by the value in the
# window's first row, averaged over the sensor's replicate columns.
for hm, start in enumerate(ind_starts):
    hm_name = hm_labels[hm]

    for sensor, cols in sensor_blocks.items():
        window = SNR.iloc[start:start + 24, cols]
        baseline = SNR.iloc[start, cols]

        foldchange.loc[sensor, hm_name] = (
            window.max(axis=0) / baseline
        ).mean()

# UNCOMMENT TO SAVE AGAIN 
#foldchange.to_csv('apple_foldchange.csv')