<a href="https://colab.research.google.com/github/adanog/Vip3Aa/blob/main/Dose_response_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Dose-Response Analysis of Sf9 Cells Exposed to Bacillus thuringiensis Insecticidal Protein


This script is designed for image data analysis of cell segmentation in bright field microscopy images of Sf9 cells exposed to *Bacillus thuringiensis* insecticidal protein.

## Overview:
- Script created by Adan Guerrero (adan.guerrero@ibt.unam.mx).
- This script analyzes the dose-response relationship between cell conditions and exposure to the protein.
- It requires the `summary_report.csv` file generated by the script `cyto3-edited.ipynb`.

## Input:
- The `summary_report.csv` file contains cell segmentation data, including the number of ROIs, median area, and concentration values.

## Output:
- Tables with metrics for each sample (e.g., number of ROIs, median area, concentration).
- Dose-response plots visualizing the relationship between protein concentration and cell response.

## Dependencies:
- Python 3.x
- Libraries: `statsmodels`, `seaborn`, `pandas`
- Install the required libraries via:
  `pip install statsmodels seaborn pandas`

## Execution:
1. Ensure that the `summary_report.csv` file is available in the appropriate directory.
2. Run the script to process the data and generate dose-response plots.


In [1]:
#!pip install statsmodels seaborn

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
################ Libraries
import os
import pandas as pd
import numpy as np
from scipy.stats import norm
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.backends.backend_pdf
import re
from scipy.stats import ttest_ind

################ Fuctions
# Extract the concentration from the filenames
def extract_concentration(filename):
    if "control" in filename:
        return 0.0
    match = re.search(extraction_string, filename)
   # match = re.search(r'\b(\d+(\.\d+)?)\b', filename)
    if match:
        return float(match.group(1))
    return None

###          Cry1Fa
#folder_path = "Cry1Fa  toxin and protoxin/Cry1Fa Toxin/72h/"
#conc = [0.5, 1, 1.5, 2, 2.5, 3, 3.75]
#extraction_string = r'72h tox Cry1Fa (\d+(\.\d+)?)'
#es = '72h tox Cry1Fa'
###


#folder_path = "Cry1Fa  toxin and protoxin/Cry1Fa Protoxin/72h"
#conc = [1, 1.5, 2, 2.5, 3, 3.75]
#extraction_string = r'72h Cry1Fa protoxin (\d+(\.\d+)?)'
#es= '72h Cry1Fa protoxin'

################

########### Vip 3

folder_path = "/content/gdrive/MyDrive/2024/colaboraciones/Ale Bravo/fotos Sf9 con Vip3Aa o Cry1Fa /Shared Data via Zenodo and Github/Vip3Aa toxin and protoxin/TOXIN Vip3/72 h toxin"
#folder_path = "Vip3Aa toxin and protoxin/TOXIN Vip3/72 h toxin"
conc = [2.5, 5, 7.5, 10]
extraction_string = r' (\d+(\.\d+)?)'
es= '72h Vip3Aa toxin'

#folder_path = "Vip3Aa toxin and protoxin/PROTOX Vip3/72h protoxin"
#conc = [5, 10, 20]
#extraction_string = r' (\d+(\.\d+)?)'
#es= '72h Vip3Aa Protox'


############## Load data
file_path = 'summary_report.csv'
# Define the summary CSV file path
file_path = os.path.join(folder_path, file_path)
df = pd.read_csv(file_path)
df

Unnamed: 0,Filename,Number of ROIs,Median Area
0,72h control-1.jpg,270,679.0
1,72h control-2.jpg,274,586.5
2,72h control-3.jpg,281,718.0
3,72h Vip3Aa toxin 2.5-2.jpg,113,1218.0
4,72h Vip3Aa toxin 2.5-1.jpg,120,1211.0
5,72h Vip3Aa toxin 5-1.jpg,73,1128.0
6,72h Vip3Aa toxin 5-3.jpg,81,1102.0
7,72h Vip3Aa toxin 2.5-3.jpg,115,975.0
8,72h Vip3Aa toxin 5-2.jpg,103,1220.0
9,72h Vip3Aa toxin 7.5-1.jpg,98,809.0


In [4]:
df['Filename'].str.extract(extraction_string)[0].astype(float)

Unnamed: 0,0
0,
1,
2,
3,2.5
4,2.5
5,5.0
6,5.0
7,2.5
8,5.0
9,7.5


In [5]:
# Extract the concentration values from the filename using the manually defined pattern
df['Concentration'] = df['Filename'].str.extract(extraction_string)[0].astype(float)

# Identify control conditions: filenames that do not contain 'tox 1Fa'
control_conditions = df[df['Filename'].str.contains(es) == False].copy()

# Add a Concentration column to control conditions and set it to NaN
control_conditions.loc[:, 'Concentration'] = float('nan')

# Add a Concentration column to control conditions and set it to a unique value for plotting
control_conditions['Concentration'] = 0  # Assuming 0 represents control condition


# Sort by concentration and filename for better readability
df = df.sort_values(by=['Concentration', 'Filename'])

df["Concentration"] = df["Filename"].apply(extract_concentration)
df

Unnamed: 0,Filename,Number of ROIs,Median Area,Concentration
4,72h Vip3Aa toxin 2.5-1.jpg,120,1211.0,2.5
3,72h Vip3Aa toxin 2.5-2.jpg,113,1218.0,2.5
7,72h Vip3Aa toxin 2.5-3.jpg,115,975.0,2.5
5,72h Vip3Aa toxin 5-1.jpg,73,1128.0,5.0
8,72h Vip3Aa toxin 5-2.jpg,103,1220.0,5.0
6,72h Vip3Aa toxin 5-3.jpg,81,1102.0,5.0
9,72h Vip3Aa toxin 7.5-1.jpg,98,809.0,7.5
13,72h Vip3Aa toxin 7.5-2.jpg,83,1053.0,7.5
11,72h Vip3Aa toxin 7.5-3.jpg,58,1255.0,7.5
10,72h Vip3Aa toxin 10-1.jpg,79,1221.0,10.0


In [6]:
df[df['Filename'].str.contains(es) == False].copy()

Unnamed: 0,Filename,Number of ROIs,Median Area,Concentration
0,72h control-1.jpg,270,679.0,0.0
1,72h control-2.jpg,274,586.5,0.0
2,72h control-3.jpg,281,718.0,0.0


In [7]:
 df.groupby("Concentration")["Number of ROIs"].apply(list).reset_index()

Unnamed: 0,Concentration,Number of ROIs
0,0.0,"[270, 274, 281]"
1,2.5,"[120, 113, 115]"
2,5.0,"[73, 103, 81]"
3,7.5,"[98, 83, 58]"
4,10.0,"[79, 79, 87]"


In [8]:
# Group by concentration and list the number of ROIs for each concentration
df_grouped = df.groupby("Concentration")["Number of ROIs"].apply(list).reset_index()

# Ensure each concentration has three replicates
df_grouped["Replicate 1"] = df_grouped["Number of ROIs"].apply(lambda x: x[0] if len(x) > 0 else None)
df_grouped["Replicate 2"] = df_grouped["Number of ROIs"].apply(lambda x: x[1] if len(x) > 1 else None)
df_grouped["Replicate 3"] = df_grouped["Number of ROIs"].apply(lambda x: x[2] if len(x) > 2 else None)
# Drop the original "Number of ROIs" column
df_grouped = df_grouped.drop(columns=["Number of ROIs"])

# Calculate the average number of ROIs in the control group
average_control_rois = np.nanmean(df_grouped[df_grouped["Concentration"] == 0.0][['Replicate 1', 'Replicate 2', 'Replicate 3']].values.flatten())

# Calculate the mortality rate for each concentration
mortality_rates_corrected = []
for i, row in df_grouped.iterrows():
    if row["Concentration"] != 0.0:
        treated_rois = [row["Replicate 1"], row["Replicate 2"], row["Replicate 3"]]
        average_treated_rois = np.nanmean(treated_rois)
        mortality_rate = (average_control_rois - average_treated_rois) / average_control_rois * 100
        mortality_rates_corrected.append({"Dose": row["Concentration"], "Mortality": mortality_rate})
        print(average_treated_rois)

# Convert mortality rates to a DataFrame
mortality_df_corrected = pd.DataFrame(mortality_rates_corrected)

mortality_df_corrected

116.0
85.66666666666667
79.66666666666667
81.66666666666667


Unnamed: 0,Dose,Mortality
0,2.5,57.818182
1,5.0,68.848485
2,7.5,71.030303
3,10.0,70.30303


In [9]:
#np.mean([127, 142, 115, 193, 170, 173])

In [10]:

# Group by concentration and list the number of ROIs for each concentration
df_grouped = df.groupby("Concentration")["Number of ROIs"].apply(list).reset_index()

# Ensure each concentration has three replicates
df_grouped["Replicate 1"] = df_grouped["Number of ROIs"].apply(lambda x: x[0] if len(x) > 0 else None)
df_grouped["Replicate 2"] = df_grouped["Number of ROIs"].apply(lambda x: x[1] if len(x) > 1 else None)
df_grouped["Replicate 3"] = df_grouped["Number of ROIs"].apply(lambda x: x[2] if len(x) > 2 else None)
# Drop the original "Number of ROIs" column
df_grouped = df_grouped.drop(columns=["Number of ROIs"])

# Calculate the average number of ROIs in the control group
average_control_rois = np.mean(df_grouped[df_grouped["Concentration"] == 0.0][['Replicate 1', 'Replicate 2', 'Replicate 3']].values.flatten())

# Calculate the mortality rate for each concentration
mortality_rates_corrected = []
for i, row in df_grouped.iterrows():
    if row["Concentration"] != 0.0:
        treated_rois = [row["Replicate 1"], row["Replicate 2"], row["Replicate 3"]]
        average_treated_rois = np.mean(treated_rois)
        mortality_rate = (average_control_rois - average_treated_rois) / average_control_rois * 100
        #if mortality_rate >= 0:  # Omit negative values
            #mortality_rates_corrected.append({"Dose": row["Concentration"], "Mortality": mortality_rate})
        mortality_rates_corrected.append({"Dose": row["Concentration"], "Mortality": mortality_rate})

# Convert mortality rates to a DataFrame
mortality_df_corrected = pd.DataFrame(mortality_rates_corrected)

# Probit transformation of the mortality rates (expressed as proportions)
mortality_df_corrected["Probit"] = norm.ppf(mortality_df_corrected["Mortality"] / 100)

# Fit a probit model using the dose values and probit-transformed mortality rates
dose_values_corrected = mortality_df_corrected["Dose"].values
probit_values_corrected = mortality_df_corrected["Probit"].values
probit_model_corrected = sm.OLS(probit_values_corrected, sm.add_constant(dose_values_corrected)).fit()

# Extract parameters from the model
intercept_corrected, slope_corrected = probit_model_corrected.params

# Calculate the dose corresponding to 50% mortality (Probit value = 0)
ic50_dose_corrected = -intercept_corrected / slope_corrected



#######################
# Filter the data for both experimental and control cases
df_filtered = pd.concat([df[df['Concentration'].isin(conc)], control_conditions])

# Sort by concentration and filename for better readability
df_filtered = df_filtered.sort_values(by=['Concentration', 'Filename'])

# Compute the mean control median area
control_mean_area = control_conditions['Median Area'].mean()

# Normalize the median area to the control mean area
df_filtered['Normalized Area'] = df_filtered['Median Area'] / control_mean_area

# Perform statistical tests (t-test) to compare each concentration against the control
p_values = []
for concentration in df_filtered['Concentration'].unique():
    if concentration == 0:
        continue  # Skip control
    experimental_data = df_filtered[df_filtered['Concentration'] == concentration]['Median Area']
    t_stat, p_val = ttest_ind(control_conditions['Median Area'], experimental_data)
    p_values.append((concentration, p_val))

# Determine significance levels
significance = [(0.0, 'NaN')]
for concentration, p_val in p_values:
    if p_val < 0.001:
        significance.append((concentration, '***'))
    elif p_val < 0.01:
        significance.append((concentration, '**'))
    elif p_val < 0.05:
        significance.append((concentration, '*'))
    else:
        significance.append((concentration, 'ns'))


# Convert the significance list to a DataFrame
significance_df = pd.DataFrame(significance, columns=['Concentration', 'Significance'])

# Compute means and standard errors
grouped = df_filtered.groupby('Concentration')['Normalized Area'].agg(['mean', 'sem']).reset_index()


grouped = pd.merge(grouped, significance_df, on='Concentration', how='right')

########################
#df
df_filtered

Unnamed: 0,Filename,Number of ROIs,Median Area,Concentration,Normalized Area
0,72h control-1.jpg,270,679.0,0.0,1.026973
1,72h control-2.jpg,274,586.5,0.0,0.887068
2,72h control-3.jpg,281,718.0,0.0,1.085959
4,72h Vip3Aa toxin 2.5-1.jpg,120,1211.0,2.5,1.831611
3,72h Vip3Aa toxin 2.5-2.jpg,113,1218.0,2.5,1.842198
7,72h Vip3Aa toxin 2.5-3.jpg,115,975.0,2.5,1.474666
5,72h Vip3Aa toxin 5-1.jpg,73,1128.0,5.0,1.706075
8,72h Vip3Aa toxin 5-2.jpg,103,1220.0,5.0,1.845223
6,72h Vip3Aa toxin 5-3.jpg,81,1102.0,5.0,1.666751
9,72h Vip3Aa toxin 7.5-1.jpg,98,809.0,7.5,1.223595


In [11]:
mortality_df_corrected

Unnamed: 0,Dose,Mortality,Probit
0,2.5,57.818182,0.197244
1,5.0,68.848485,0.49156
2,7.5,71.030303,0.55427
3,10.0,70.30303,0.533136


In [12]:
grouped

Unnamed: 0,Concentration,mean,sem,Significance
0,0.0,1.0,0.058977,
1,2.5,1.716158,0.120785,**
2,5.0,1.73935,0.05414,***
3,7.5,1.571465,0.195018,*
4,10.0,1.646584,0.131392,*


In [13]:
mortality_df_corrected.to_csv(os.path.join(folder_path, es+" mortality.csv"), index=False)
grouped.to_csv(os.path.join(folder_path, es+' cell size.csv'), index=False)

# Convert the value to a DataFrame
ic50_df = pd.DataFrame({'IC50 Dose': [ic50_dose_corrected]})

# Calculate the dose corresponding to 50% mortality (Probit value = 0)
ic50_df.to_csv(os.path.join(folder_path, es+" IC50.csv"), index=False)


print('DataFrames have been saved to mortality_df_corrected.csv and grouped.csv')

DataFrames have been saved to mortality_df_corrected.csv and grouped.csv
