In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from scipy.optimize import curve_fit
from statistics import mean
import scipy.stats as stats
import statistics
import re

In [2]:
def get_counts(df, time_step):
    """Count how many rows of each molecule id occur at one time step.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns ``"time_step"`` and ``"molecule"``.
    time_step :
        Value compared for equality against the ``"time_step"`` column.

    Returns
    -------
    dict
        Maps each molecule id found at ``time_step`` to its row count. Every
        key of the module-level ``molecules`` table that did not occur is
        added with the value ``0.0``.
    """
    at_step = df.loc[df["time_step"] == time_step, "molecule"]
    tally = at_step.value_counts().to_dict()
    # Pad with zeros so every known molecule id is always present.
    for molecule_id in molecules:
        tally.setdefault(molecule_id, 0.0)
    return tally

def get_counts_df(df):
    """Build a table of per-molecule counts with one row per time step.

    Rows follow the order in which distinct ``"time_step"`` values first
    appear in ``df``; each row is the dict returned by ``get_counts`` for
    that step, so the columns are molecule ids.
    """
    records = []
    for step in df["time_step"].unique().tolist():
        records.append(get_counts(df, step))
    return pd.DataFrame.from_dict(records)

def top_counts(df, time_step):
    """Count molecules at one time step, restricted to rows with ``y >= 15``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns ``"y"``, ``"time_step"`` and
        ``"molecule"``.
    time_step :
        Value compared for equality against the ``"time_step"`` column.

    Returns
    -------
    dict
        Molecule id -> row count among the ``y >= 15`` rows at ``time_step``;
        ids from the module-level ``molecules`` table that are absent are
        included with ``0.0``.
    """
    upper = df[df["y"] >= 15].reset_index(drop=True)
    at_step = upper.loc[upper["time_step"] == time_step, "molecule"]
    observed = at_step.value_counts().to_dict()
    # Observed ids keep their position; missing ids are appended as zeros.
    missing = {mol_id: 0.0 for mol_id in molecules if mol_id not in observed}
    return {**observed, **missing}

def get_top_counts(df):
    """Per-time-step molecule counts for the rows with ``y >= 15``.

    The list of time steps is taken from the *filtered* rows, so a time step
    at which no row satisfies ``y >= 15`` yields no row in the result.

    Returns
    -------
    pandas.DataFrame
        One row per distinct time step of the filtered data (in order of
        first appearance), one column per molecule id.
    """
    upper = df[df["y"] >= 15].reset_index(drop=True)
    records = []
    for step in upper["time_step"].unique().tolist():
        records.append(get_counts(upper, step))
    return pd.DataFrame.from_dict(records)

def bottom_counts(df, time_step):
    """Count molecules at one time step, restricted to rows with ``y <= 5``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns ``"y"``, ``"time_step"`` and
        ``"molecule"``.
    time_step :
        Value compared for equality against the ``"time_step"`` column.

    Returns
    -------
    dict
        Molecule id -> row count among the ``y <= 5`` rows at ``time_step``;
        ids from the module-level ``molecules`` table that are absent are
        included with ``0.0``.
    """
    lower = df[df["y"] <= 5].reset_index(drop=True)
    at_step = lower.loc[lower["time_step"] == time_step, "molecule"]
    observed = at_step.value_counts().to_dict()
    # Observed ids keep their position; missing ids are appended as zeros.
    missing = {mol_id: 0.0 for mol_id in molecules if mol_id not in observed}
    return {**observed, **missing}

def get_bottom_counts(df):
    """Per-time-step molecule counts for the rows with ``y <= 5``.

    The list of time steps is taken from the *filtered* rows, so a time step
    at which no row satisfies ``y <= 5`` yields no row in the result.

    Returns
    -------
    pandas.DataFrame
        One row per distinct time step of the filtered data (in order of
        first appearance), one column per molecule id.
    """
    lower = df[df["y"] <= 5].reset_index(drop=True)
    records = []
    for step in lower["time_step"].unique().tolist():
        records.append(get_counts(lower, step))
    return pd.DataFrame.from_dict(records)

def top_signal_transduction(df):
    """First/last time step of molecule 15 per species, for rows with ``y >= 15``.

    Molecule id 15 is listed as ``"FHRR"`` in the ``molecules`` table.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``species_id`` with columns ``"min"`` and ``"max"`` (the
        smallest and largest ``time_step`` seen) and
        ``"signal transduction time"`` (``max - min``).
    """
    in_region = df["y"] >= 15
    is_complex = df["molecule"] == 15
    span = (
        df.loc[in_region & is_complex]
        .groupby("species_id")["time_step"]
        .agg(["min", "max"])
    )
    span["signal transduction time"] = span["max"] - span["min"]
    return span

def bottom_signal_transduction(df):
    """First/last time step of molecule 15 per species, for rows with ``y <= 5``.

    Molecule id 15 is listed as ``"FHRR"`` in the ``molecules`` table.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``species_id`` with columns ``"min"`` and ``"max"`` (the
        smallest and largest ``time_step`` seen) and
        ``"signal transduction time"`` (``max - min``).
    """
    in_region = df["y"] <= 5
    is_complex = df["molecule"] == 15
    span = (
        df.loc[in_region & is_complex]
        .groupby("species_id")["time_step"]
        .agg(["min", "max"])
    )
    span["signal transduction time"] = span["max"] - span["min"]
    return span


def HFRR_signal_transduction(df):
    """First/last time step at which each species appears as molecule 15.

    Molecule id 15 is listed as ``"FHRR"`` in the ``molecules`` table. No
    filtering on position is applied.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``species_id`` with columns ``"min"``, ``"max"`` and
        ``"signal transduction time"`` (``max - min`` of ``time_step``).
    """
    per_species = df.loc[df["molecule"] == 15].groupby("species_id")["time_step"]
    summary = per_species.agg(["min", "max"])
    summary["signal transduction time"] = summary["max"] - summary["min"]
    return summary

def FRR_signal_transduction(df):
    """First/last time step at which each species appears as molecule 16.

    Molecule id 16 is listed as ``"FRR"`` in the ``molecules`` table. No
    filtering on position is applied.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``species_id`` with columns ``"min"``, ``"max"`` and
        ``"signal transduction time"`` (``max - min`` of ``time_step``).
    """
    per_species = df.loc[df["molecule"] == 16].groupby("species_id")["time_step"]
    summary = per_species.agg(["min", "max"])
    summary["signal transduction time"] = summary["max"] - summary["min"]
    return summary

def get_theta(file_name):
    """Parse the numeric value that follows the first ``=`` in a file name.

    The value is the text between the first ``=`` and the next ``_``;
    for example ``"theta=0.25_run.csv"`` gives ``0.25``.

    Parameters
    ----------
    file_name : str
        Name to parse.

    Returns
    -------
    float

    Raises
    ------
    IndexError
        If ``file_name`` contains no ``=``.
    ValueError
        If the extracted text is not a valid float.
    """
    # Read the argument itself, not whatever global `file` a loop left behind.
    theta = file_name.split('=')[1].split('_')[0]
    return float(theta)


# Lookup table: integer molecule id (as found in the "molecule" column of the
# data frames) -> readable name. Ids run consecutively from 1 to 20; the
# counting helpers iterate over these keys to zero-fill missing molecules.
molecules = dict(enumerate(
    (
        "vesicle_F",
        "vesicle_F_top",
        "vesicle_R",
        "vesicle_R_top",
        "vesicle_H",
        "vesicle_H_top",
        "fused_vesicle_F",
        "fused_vesicle_R",
        "fused_vesicle_H",
        "F",
        "H",
        "R",
        "RR",
        "FH",
        "FHRR",
        "FRR",
        "F_species",
        "H_species",
        "R_species",
        "F-membrane-bound",
    ),
    start=1,
))

In [10]:
%%sql


Exception: Data Source is not selected

In [ ]:
%%sql


In [6]:
# --- Run configuration -------------------------------------------------------
# NOTE(review): ITERATION, SRC_PATH and FILENAME_PREFIX are not referenced by
# the cells visible here; confirm whether later cells still need them.
ITERATION = 1
SRC_PATH = "src"
# Directory that is scanned for the input ``.csv`` files.
DATA_PATH = "receptor_knock_out/"
# Directory for generated figures (created by the following cell).
SAVE_PATH = "receptor_knock_out/figures"
FILENAME_PREFIX = "20240324_symmetrybreaking_iteration="

In [7]:
# Create the figure directory (and any missing parents). `exist_ok=True`
# replaces the separate existence check, so the cell is safe to re-run and
# has no gap between checking and creating.
os.makedirs(SAVE_PATH, exist_ok=True)

In [8]:
# Names (not full paths) of the CSV files directly inside DATA_PATH, sorted
# lexicographically.
data_files = sorted(
    entry for entry in os.listdir(DATA_PATH) if entry.endswith('.csv')
)
n_data_files = len(data_files)

# Empty accumulators; nothing in this cell fills them.
thetas = []
timeseries = []

print(data_files)

['20240216_receptorKO.csv', 'all_positions-0.067.csv', 'all_positions-0.067test.csv', 'receptor_KO.csv']


In [38]:
# Preview the short label derived from every data-file name: the run prefix
# and the ".csv" extension are stripped.
for file in data_files:
    # Dots are escaped so they match a literal "." rather than any character,
    # and the extension is anchored so only a trailing ".csv" is removed.
    label = re.sub(r'0\.000005_symmetrybreaking_iteration=', '', file)
    label = re.sub(r'\.csv$', '', label)
    print(label)

1
10
2
3
4
5
6
7
8
9


In [39]:
# Export, for every data file, (a) the molecule-15 signal-transduction table
# and (b) a table of molecule counts per time step for the whole simulation,
# cell 1 (y >= 15) and cell 2 (y <= 5).

# Output locations; change them here rather than inside the loop.
TRANSDUCTION_OUT_DIR = "/Users/aimee/Desktop/R_000005_sym/transduction_data"
PLOTS_OUT_DIR = "/Users/aimee/Desktop/R_000005_sym/plots_data"
# Prefix stripped from a file name to obtain its label.
LABEL_PREFIX = "0.000005_symmetrybreaking_iteration="
# Same y cut-offs as get_top_counts / get_bottom_counts.
CELL1_MIN_Y = 15
CELL2_MAX_Y = 5

os.makedirs(TRANSDUCTION_OUT_DIR, exist_ok=True)
os.makedirs(PLOTS_OUT_DIR, exist_ok=True)

transduction_dfs = []
plots = []

for file in data_files:
    print(f"[INFO] Processing {file}")
    data_df = pd.read_csv(
        os.path.join(DATA_PATH, file),
        sep=" ",
        header=None,
        names=["time_step", "molecule", "state", "x", "y", "species_id"],
    )

    # The label must come from the file being processed. Without this line
    # every iteration reuses whatever `label` an earlier cell left behind and
    # all files overwrite the same two output CSVs.
    label = os.path.splitext(file)[0].replace(LABEL_PREFIX, "")

    HFRR_transduction_df = HFRR_signal_transduction(data_df)
    # Computed for inspection only; it is not written to disk.
    FRR_transduction_df = FRR_signal_transduction(data_df)

    HFRR_transduction_df.to_csv(
        f"{TRANSDUCTION_OUT_DIR}/HFRR_20240204_script_iteration={label}.csv"
    )

    # Count all three regions over the SAME ordered list of time steps. The
    # tables are concatenated side by side by row position, so a time step
    # with no molecules in one cell must still produce a (zero) row there;
    # otherwise later rows shift against the `timestep` column.
    step_values = data_df["time_step"].unique()
    step_list = step_values.tolist()
    cell1_df = data_df[data_df["y"] >= CELL1_MIN_Y]
    cell2_df = data_df[data_df["y"] <= CELL2_MAX_Y]

    data_counts = get_counts_df(data_df).sort_index(axis=1)
    cell1_counts = pd.DataFrame(
        [get_counts(cell1_df, step) for step in step_list]
    ).sort_index(axis=1)
    cell2_counts = pd.DataFrame(
        [get_counts(cell2_df, step) for step in step_list]
    ).sort_index(axis=1)
    timesteps = pd.DataFrame(step_values / 10, columns=["timestep"])

    # Column order: timestep, then molecules for the whole simulation,
    # cell 1 and cell 2 (molecule column names repeat across the three sets).
    plots = pd.concat([timesteps, data_counts, cell1_counts, cell2_counts], axis=1)
    plots.to_csv(f"{PLOTS_OUT_DIR}/symmetry_iteration{label}.csv")

print("[INFO] DONE!")

[INFO] Processing 0.000005_symmetrybreaking_iteration=1.csv
[INFO] Processing 0.000005_symmetrybreaking_iteration=10.csv
[INFO] Processing 0.000005_symmetrybreaking_iteration=2.csv
[INFO] Processing 0.000005_symmetrybreaking_iteration=3.csv
[INFO] Processing 0.000005_symmetrybreaking_iteration=4.csv
[INFO] Processing 0.000005_symmetrybreaking_iteration=5.csv
[INFO] Processing 0.000005_symmetrybreaking_iteration=6.csv
[INFO] Processing 0.000005_symmetrybreaking_iteration=7.csv
[INFO] Processing 0.000005_symmetrybreaking_iteration=8.csv
[INFO] Processing 0.000005_symmetrybreaking_iteration=9.csv
[INFO] DONE!


The `plots` DataFrame has 60 count columns after the leading `timestep` column. The first set of 20 columns holds the counts of molecules 1–20 for the whole simulation, the second set of 20 holds the counts in cell 1, and the third set of 20 holds the counts in cell 2. **IMPORTANT:** the column names repeat across the three sets, so you will probably need to rename the columns before plotting.

In [13]:
# Load the receptor knock-out run; the file is space-separated with no header
# row, and lines that cannot be parsed are skipped.
data_df = pd.read_csv(
    f"{DATA_PATH}/receptor_KO.csv",
    sep=" ",
    header=None,
    names=["time_step", "molecule", "state", "x", "y", "species_id"],
    on_bad_lines="skip",
)