In [1]:
from pathlib import Path
import pandas as pd
import re

In [2]:
# Root folder of the processed data; it is searched recursively for the CSV
# tables stored in "aggresome" and "cells" subfolders.
data_folder = Path("../processed_data/03062024_v3/")

In [3]:
# Load every aggresome CSV below `data_folder` and tag its rows with the
# timepoint and replicate numbers parsed from the file name.
tables = []
# The file stem must start with digits followed by "h" (timepoint) and later
# contain an underscore followed by digits (replicate).
pattern = r"(?P<time>\d+)h.*_(?P<replicate>\d+)"
# rglob yields files in filesystem order; sorting keeps the row order of the
# collected tables identical from run to run.
for f in sorted(data_folder.rglob("aggresome/*.csv")):
    match = re.match(pattern, f.stem)
    if match is None:
        # Report the offending file instead of failing with an opaque
        # AttributeError on `match.groupdict()`.
        raise ValueError(f"Cannot parse timepoint/replicate from file name: {f}")
    metadata = match.groupdict()
    current_table = pd.read_csv(f)
    current_table["timepoint"] = int(metadata["time"])
    current_table["replicate"] = int(metadata["replicate"])
    tables.append(current_table)

In [4]:
# Combine the per-file tables into one frame. Tables without rows are left out
# first: pandas emits a FutureWarning when empty entries are concatenated,
# because they will start to influence the result dtypes in a future version.
non_empty_tables = [table for table in tables if not table.empty]
# Fall back to the complete list when every table is empty so the combined
# frame still carries the expected columns.
df = pd.concat(non_empty_tables or tables)

  df = pd.concat(tables)


In [5]:
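# Superseded alternative, kept for reference: counts every object above 1000 pixels per (timepoint, replicate).
# aggresomes_df = df[df["num_pixels"] > 1000].groupby(["timepoint", "replicate"], as_index=False)["label"].count()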

In [6]:
# Keep only objects larger than 1000 pixels, then count how many distinct
# intensity_max values occur within each (timepoint, replicate) group.
# NOTE(review): intensity_max presumably identifies the cell an object lies in,
# with 0.0 meaning "no cell"; if so, 0.0 is counted as a cell here - confirm.
large_objects = df.loc[
    df["num_pixels"] > 1000, ["timepoint", "replicate", "intensity_max"]
]
distinct_values = large_objects.dropna(subset=["intensity_max"]).drop_duplicates()
aggresomes_df = (
    distinct_values
    .groupby(["timepoint", "replicate"], as_index=False)["intensity_max"]
    .count()
)

In [7]:
# Show the number of distinct intensity_max values per (timepoint, replicate).
aggresomes_df

Unnamed: 0,timepoint,replicate,intensity_max
0,0,5,2
1,0,9,1
2,0,10,1
3,0,18,1
4,12,1,3
5,12,8,1
6,12,12,1
7,18,1,27
8,18,2,23
9,18,3,14


In [8]:
# Inspect the individual rows that pass the > 1000 pixel size filter.
df[df["num_pixels"] > 1000]

Unnamed: 0,label,centroid-0,centroid-1,num_pixels,intensity_max,timepoint,replicate
66,67,2037.964483,1652.303967,2168,37.0,0,10
5,6,1181.984902,1214.899344,4570,18.0,0,18
59,60,1592.618634,1938.179977,1728,63.0,0,5
94,95,2030.907275,1796.188843,2653,0.0,0,5
55,56,2038.178689,1363.938525,1220,0.0,0,9
...,...,...,...,...,...,...,...
72,73,1033.776556,1710.956261,4001,8.0,24,5
116,117,1503.499699,1623.963319,1663,10.0,24,5
15,16,702.459584,1352.545940,5431,0.0,24,6
27,28,927.892063,1020.111111,1575,9.0,24,7


In [9]:
# Load every cell CSV below `data_folder` and tag its rows with the timepoint
# and replicate numbers parsed from the file name.
cell_tables = []
# The file stem must start with digits followed by "h" (timepoint) and later
# contain an underscore followed by digits (replicate).
pattern = r"(?P<time>\d+)h.*_(?P<replicate>\d+)"
# rglob yields files in filesystem order; sorting keeps the row order of the
# collected tables identical from run to run.
for f in sorted(data_folder.rglob("cells/*.csv")):
    match = re.match(pattern, f.stem)
    if match is None:
        # Report the offending file instead of failing with an opaque
        # AttributeError on `match.groupdict()`.
        raise ValueError(f"Cannot parse timepoint/replicate from file name: {f}")
    metadata = match.groupdict()
    current_table = pd.read_csv(f)
    current_table["timepoint"] = int(metadata["time"])
    current_table["replicate"] = int(metadata["replicate"])
    cell_tables.append(current_table)

In [10]:
# Stack all cell tables, then count the non-null label_count entries in each
# (timepoint, replicate) group.
all_cells = pd.concat(cell_tables)
cells_by_replicate = all_cells.groupby(["timepoint", "replicate"], as_index=False)
cells_df = cells_by_replicate["label_count"].count()

In [11]:
# Show the label_count row counts per (timepoint, replicate).
cells_df

Unnamed: 0,timepoint,replicate,label_count
0,0,1,63
1,0,2,65
2,0,3,85
3,0,4,90
4,0,5,84
...,...,...,...
111,24,8,36
112,24,9,45
113,24,10,34
114,24,11,41


In [12]:
# Left-join the aggresome counts onto the cell counts so that every
# (timepoint, replicate) present in cells_df is kept, then give the two count
# columns descriptive names.
column_names = {"label_count": "num_cells", "intensity_max": "num_aggresomes"}
merged_counts = pd.merge(
    cells_df,
    aggresomes_df,
    how="left",
    on=["timepoint", "replicate"],
)
result = merged_counts.rename(columns=column_names)

In [13]:
# Rows without a match in the left merge carry NaN in num_aggresomes;
# replace every NaN in the frame with 0.
result = result.fillna(0)

In [14]:
# Show the merged per-(timepoint, replicate) table after NaN values were filled with 0.
result

Unnamed: 0,timepoint,replicate,num_cells,num_aggresomes
0,0,1,63,0.0
1,0,2,65,0.0
2,0,3,85,0.0
3,0,4,90,0.0
4,0,5,84,2.0
...,...,...,...,...
111,24,8,36,0.0
112,24,9,45,0.0
113,24,10,34,2.0
114,24,11,41,4.0


In [15]:
# Per-row ratio: num_aggresomes divided by num_cells, stored as a new column.
aggresome_ratio = result["num_aggresomes"].div(result["num_cells"])
result["ratio_aggresome-containing_cells"] = aggresome_ratio

In [16]:
# Write the per-(timepoint, replicate) table to output.csv in the current
# working directory, without the index column.
result.to_csv("output.csv", index=False)

In [18]:
import altair as alt

In [21]:
# Scatter plot of the ratio against timepoint, one point per row of `result`.
# Explicit axis titles and a chart title let the figure stand on its own.
alt.Chart(result).mark_circle(size=30).encode(
    x=alt.X("timepoint", title="Timepoint (h)"),
    y=alt.Y(
        "ratio_aggresome-containing_cells",
        title="Ratio of aggresome-containing cells",
    ),
).properties(
    title="Ratio of aggresome-containing cells per replicate"
).interactive()

In [23]:
# Average the per-replicate ratios within each timepoint (unweighted mean over
# the rows of `result`).
ratios_by_timepoint = result.groupby("timepoint", as_index=False)
summary = ratios_by_timepoint["ratio_aggresome-containing_cells"].mean()

In [24]:
# Show the mean ratio per timepoint.
summary

Unnamed: 0,timepoint,ratio_aggresome-containing_cells
0,0,0.005204
1,4,0.0
2,8,0.0
3,12,0.007143
4,18,0.226271
5,24,0.103267


In [29]:
# Write the per-timepoint means to summary.csv in the current working
# directory, without the index column.
summary.to_csv("summary.csv", index=False)

In [28]:
# Line plot of the mean ratio at each timepoint, taken from `summary`.
# Explicit axis titles and a chart title let the figure stand on its own.
alt.Chart(summary).mark_line().encode(
    x=alt.X("timepoint", title="Timepoint (h)"),
    y=alt.Y(
        "ratio_aggresome-containing_cells",
        title="Mean ratio of aggresome-containing cells",
    ),
).properties(
    title="Mean ratio of aggresome-containing cells per timepoint"
).interactive()