# NOC processing

In [5]:
import os
import sys
from pathlib import Path
import pandas as pd

# Climb seven directory levels above the current working directory; the
# "script" and "data" folders are both resolved relative to that location.
project_root = Path.cwd()
for _hop in range(7):
    project_root = project_root.parent

script_path = project_root / "script"
data_path = project_root / "data"

# Put the script folder on the module search path.
sys.path.append(str(script_path))

### Load NOC table


In [6]:
# The timestamp selects which fraction table is read and written further down.
# (Alternative kept from the original: restore it with `%store -r timestamp`.)
timestamp = "2024-07-16"

print(f"Timestamp: {timestamp}")

outprefix = f"{timestamp}_QC_filter_impute"

outdir = Path.cwd() / "output"
preprocessing_out = outdir / "preprocessing"
# External workbook; its location does not depend on `timestamp`.
NOC_path = data_path / "external" / "elife-16950-supp9-v3.xlsx"

# Read the three sheets used below. Every failure is reported AND re-raised:
# swallowing it would leave cyt_table / nuc_table / org_table undefined and make
# a later cell fail with an unrelated NameError.
try:
    cyt_table = pd.read_excel(NOC_path, sheet_name="LFQ Static Cyt")
    nuc_table = pd.read_excel(NOC_path, sheet_name="LFQ Static Nuc")
    org_table = pd.read_excel(NOC_path, sheet_name="LFQ Static Org")
except FileNotFoundError:
    print(f"File {NOC_path} not found.\nPlease make sure the external NOC workbook is available at this location.")
    raise
except pd.errors.ParserError:
    print(f"There was an error parsing the Excel workbook at {NOC_path}.")
    raise
except Exception as e:
    print(f"An unexpected error occurred while reading {NOC_path}: {e}")
    raise

Timestamp: 2024-07-16


### Load fraction table

In [7]:
# Folder holding the timestamped fraction tables (created if it is missing).
enrich_out_dir = outdir / "fraction_tables"
enrich_out_dir.mkdir(parents=True, exist_ok=True)

# The CSV is read with a two-row column header and its first column as row index.
fraction_csv_path = enrich_out_dir / f"{timestamp}_fraction_table.csv"
final_fraction_table = pd.read_csv(
    fraction_csv_path,
    header=[0, 1],
    index_col=0,
)


### Calculation of proportion

In [8]:
# Add to each NOC table a column holding the row-wise median of the columns at
# positions 4..9 (the six MAPS, per the original note on this cell).
for median_col, noc_table in (
    ("median LFQ intensity cytosolic", cyt_table),
    ("median LFQ intensity nuclear", nuc_table),
    ("median LFQ intensity organelle", org_table),
):
    noc_table[median_col] = noc_table.iloc[:, 4:10].median(axis=1)

In [9]:
def compute_proportion(row, cyt=None, nuc=None, org=None):
    """Return the (nuclear, organelle, cytosolic) share of a protein's NOC intensity.

    The protein ID is read from ``row[("metadata", "Unnamed: 0")]`` and looked up
    in the three NOC tables; a table that does not list the ID contributes an
    intensity of 0. Each share is that table's median intensity divided by the
    sum of the three.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the key ``("metadata", "Unnamed: 0")``.
    cyt, nuc, org : pandas.DataFrame, optional
        Tables to search. When omitted, the notebook-level ``cyt_table``,
        ``nuc_table`` and ``org_table`` are used (the original behaviour).

    Returns
    -------
    tuple
        ``(nuclear, organelle, cytosolic)`` proportions. All three are NaN when
        the total intensity is 0 (for example when the ID is absent from every
        table); previously that case raised ``ZeroDivisionError``.
    """
    cyt = cyt_table if cyt is None else cyt
    nuc = nuc_table if nuc is None else nuc
    org = org_table if org is None else org

    uniprot_id = row[("metadata", "Unnamed: 0")]

    def _median_intensity(table, id_column, value_column):
        # One scan per table: first matching row's value, or 0 when the ID is absent.
        matches = table.loc[table[id_column] == uniprot_id, value_column]
        return 0 if matches.empty else matches.values[0]

    # NOTE: the cytosolic table's ID column is "Lead IDs"; the other two use "Lead ID".
    cyt_intensity = _median_intensity(cyt, "Lead IDs", "median LFQ intensity cytosolic")
    nuc_intensity = _median_intensity(nuc, "Lead ID", "median LFQ intensity nuclear")
    org_intensity = _median_intensity(org, "Lead ID", "median LFQ intensity organelle")

    total_intensity = cyt_intensity + nuc_intensity + org_intensity
    if total_intensity == 0:
        # Proportions are undefined; NaN rows fail a later "> 0" filter.
        undefined = float("nan")
        return undefined, undefined, undefined
    return nuc_intensity/total_intensity, org_intensity/total_intensity, cyt_intensity/total_intensity

In [10]:
# Append the three NOC proportion columns; compute_proportion returns them in
# the order (nuclear, organelle, cytosol), one tuple per row.
noc_columns = [("sample", "NOC_nuclear"), ("sample", "NOC_organelle"), ("sample", "NOC_cytosol")]
final_fraction_table[noc_columns] = final_fraction_table.apply(
    lambda row: pd.Series(compute_proportion(row)), axis=1
)

# Relabel the ID column: every second-level label containing 'Unnamed: 0' has
# that text replaced by 'Protein IDs'.
# A former `DataFrame.rename` call keyed by a (level 0, level 1) tuple was dropped
# here: on MultiIndex columns the mapping is applied to each level's labels
# separately, so the tuple key never matched and the call had no effect.
# NOTE: after this relabel, compute_proportion can no longer find
# ("metadata", "Unnamed: 0"), so this cell cannot be re-run without reloading
# final_fraction_table first.
final_fraction_table.columns = pd.MultiIndex.from_tuples(
    [(upper, lower.replace('Unnamed: 0', 'Protein IDs')) for upper, lower in final_fraction_table.columns]
)

In [11]:
# Keep only proteins whose three NOC proportions add up to a positive value;
# rows whose total is 0 or NaN fail the comparison and are dropped.
noc_total = (
    final_fraction_table[("sample", "NOC_nuclear")]
    + final_fraction_table[("sample", "NOC_organelle")]
    + final_fraction_table[("sample", "NOC_cytosol")]
)
final_fraction_table = final_fraction_table[noc_total > 0]

In [12]:
# Show the filtered table with the added NOC columns (long frames are truncated in the repr).
final_fraction_table

Unnamed: 0_level_0,metadata,sample,sample,sample,sample,sample,sample,sample,sample
Unnamed: 0_level_1,Protein IDs,03K,06K,12K,24K,80K,NOC_nuclear,NOC_organelle,NOC_cytosol
0,Q9NRG9,0.120000,-0.624683,-0.398767,0.571433,1.538233,0.547935,0.443235,0.008831
1,Q2M2I8,-0.281217,0.188500,-0.149583,-0.492483,-0.693417,0.186118,0.414969,0.398914
2,Q13685,-1.738557,-1.332183,-2.056200,-2.026821,-1.672267,0.033859,0.027504,0.938638
3,P49588,-1.020700,-0.868417,-1.445783,-1.402567,-0.873433,0.051564,0.047146,0.901290
4,Q5JTZ9,-1.180700,-0.643750,3.548467,3.283900,4.336467,0.240722,0.757053,0.002225
...,...,...,...,...,...,...,...,...,...
4923,Q9NWK9,-1.347828,-0.980580,-1.817261,-1.845244,-2.083272,0.324373,0.090242,0.585385
4924,Q8NHG8,-0.930238,-1.118053,-1.059400,-0.709979,-0.711571,0.025922,0.385171,0.588907
4925,O95218,-1.124483,-0.924583,-1.733533,-1.705583,-1.195067,0.157405,0.159210,0.683386
4926,O43264,0.274350,-0.622217,-0.613917,0.441200,1.235767,0.069885,0.273558,0.656557


### Save to file

In [13]:
# Write the table, now carrying the NOC columns, next to the input fraction table.
enrich_out_dir = outdir / "fraction_tables"
enrich_out_dir.mkdir(parents=True, exist_ok=True)

save_csv_path = enrich_out_dir / f"{timestamp}_fraction_table_NOC.csv"
final_fraction_table.to_csv(save_csv_path)