In [1]:
import json
from pathlib import Path

import pandas as pd

from protzilla.constants.paths import PROJECT_PATH
from protzilla.runner import Runner,Run
from protzilla.utilities.transform_dfs import long_to_wide

In [2]:
# Normalisation variants to benchmark. For each entry, adjust_wf copies
# "method"/"parameters" into the workflow's normalisation step and
# "fc_threshold" into the first data_analysis step.
# NOTE(review): the thresholds look tuned to each normalisation's value
# scale -- confirm where these numbers come from.
norm_methods = [
    dict(method="z_score", parameters={}, fc_threshold=0.3157),
    dict(method="median", parameters={"percentile": 0.5}, fc_threshold=0.0309),
    dict(method="totalsum", parameters={}, fc_threshold=0.0000638564),
]

In [3]:
# Imputation variants to benchmark. adjust_wf copies "method"/"parameters"
# into the workflow's imputation step; an optional "strategy" parameter is
# appended to the run name by iterate_through_methods.
imp_methods = [
    dict(method="min_value_per_sample", parameters={"shrinking_value": 1}),
    dict(method="simple_imputation_per_protein", parameters={"strategy": "median"}),
    dict(method="simple_imputation_per_protein", parameters={"strategy": "most_frequent"}),
    dict(method="knn", parameters={"number_of_neighbours": 5}),
]

In [4]:
def adjust_wf(norm_method, imp_method, workflow_path=None):
    """Rewrite the benchmark workflow file for one normalisation/imputation pair.

    The workflow JSON is loaded, three steps are patched, and the result is
    written back to the same file (indent=2):

    * ``data_preprocessing`` step 1 gets the normalisation method/parameters,
    * ``data_preprocessing`` step 2 gets the imputation method/parameters,
    * ``data_analysis`` step 0 gets ``fc_threshold`` from ``norm_method``.

    Args:
        norm_method: dict with keys ``"method"``, ``"parameters"`` and
            ``"fc_threshold"``.
        imp_method: dict with keys ``"method"`` and ``"parameters"``.
        workflow_path: optional path of the workflow JSON to patch. Defaults
            to ``<PROJECT_PATH>/user_data/workflows/ba_39_norm_imp_wf.json``.

    Raises:
        KeyError / IndexError: if the workflow or the method dicts do not
            have the expected structure.
        OSError: if the workflow file cannot be read or written.
    """
    if workflow_path is None:
        # Built with pathlib so the notebook is not tied to Windows separators.
        workflow_path = (
            Path(PROJECT_PATH) / "user_data" / "workflows" / "ba_39_norm_imp_wf.json"
        )

    with open(workflow_path) as wf_file:
        wf = json.load(wf_file)

    preprocessing_steps = wf["sections"]["data_preprocessing"]["steps"]
    normalisation_step = preprocessing_steps[1]
    imputation_step = preprocessing_steps[2]
    analysis_step = wf["sections"]["data_analysis"]["steps"][0]

    normalisation_step["method"] = norm_method["method"]
    normalisation_step["parameters"] = norm_method["parameters"]
    imputation_step["method"] = imp_method["method"]
    imputation_step["parameters"] = imp_method["parameters"]
    analysis_step["parameters"]["fc_threshold"] = norm_method["fc_threshold"]

    # Serialise before reopening the file for writing: if the dump fails, the
    # existing workflow file is left intact instead of being truncated.
    new_wf = json.dumps(wf, indent=2)
    with open(workflow_path, "w") as wf_file:
        wf_file.write(new_wf)

In [5]:
def iterate_through_methods(ms_data_path, p):
    """Run the benchmark workflow for every normalisation/imputation pair.

    For each combination of the module-level ``norm_methods`` and
    ``imp_methods`` the workflow file is patched via ``adjust_wf`` and a
    ``Runner`` is created and executed. Runs are named
    ``ba_39_{p}_{norm}_{imp}{strategy}``, where ``strategy`` is the imputation
    "strategy" parameter or an empty string when there is none.

    Args:
        ms_data_path: path of the MS data file handed to the runner.
        p: label interpolated into the run name (the caller passes the value
            that is also part of the input file name).
    """
    meta_data_path = str(Path(PROJECT_PATH) / "user_data" / "data" / "meta.csv")

    for norm_method in norm_methods:
        for imp_method in imp_methods:
            adjust_wf(norm_method, imp_method)
            norm_name = norm_method["method"]
            imp_name = imp_method["method"]
            # Only some imputation entries define a "strategy"; default to ""
            # explicitly instead of hiding arbitrary errors behind a bare
            # except. adjust_wf has already required imp_method["parameters"].
            add_info = imp_method["parameters"].get("strategy", "")

            runner = Runner(
                workflow="ba_39_norm_imp_wf",
                ms_data_path=ms_data_path,
                meta_data_path=meta_data_path,
                peptides_path=None,
                run_name=f"ba_39_{p}_{norm_name}_{imp_name}{add_info}",
                df_mode="disk",
                all_plots=True,
                verbose=False,
            )
            runner.compute_workflow()

In [6]:
# Values interpolated verbatim into the input file names (ba_39_small_{p}.csv)
# and run names, so their exact spelling matters: 0 must stay an int.
# NOTE(review): presumably missing-value probabilities of the data sets -- confirm.
probabilities = [
    0,
    0.05, 0.1, 0.15, 0.2, 0.25, 0.3,
    0.4, 0.5,
]

In [7]:
# Run the full normalisation x imputation grid once per input data set.
# The data directory is built with pathlib so the path works on any OS.
data_dir = Path(PROJECT_PATH) / "user_data" / "data"
for p in probabilities:
    iterate_through_methods(str(data_dir / f"ba_39_small_{p}.csv"), p)

INFO:root:Run ba_39_0_z_score_min_value_per_sample created at C:\Users\mon28\Documents\Uni\BP\PROTzilla2\user_data\runs\ba_39_0_z_score_min_value_per_sample
INFO:root:Saving plots at C:\Users\mon28\Documents\Uni\BP\PROTzilla2\user_data\runs\ba_39_0_z_score_min_value_per_sample\plots
INFO:root:------ computing workflow



INFO:root:imported MS Data
INFO:root:imported Meta Data
INFO:root:performing step: ('data_preprocessing', 'transformation', 'log_transformation')
INFO:root:performing step: ('data_preprocessing', 'normalisation', 'z_score')
INFO:root:performing step: ('data_preprocessing', 'imputation', 'min_value_per_sample')
INFO:root:performing step: ('data_analysis', 'differential_expression', 't_test')
INFO:root:creating plot: ('data_analysis', 'plot', 'volcano')
INFO:root:Run ba_39_0_z_score_simple_imputation_per_proteinmedian created at C:\Users\mon28\Documents\Uni\BP\PROTzilla2\user_data\runs\ba_39_0_z_score_simple_imputation_per_proteinmedian
INFO:root:Saving plots at C:\Users\mon28\Documents\Uni\BP\PROTzilla2\user_data\runs\ba_39_0_z_score_simple_imputation_per_proteinmedian\plots
INFO:root:------ computing workflow

INFO:root:imported MS Data
INFO:root:imported Meta Data
INFO:root:performing step: ('data_preprocessing', 'transformation', 'log_transformation')
INFO:root:performing step: ('dat