## Prepare folders for GO analyses

In this notebook, we export the relevant scores for each enrichment analysis into its own directory, where a standard GO enrichment analysis is subsequently performed.

In [1]:
import os

# Remember the directory the kernel started in, so that re-running this cell
# resolves the relative path from the same place instead of chdir-ing twice.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, "../../data/GO_analyses"))

# Results workbook, given relative to the working directory set above.
carls_results_file = "../results_tables/carls_table.xlsx"

# Columns of the results table; one folder and one score file is prepared per entry.
headers_to_prepare_folders_for = ["C", "A", "R", "L", "S", "Survival Score"]

In [2]:
import pandas as pd

def filter_if_survival_score_not_to_be_included(df):
    """Keep only the rows flagged in the "Used for Enrichment" column.

    Args:
        df: DataFrame containing a "Used for Enrichment" column, which is
            used directly as the row-selection mask.

    Returns:
        The rows of ``df`` selected by that column; the original index
        labels are kept.
    """
    keep_mask = df["Used for Enrichment"]
    return df[keep_mask]

def filter_valid_values(df, header):
    """Drop the rows whose value in column ``header`` is missing.

    Args:
        df: DataFrame to filter.
        header: Name of the column to check for missing (NaN/None) entries.

    Returns:
        The rows of ``df`` where ``df[header]`` is not missing; the original
        index labels are kept.
    """
    has_value = df[header].notna()
    return df[has_value]


# Load carls_results_file as a pandas DataFrame, then for every entry of
# headers_to_prepare_folders_for create a folder of that name and write a
# headerless, tab-separated two-column table ("Gene", <header>) into it.
df = pd.read_excel(carls_results_file)
for header in headers_to_prepare_folders_for:
    # exist_ok=True makes folder creation safe on re-runs without a separate
    # existence check.
    os.makedirs(header, exist_ok=True)

    # Output file is <header>/<header>_zvals.tsv; its absolute path is printed
    # quoted with a trailing comma.
    filepath = f"{header}/{header}_zvals.tsv"
    print(f"\"{os.path.abspath(filepath)}\",")

    # Apply the "Used for Enrichment" filter to a per-iteration frame only.
    # Reassigning `df` here would silently carry the filter over to every
    # header processed afterwards, making the result depend on list order.
    source_df = df
    if "Survival Score" in header:
        source_df = filter_if_survival_score_not_to_be_included(df)

    subset_df = source_df[["Gene", header]]
    # Rows with a missing value in the header column are not written.
    subset_df = filter_valid_values(subset_df, header)
    # header=False (the documented way to suppress the column-name row).
    subset_df.to_csv(filepath, sep="\t", index=False, header=False)



# List the absolute path of each per-header folder, one per line, wrapped in
# double quotes and followed by a comma.
for header in headers_to_prepare_folders_for:
    folder_abspath = os.path.abspath(header)
    print(f"\"{folder_abspath}\",")

"/Users/constantin/workspace/ecoli/ecoli_survival_scoring/data/GO_analyses/C/C_zvals.tsv",
"/Users/constantin/workspace/ecoli/ecoli_survival_scoring/data/GO_analyses/A/A_zvals.tsv",
"/Users/constantin/workspace/ecoli/ecoli_survival_scoring/data/GO_analyses/R/R_zvals.tsv",
"/Users/constantin/workspace/ecoli/ecoli_survival_scoring/data/GO_analyses/L/L_zvals.tsv",
"/Users/constantin/workspace/ecoli/ecoli_survival_scoring/data/GO_analyses/S/S_zvals.tsv",
"/Users/constantin/workspace/ecoli/ecoli_survival_scoring/data/GO_analyses/Survival Score/Survival Score_zvals.tsv",
"/Users/constantin/workspace/ecoli/ecoli_survival_scoring/data/GO_analyses/C",
"/Users/constantin/workspace/ecoli/ecoli_survival_scoring/data/GO_analyses/A",
"/Users/constantin/workspace/ecoli/ecoli_survival_scoring/data/GO_analyses/R",
"/Users/constantin/workspace/ecoli/ecoli_survival_scoring/data/GO_analyses/L",
"/Users/constantin/workspace/ecoli/ecoli_survival_scoring/data/GO_analyses/S",
"/Users/constantin/workspace/ecol