In [None]:
import pandas as pd
import dalmatian as dm

In [None]:
# rMATS analyzes five kinds of alternative splicing events:
#   SE   - skipped exon
#   A5SS - alternative 5' splice site
#   A3SS - alternative 3' splice site
#   MXE  - mutually exclusive exons
#   RI   - retained intron
# One "<event>_JC_output" name per event type.
rmats_cols = [
    "A3SS_JC_output",
    "A5SS_JC_output",
    "MXE_JC_output",
    "RI_JC_output",
    "SE_JC_output",
]

In [None]:
import json

# Sample-renaming table parsed from a JSON file. aggregate_rmats uses this kind of
# lookup to decide which samples to keep and what to write in their DepMap_ID column.
# Starts out empty so the name is defined even before the file has been parsed.
rna_renaming = {}
with open('../temp/22minerva/rna_sample_renaming.json') as renaming_fh:
    rna_renaming = json.load(renaming_fh)


In [None]:
def aggregate_rmats(ws, renaming_dict, rmats_cols):
    """Concatenate per-sample rMATS tables into one CSV per output column.

    For every column name in ``rmats_cols``, the table referenced by each
    sample's cell in that column is read as a tab-separated file, cleaned,
    tagged with the sample's renamed ID, and all samples are concatenated and
    written to ``<column>_aggregated.csv`` in the current working directory.

    Args:
        ws: Workspace identifier handed to ``dm.WorkspaceManager``
            (presumably a Terra "namespace/workspace" string -- confirm).
        renaming_dict: Mapping from sample ID (as found in the index of the
            workspace sample table) to the value stored in the ``DepMap_ID``
            column. Samples absent from this mapping are skipped.
        rmats_cols: Iterable of column names in the workspace sample table;
            each cell must hold a path/URL that ``pd.read_csv`` can read.

    Returns:
        None. Results are written to disk (comma-separated, no index).

    Raises:
        ValueError: If none of the workspace samples is present in
            ``renaming_dict``, so there is nothing to aggregate.
    """
    wm = dm.WorkspaceManager(ws).disable_hound()
    rnasamples = wm.get_samples()
    for c in rmats_cols:
        print("aggregating " + c)
        df_list = []
        for i in rnasamples.index:
            if i not in renaming_dict:
                continue
            df = pd.read_csv(rnasamples.loc[i, c], sep='\t')
            # Drop columns that are entirely empty for this sample.
            df = df.dropna(axis=1, how='all')
            # 'ID' and 'ID.1' are dropped; the '.1' suffix is what pandas gives a
            # repeated 'ID' header (assumes the rMATS table has two -- confirm).
            df = df.drop(['ID', 'ID.1'], axis=1)
            # Keep only events with an inclusion level, and label the rows using the
            # mapping passed in (not a notebook-level global). assign() returns a
            # new frame, so no write happens on a filtered slice.
            df = df[df.IncLevel1.notna()].assign(DepMap_ID=renaming_dict[i])
            df_list.append(df)
        if not df_list:
            # pd.concat([]) would raise an opaque ValueError; fail with context instead.
            raise ValueError(
                "no samples from workspace found in renaming_dict; nothing to aggregate for " + c
            )
        agg_df = pd.concat(df_list)
        print("saving file: " + c + "_aggregated.csv")
        agg_df.to_csv(c + "_aggregated.csv", index=False)
        print(c + "_aggregated.csv saved")

In [None]:
# Sanity-check one aggregated file. It is written with DataFrame.to_csv's default
# comma separator, so it must be read back as comma-separated (not tab-separated),
# otherwise every row collapses into a single column.
pd.read_csv("A3SS_JC_output_aggregated.csv")

In [None]:
from taigapy import TaigaClient

tc = TaigaClient()

# Upload the aggregated rMATS tables as a new version of the dataset, keeping the
# files already present in it. Every file uses the same format/encoding, so the
# upload entries are generated from the list of file names.
tc.update_dataset(
    changes_description="updated aggregated rMATS outputs",
    dataset_permaname="omics-features-952e",
    upload_files=[
        {"path": csv_path, "format": "TableCSV", "encoding": "utf-8"}
        for csv_path in (
            "A3SS_JC_output_aggregated.csv",
            "A5SS_JC_output_aggregated.csv",
            "MXE_JC_output_aggregated.csv",
            "RI_JC_output_aggregated.csv",
            # "SE_JC_output_aggregated.csv" is deliberately left out of this upload.
        )
    ],
    add_all_existing_files=True,
)

In [None]:
# The aggregated SE_JC output file is too big for Taiga, so it is
# stored here instead: gs://cclebams-sandbox/SE_JC_output_aggregated.csv