# Prepare figshare

In [2]:
import os,sys
import pandas as pd
import scanpy as sc
import shutil

In [3]:
# Root folder collecting everything destined for figshare, with one subfolder
# for the DEA result tables and one for the aggregated scRNA objects.
figshare_dir = '/home/jovyan/mount/gdrive/sc_targetID/figshare_data/'
DE_results_dir = os.path.join(figshare_dir, 'DEA_results/')
pbulk_dir = os.path.join(figshare_dir, 'cxg_aggregated_scRNA/')

# Check every folder on its own: testing only figshare_dir would skip the
# subfolders whenever the root already exists (e.g. after a partial run).
# os.mkdir (not os.makedirs) is kept on purpose so that a missing parent path
# still raises instead of being silently created.
for out_dir in (figshare_dir, DE_results_dir, pbulk_dir):
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

In [15]:
## Load the ontology IDs listed in the post-QC disease table
disease_ids = (
    pd.read_csv('../../data/all_diseases_postqc.csv')
    .loc[:, 'disease_ontology_id']
    .tolist()
)
# Trailing expression so the notebook echoes how many IDs were read
len(disease_ids)

30

In [23]:
# For every disease ID, copy its aggregated expression object into pbulk_dir
# and its two DE tables into DE_results_dir (same order as listed below).
for mondo_id in disease_ids:
    copy_plan = (
        (f"cellxgene_targets_{mondo_id}.pbulk_all_genes.h5ad", pbulk_dir),
        (f"DE_celltype_{mondo_id}.hvgs.csv", DE_results_dir),
        (f"DE_diseasecelltype_{mondo_id}.hvgs.csv", DE_results_dir),
    )
    for fname, dest_dir in copy_plan:
        # Source files all live in ../../data/ under the same file name
        shutil.copy(
            os.path.join('../../data/', fname),
            os.path.join(dest_dir, fname)
        )

In [28]:
## Copy every supplementary table from the data folder into the figshare folder
suppl_tables_files = list(
    filter(lambda fname: fname.startswith('suppl_table'), os.listdir('../../data/'))
)
for tab in suppl_tables_files:
    # Keep the file name unchanged at the destination
    shutil.copy(src=os.path.join('../../data/', tab), dst=os.path.join(figshare_dir, tab))

In [29]:
## Copy the original OpenTargets table into the figshare folder
OT_table_file = 'TargetDiseasePairs_OpenTargets_cellXgeneID_12072023.clean.csv'
# Left as a bare expression so the notebook echoes the destination path
shutil.copy(src=os.path.join('../../data/', OT_table_file), dst=os.path.join(figshare_dir, OT_table_file))

'/home/jovyan/mount/gdrive/sc_targetID/figshare_data/TargetDiseasePairs_OpenTargets_cellXgeneID_12072023.clean.csv'

In [31]:
# Echo the destination folder path in the notebook output
figshare_dir

'/home/jovyan/mount/gdrive/sc_targetID/figshare_data/'

In [30]:
# List the entries currently present in the figshare folder
os.listdir(figshare_dir)

['DEA_results',
 'TargetDiseasePairs_OpenTargets_cellXgeneID_12072023.clean.csv',
 'cxg_aggregated_scRNA',
 'suppl_table_cxg_sample_metadata.post_qc.csv',
 'suppl_table_disease_target_evidence.csv',
 'suppl_table_diseases.csv',
 'suppl_table_odds_ratios.all.csv',
 'suppl_table_odds_ratios.disease.csv']

### Add data for expanded cell type specificity analysis

In [8]:
# Destination subfolders for the expanded analysis files.
expanded_DE_results_dir = os.path.join(figshare_dir, 'DEA_results_expanded/')
expanded_pbulk_dir = os.path.join(figshare_dir, 'cxg_aggregated_scRNA_expanded/')

# The guard has to test the folders being created, not figshare_dir: once
# figshare_dir exists, a check on it never creates the expanded subfolders and
# the copies into them fail. os.mkdir is kept so that a missing figshare_dir
# still raises rather than being silently created.
for expanded_dir in (expanded_DE_results_dir, expanded_pbulk_dir):
    if not os.path.isdir(expanded_dir):
        os.mkdir(expanded_dir)

In [11]:
# Tissue identifiers used to build the expanded-analysis file names.
all_tissue_ids = [
    'blood',
    'brain',
    'bone-marrow',
    'colon',
    'esophagus',
    'eye',
    'heart',
    'liver',
    'lung',
    'lymph-node',
    'nose',
    'small-intestine'
]

# Copy each tissue's aggregated expression object and cell type DE table from
# ../../data/ into the expanded figshare subfolders.
# The loop variable is `tissue_id` rather than `id` so the builtin id() is not
# shadowed for the rest of the session.
for tissue_id in all_tissue_ids:
    pbulk_file = f"cellxgene_targets_{tissue_id}.pbulk_all_genes.h5ad"
    DE_celltype_file = f"DE_celltype_{tissue_id}.hvgs.csv"

    shutil.copy(
        os.path.join('../../data/', pbulk_file),
        os.path.join(expanded_pbulk_dir, pbulk_file)
    )

    shutil.copy(
        os.path.join('../../data/', DE_celltype_file),
        os.path.join(expanded_DE_results_dir, DE_celltype_file)
    )

In [13]:
## Copy the disease-specific analysis outputs into the figshare folder
data_dir_entries = os.listdir('../../data/')
dsa_files = [
    entry for entry in data_dir_entries
    if entry.startswith('disease_specific_analysis.')
]
for tab in dsa_files:
    # Same file name at source and destination
    shutil.copy(os.path.join('../../data/', tab), os.path.join(figshare_dir, tab))

In [15]:
## Copy the expanded analysis outputs (files prefixed 'expanded_DRT') into the figshare folder
edrt_files = []
for entry in os.listdir('../../data/'):
    if entry.startswith('expanded_DRT'):
        edrt_files.append(entry)

for tab in edrt_files:
    shutil.copy(src=os.path.join('../../data/', tab), dst=os.path.join(figshare_dir, tab))

In [None]:
# NOTE(review): this cell has no execution count and repeats the body of the
# `for tab in ...` copy loops. It relies on `tab` still holding the last value
# assigned by one of those loops, so running it would re-copy that single file
# (or raise NameError if no such loop has run). Looks like a leftover —
# confirm whether it can be removed.
shutil.copy(
        os.path.join('../../data/', tab),
        os.path.join(figshare_dir, tab)
    )