In [None]:
###############################################
###############################################
# Setup
# This chuck ingests pre-computed variables
###############################################
###############################################

# [Shift + Return] to run chunks 

###############################################

# settings
wkdir, s3dir = '/home/ubuntu/data/DL20181011_melanocyte_test_data', 'daniel.le-work/MEL_project'

%reload_ext autoreload
%autoreload 2
from scanpy_helpers import *
warnings.filterwarnings('ignore')
%matplotlib inline

# import data from s3
for file in ['adata_subset1', 'adata_subset2', 'full_adata', 'pre_adata','adata_subset1_KRT','raw_adata']:
#     ! aws s3 cp s3://{s3dir}/{file}.p {wkdir}/
    var = pickle.load((open(f'{wkdir}/{file}.p', 'rb')))
    exec(f'{file} = var')
    del var

# drop patients due to low cell count
adata_subset2_filtered = adata_subset2[[x not in ['A1015LM',
                                                'A1017LM',
                                                'A1012M'] for x in adata_subset2.obs.patient.tolist()]] 

# plot full data
sc.pl.umap(full_adata, color=['age','general_location'], cmap = 'magma_r')
sc.pl.umap(adata_subset2_filtered, color=['age','general_location'], cmap = 'magma_r')

print('Completed')

In [None]:
%%capture
###############################################
###############################################
# Compute union of differentially expressed gene among samples
# This chuck returns a dataframe of DE genes found in all samples
###############################################
###############################################

# Specify patients of interest
patientOIs = ['12WKM01', 'A1038LM']

# Specify feature to compare
groupby = 'louvain'

# Specify the number of top DE genes per test
n_genes = 50

###############################################

ranks_dict = {}
for idx, patientOI in enumerate(patientOIs):
    exec(f'global adata_{patientOI}')
    input_adata = adata_subset2_filtered
    patients = list(set(input_adata.obs.patient))
    type_dict = {'in':[patientOI],
                 'out':[x for x in patients if x != patientOI]}

    classify_type(raw_adata, input_adata, 'patient', type_dict, 'class_3')

    feature_dict = {'class_3':['in']}
    adata_subset3 = subset_adata_v3(raw_adata,feature_dict)
    adata_subset3 = process_adata(adata_subset3)
    pca_adata(adata_subset3, num_pcs=30)
    umap_adata(adata_subset3, res=0.2)
    sc.pl.umap(adata_subset3, color=['louvain','general_location'])

    exec(f'adata_{patientOI} = input_adata = adata_subset3')
    ranks_df = simple_rank (input_adata, groupby=groupby, n_genes=n_genes)
    ranks_df['patient'] = patientOI
    ranks_dict[patientOI] = ranks_df


In [None]:
###############################################
###############################################
# Visualize clustering
# This chunk displays UMAPs for each patient of interest
###############################################
###############################################

# Specify patients of interest
patientOIs = ['12WKM01', 'A1038LM']

# Specify gene of interest
geneOI = 'NTRK2'

###############################################

for patientOI in patientOIs:
    print(patientOI)
    # Look up the notebook-level variable adata_<patient> directly instead of
    # assembling and exec-ing a code string; a missing patient now raises a
    # KeyError naming the absent variable.
    patient_adata = globals()[f'adata_{patientOI}']
    sc.pl.umap(patient_adata, color=['louvain', 'general_location', geneOI])


In [None]:
###############################################
###############################################
# Display summary of DE genes shared between samples
# This chunk displays MEL and all-cells UMAPs and gene annotations
###############################################
###############################################

# Specify patients and feature group
# Maps each patient to the column of its ranks_dict table to take genes from.
patient2feature = {'12WKM01':'0',
                   'A1038LM':'1'}

###############################################

# Collect each patient's genes once, in their original order. pd.unique keeps
# order of appearance, so the result does not depend on set/hash ordering.
full_list = []
for patient, feature in patient2feature.items():
    full_list.extend(pd.unique(ranks_dict[patient][feature].values).tolist())

# Number of patients in which each gene appears.
union_df = pd.DataFrame({'genes':full_list})['genes'].value_counts()
# Keep genes seen in more than one patient, ordered by first appearance.
# NOTE(review): with more than two patients '> 1' means "shared by at least
# two", not "found in all" -- confirm which is intended.
shared_genes = union_df[union_df > 1].index
genes = [gene for gene in pd.unique(pd.Series(full_list, dtype=object))
         if gene in shared_genes]

sc.pl.umap(adata_subset2, color=genes, ncols = 2)
sc.pl.umap(full_adata, color=genes, ncols = 2)

# One annotation record per gene; records without a 'summary' show 'N/A'.
out = symbol2field(genes)
full_report = ['{}: {}\n{}'.format(idx,
                                   x['query'],
                                   x['summary'] if 'summary' in x.keys() else 'N/A')
               for idx, x in enumerate(out)]

# Print only the entries containing search_term ('' matches every entry).
search_term = ''
for entry in full_report:
    if search_term in entry:
        print(entry)
