In [1]:
from datetime import date

import hisepy
import os
import pandas as pd

In [2]:
# Make sure the directory that receives the result CSVs exists.
os.makedirs('output', exist_ok = True)

### Helper functions

In [3]:
def cache_uuid_path(uuid):
    """Return the local path of the cached file for a HISE file UUID.

    The file is looked up under ``/home/jupyter/cache/<uuid>``. If that
    directory does not exist yet, ``hisepy.reader.cache_files`` is called for
    the UUID first (it is expected to populate the directory).

    Parameters
    ----------
    uuid : str
        UUID of the file in HISE.

    Returns
    -------
    str
        Path of the first entry that ``os.listdir`` reports for the cache
        directory.

    Raises
    ------
    FileNotFoundError
        If the cache directory is missing after the caching call, or if it
        exists but contains no files.
    """
    cache_path = '/home/jupyter/cache/{u}'.format(u = uuid)
    if not os.path.isdir(cache_path):
        hisepy.reader.cache_files([uuid])

    cache_filenames = os.listdir(cache_path)
    if not cache_filenames:
        # An empty directory would otherwise surface as an opaque IndexError.
        raise FileNotFoundError(
            'No cached file found in {p} for uuid {u}'.format(p = cache_path, u = uuid)
        )

    return os.path.join(cache_path, cache_filenames[0])

In [4]:
def read_csv_uuid(csv_uuid, index_col = None):
    """Load the cached CSV for a HISE file UUID as a DataFrame.

    Parameters
    ----------
    csv_uuid : str
        UUID of the CSV file; resolved to a local path with cache_uuid_path().
    index_col : optional
        Forwarded to ``pd.read_csv``. The default (None) leaves pandas to
        build its default index.

    Returns
    -------
    pandas.DataFrame
    """
    return pd.read_csv(cache_uuid_path(csv_uuid), index_col = index_col)

In [5]:
def element_id(n = 3):
    """Build a random identifier from ``n`` hyphen-joined periodictable names.

    Parameters
    ----------
    n : int
        Number of names to draw (with replacement).

    Returns
    -------
    str
        The drawn names joined by '-'; an empty string when ``n`` is 0.
    """
    import periodictable
    from random import randrange

    # NOTE(review): randrange(0, 118) draws indices 0..117, so index 0 is
    # eligible and 118 is never drawn - confirm whether 1..118 was intended.
    names = [periodictable.elements[randrange(0, 118)].name for _ in range(n)]
    return '-'.join(names)

### Retrieve DESeq2 results from HISE

In [6]:
# UUID of the first DESeq2 results CSV in HISE. read_csv_uuid() resolves it to
# a locally cached file and loads it with pandas' default index.
deg_uuid = 'ae34daa7-00b3-4c4a-b770-c3922da030ef'
deg = read_csv_uuid(deg_uuid)

In [7]:
# Second results CSV. index_col = 0 makes pandas use the file's first column as
# the row index instead of a data column.
# NOTE(review): the variable name suggests this is the Day 0 vs Day 7 contrast - confirm.
d0_d7_deg_uuid = 'bd2e9e22-f425-4927-b472-18d15711df27'
d0_d7_deg = read_csv_uuid(d0_d7_deg_uuid, index_col = 0)

In [8]:
# Stack both result tables row-wise into one table.
# NOTE: this rebinds `deg` to the combined table, so running this cell again
# without first re-running the cell that loads `deg` appends d0_d7_deg a second time.
deg = pd.concat([deg, d0_d7_deg], axis = 0)

In [9]:
# Number of result rows per contrast in the combined table.
deg['contrast'].value_counts()

contrast
sample.visitName         376678
cohort.cohortGuid        373942
CMV                      373942
subject.biologicalSex    373942
Name: count, dtype: int64

In [10]:
# Display label for each value of the 'contrast' column; these labels are used
# as the keys of deg_dfs.
contrast_to_name = {
    'cohort.cohortGuid': 'Subject Age: Younger | Older',
    'CMV': 'CMV Status: Negative | Positive',
    'subject.biologicalSex': 'Subject Sex: Female | Male',
    'sample.visitName': 'Flu Vaccine: Day 0 | Day 7'
}

In [11]:
# Group labels written to the 'fg' and 'bg' columns of each contrast's table
# (presumably the foreground and background groups of the comparison).
contrast_sets = {
    contrast: {'fg': fg_label, 'bg': bg_label}
    for contrast, fg_label, bg_label in [
        ('cohort.cohortGuid', 'Older Adult', 'Younger Adult'),
        ('CMV', 'Positive', 'Negative'),
        ('subject.biologicalSex', 'Male', 'Female'),
        ('sample.visitName', 'Flu Year 1 Day 7', 'Flu Year 1 Day 0'),
    ]
}

In [12]:
# Column renames applied to each per-contrast table: original name -> output name.
rename_dict = {
    'celltype': 'AIFI_L3',
    'log2FoldChange': 'log2fc'
}

In [13]:
# Split the combined results into one table per contrast, keyed by the
# contrast's display label.
contrasts = deg['contrast'].unique()

# Columns kept in each output table, in output order.
keep_cols = ['AIFI_L3', 'fg', 'bg',
             'gene', 'log2fc',
             'padj', 'pvalue', 'stat']

deg_dfs = {}
for contrast in contrasts:
    contrast_name = contrast_to_name[contrast]
    groups = contrast_sets[contrast]
    is_contrast = deg['contrast'] == contrast

    # Select the contrast's rows, tag them with the group labels, rename to
    # the output column names, then keep only the output columns.
    df = (
        deg.loc[is_contrast]
        .assign(fg = groups['fg'], bg = groups['bg'])
        .rename(columns = rename_dict)
        [keep_cols]
    )

    deg_dfs[contrast_name] = df

In [14]:
# Output file name for each contrast. The date is taken once and substituted
# into every template.
run_date = date.today()
name_templates = {
    'cohort.cohortGuid': 'diha_age-group_deseq2_results_{d}.csv',
    'CMV': 'diha_cmv-status_deseq2_results_{d}.csv',
    'subject.biologicalSex': 'diha_biological-sex_deseq2_results_{d}.csv',
    'sample.visitName': 'diha_flu-vaccine_deseq2_results_{d}.csv',
}
contrast_files = {
    contrast: template.format(d = run_date)
    for contrast, template in name_templates.items()
}

In [15]:
# Write one CSV per contrast and collect the written paths for the upload step.
out_files = []
for contrast in contrasts:
    # deg_dfs is keyed by display label, so the label has to be looked up for
    # each contrast here. Reading a `contrast_name` variable left over from an
    # earlier loop would write the same table to every file.
    deg_df = deg_dfs[contrast_to_name[contrast]]
    out_file = 'output/' + contrast_files[contrast]

    deg_df.to_csv(out_file, index = False)
    out_files.append(out_file)

  values = values.astype(str)


## Upload DEG data to HISE

Finally, we'll use `hisepy.upload.upload_files()` to send a copy of our output files to HISE for use in downstream analysis steps.

In [16]:
# Study space ID and title passed to the upload call; the title carries
# today's date.
study_space_uuid = 'de025812-5e73-4b3c-9c3b-6d0eac412f2a'
title = 'DIHA DEG results {d}'.format(d = date.today())

In [17]:
# Random identifier (three names joined by '-') that is passed to the upload
# call as its destination.
search_id = element_id()
search_id

'polonium-selenium-silver'

In [18]:
# UUIDs of the HISE files read by this notebook; passed to the upload call as
# input_file_ids.
in_files = [deg_uuid, d0_d7_deg_uuid]
in_files

['ae34daa7-00b3-4c4a-b770-c3922da030ef',
 'bd2e9e22-f425-4927-b472-18d15711df27']

In [19]:
# Paths of the CSV files written above that will be uploaded.
out_files

['output/diha_age-group_deseq2_results_2024-08-09.csv',
 'output/diha_cmv-status_deseq2_results_2024-08-09.csv',
 'output/diha_biological-sex_deseq2_results_2024-08-09.csv',
 'output/diha_flu-vaccine_deseq2_results_2024-08-09.csv']

In [20]:
# Number of files queued for upload (one is appended per contrast).
len(out_files)

4

In [21]:
# Upload the output CSVs to the study space, passing the input file UUIDs as
# input_file_ids. The call prompts for interactive (y/n) confirmation before
# it proceeds.
hisepy.upload.upload_files(
    files = out_files,
    study_space_id = study_space_uuid,
    title = title,
    input_file_ids = in_files,
    destination = search_id
)

you are trying to upload file_ids... ['output/diha_age-group_deseq2_results_2024-08-09.csv', 'output/diha_cmv-status_deseq2_results_2024-08-09.csv', 'output/diha_biological-sex_deseq2_results_2024-08-09.csv', 'output/diha_flu-vaccine_deseq2_results_2024-08-09.csv']. Do you truly want to proceed?


(y/n) y


{'trace_id': '356e8cfd-dd46-4d8f-95a4-8a73ac046181',
 'files': ['output/diha_age-group_deseq2_results_2024-08-09.csv',
  'output/diha_cmv-status_deseq2_results_2024-08-09.csv',
  'output/diha_biological-sex_deseq2_results_2024-08-09.csv',
  'output/diha_flu-vaccine_deseq2_results_2024-08-09.csv']}

In [22]:
# Display session information via the session_info package.
import session_info
session_info.show()