In [None]:
# Show the contents of requirements.txt before installing from it.
!cat requirements.txt
import sys
# Run pip through the interpreter that backs this kernel (sys.executable) so
# the packages are installed for the Python that executes the notebook.
!{sys.executable} -m pip install -r requirements.txt
# Force-reinstall the PIC-SURE client and adapter packages straight from their
# GitHub repositories. No branch, tag or commit is given in the URLs.
!{sys.executable} -m pip install --upgrade --force-reinstall git+https://github.com/hms-dbmi/pic-sure-python-client.git
!{sys.executable} -m pip install --upgrade --force-reinstall git+https://github.com/hms-dbmi/pic-sure-python-adapter-hpds.git
!{sys.executable} -m pip install --upgrade --force-reinstall git+https://github.com/hms-dbmi/pic-sure-biodatacatalyst-python-adapter-hpds.git


import json
from pprint import pprint

import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
from scipy import stats

import PicSureClient
import PicSureBdcAdapter

from python_lib.utils import get_multiIndex_variablesDict, joining_variablesDict_onCol

import re

# Pandas DataFrame display options
# Use the fully-qualified option key. Option names are matched as regular
# expressions, and the abbreviated pattern "max.rows" is ambiguous on pandas
# versions that also define "styler.render.max_rows", which makes
# set_option raise OptionError.
pd.set_option("display.max_rows", 100)

# Matplotlib display parameters: default figure size, and a bold size-12 font
plt.rcParams.update({"figure.figsize": (14, 8)})
font = dict(weight='bold', size=12)
plt.rc('font', **font)

In [None]:
# Connect to PICSURE Network

# Do not change these variable names
PICSURE_network_URL = "https://picsure.biodatacatalyst.nhlbi.nih.gov/picsure"
resource_id = "02e23f52-f354-4e8b-992c-d37c8b9ba140"

# The security token is read from a plain-text file named token.txt in the
# current working directory; save your token there before running this cell
# so it never has to be pasted into the notebook itself.
token_file = "token.txt"

with open(token_file, "r") as f:
    # Strip surrounding whitespace: text editors usually append a trailing
    # newline, which would otherwise be sent as part of the token.
    my_token = f.read().strip()

# NOTE(review): the third positional argument to connect() appears to be the
# client's "allow self-signed SSL" switch. Confirm that passing True is
# intended for this public HTTPS endpoint.
client = PicSureClient.Client()
connection = client.connect(PICSURE_network_URL, my_token, True)
adapter = PicSureBdcAdapter.Adapter(connection)
resource = adapter.useResource(resource_id)

In [None]:
# Requires the third-party `ipynb` package (pip install ipynb), which allows
# another notebook to be imported like a module.
# NOTE(review): the wildcard import hides which names come from
# BDC_Harmonization_Functions. pull_var_from_study, called later in this
# notebook, is presumably one of them; confirm and consider importing it
# explicitly.

from ipynb.fs.full.BDC_Harmonization_Functions import *

In [None]:
# Run an unfiltered search of the resource's data dictionary, materialise the
# hits as a DataFrame, and hand that frame to get_multiIndex_variablesDict.
dictionary_search = resource.dictionary().find()
full_dict = dictionary_search.DataFrame()
full_multiindex_dict = get_multiIndex_variablesDict(full_dict)

In [None]:
# Now let us say we are interested in all the studies that relate to asthma.
# Match case-insensitively so names spelled "Asthma"/"ASTHMA" are not missed,
# and treat missing names as non-matches so the boolean mask never contains
# NaN (a mask with NaN cannot be used for row selection).
asthma_studies = full_multiindex_dict['name'].str.contains('asthma', case=False, na=False)
asthma_multiindex_dict = full_multiindex_dict[asthma_studies]
asthma_multiindex_dict

In [None]:
# Collect the distinct values of the first index level of the asthma subset;
# each value is treated as one study in the cells that follow.
first_level_values = asthma_multiindex_dict.index.get_level_values(0)
studies_of_interest = first_level_values.unique()
studies_of_interest

In [None]:
# Call pull_var_from_study once per study of interest and keep each result
# under that study's name.
harmonized_dictionary = {}
for study in studies_of_interest:
    print('\nYou are looking at study: ', study)
    harmonized_dictionary[study] = pull_var_from_study(study, ['Asthma|Age'], resource)

In [None]:
def harmonize_df(df_dict):
    """Combine per-study DataFrames into one frame tagged with a 'Study' column.

    Parameters
    ----------
    df_dict : dict
        Maps a study name to that study's DataFrame.

    Returns
    -------
    pandas.DataFrame
        The outer merge of all frames, in dict order, where every row carries
        the name of the study it came from in the 'Study' column. An empty
        dict yields an empty frame that has only the 'Study' column.

    Notes
    -----
    The input frames are not modified: the 'Study' column is added to copies
    rather than written into the caller's DataFrames.
    """
    if not df_dict:
        # Nothing to harmonize; return an empty frame instead of failing on
        # a missing first element.
        return pd.DataFrame(columns=['Study'])

    harmonized_df = None
    for study, study_df in df_dict.items():
        # assign() returns a new frame, leaving the caller's data untouched.
        tagged_df = study_df.assign(Study=study)
        if harmonized_df is None:
            harmonized_df = tagged_df
        else:
            # No `on=` is given, so the merge joins on every column the two
            # frames share (which always includes 'Study').
            harmonized_df = pd.merge(harmonized_df, tagged_df, how='outer')
    return harmonized_df

In [None]:
# Merge every per-study frame collected above into a single DataFrame.
final_df = harmonize_df(df_dict=harmonized_dictionary)

In [None]:
# Write the harmonized table to a CSV file in the working directory.
# The DataFrame index is written as the first column (to_csv default).
final_df.to_csv(path_or_buf='Asthma_Age_Harmonized_Final.csv')