### *You must first run the "MISC - Install Libraries.ipynb" notebook to install needed libraries*

## Import the needed libraries

In [None]:
import pandas
import numpy
import matplotlib

## Import your security token from file

In [None]:
import os
env = %env
token_name = "COPDgene_token.txt"
token_file = token_name
has_error = False
if os.path.isfile(token_file):
    __token__ = open(token_file, "r").read()
    if len(__token__) == 0:
        has_error = True
else: 
    has_error = True
if has_error:
    print("\x1b[31m")
    print("!!! [_Security_Token_Error_] !!!")
    print("Please copy and save a valid PIC-SURE authentication token value into the file \""+token_name+"\".")
    print("This file is located in the current Notebook directory.")
    open(token_file, "w").write("")
else:
    print("\x1b[32m")
    print("[_Security_Token_Imported_Correctly_]")

## Create an instance of the datasource adapter and get a reference to the data resource 

In [None]:
import PicSureClient
import PicSureHpdsLib

# Endpoint of the PIC-SURE instance this notebook talks to.
picsure_endpoint = "https://copdgene-dev.hms.harvard.edu/picsure/"

# Open a connection authenticated with the token loaded in the previous cell.
# NOTE(review): allowSelfSignedSSL=True presumably relaxes certificate
# verification for this dev host — confirm before pointing at production.
client = PicSureClient.Client()
connection = client.connect(picsure_endpoint, __token__, allowSelfSignedSSL=True)

# Wrap the connection in the HPDS adapter and list what it exposes.
adapter = PicSureHpdsLib.Adapter(connection)
adapter.list()

In [None]:
# Get a reference to the data resource identified by this UUID.
# NOTE(review): the UUID is presumably one of the entries shown by
# adapter.list() — confirm it matches the target environment.
resource = adapter.useResource("b6ef7b1a-56f6-11e9-8958-0242c0a83007")

## Get a listing of all "Demographics" entries in the data dictionary and show what actions can be done with the "demographic_entries" object

In [None]:
# Search the resource's data dictionary for the term "Demographics".
demographic_entries = resource.dictionary().find("Demographics")
# Show what actions are available on the search-result object.
demographic_entries.help()

## Examine the demographic_entries results by converting them into a pandas DataFrame

In [None]:
# Convert the dictionary search results to a DataFrame; as the cell's last
# expression, the result is displayed by the notebook.
demographic_entries.DataFrame()

In [None]:
# Create a throwaway query object just to call its built-in help().
resource.query().help()

In [None]:
# Call the built-in help() for the filter section of a (throwaway) query.
resource.query().filter().help()

In [None]:
# Data-dictionary path of the gender concept, shared by both filters below.
gender_path = "\\01 Demographics\\Gender\\"

# One query per gender, each filtered on a single gender value.
query_male = resource.query()
query_male.filter().add(gender_path, ["Male"])

query_female = resource.query()
query_female.filter().add(gender_path, ["Female"])

In [None]:
# Look up the dictionary entries for age at enrollment and for BMI.
field_age = resource.dictionary().find("\\01 Demographics\\Age at enrollment\\")
field_BMI = resource.dictionary().find("\\BMI\\")

# Add the age and BMI keys to the "require" list of each query
# (male first, then female, age before BMI).
for gender_query in (query_male, query_female):
    gender_query.require().add(field_age.keys())
    gender_query.require().add(field_BMI.keys())

# add the correct consent group
for gender_query in (query_male, query_female):
    gender_query.filter().add("\\00 Consent groups\\", ["COPD_HMB"])

# show one of the queries' structure
query_female.show()

## Convert the query results for females into a DataFrame and plot it by BMI and Age

In [None]:
# Run the female-only query and load its results into a pandas DataFrame.
df_f = query_female.getResultsDataFrame()

# Scatter plot of BMI against age at enrollment. The 8-digit hex color
# (#RRGGBBAA) carries an alpha channel so overlapping points stay readable.
plot_f = df_f.plot.scatter(
    x="\\01 Demographics\\Age at enrollment\\",
    y="\\03 Clinical data\\Physical characteristics\\BMI\\",
    c="#ffbabb40",
)

# Give the figure a title and readable axis labels instead of raw concept paths.
plot_f.set(
    title="Female participants: BMI vs. age at enrollment",
    xlabel="Age at enrollment",
    ylabel="BMI",
)

# Explicitly render the figure in case the front end does not display it
# automatically. (The former argument-less plot_f.plot() call drew nothing
# and has been removed.)
matplotlib.pyplot.show()

## Convert the query results for males into a DataFrame and plot it by BMI and Age

In [None]:
# Run the male-only query and load its results into a pandas DataFrame.
df_m = query_male.getResultsDataFrame()

# Scatter plot of BMI against age at enrollment. The 8-digit hex color
# (#RRGGBBAA) carries an alpha channel so overlapping points stay readable.
plot_m = df_m.plot.scatter(
    x="\\01 Demographics\\Age at enrollment\\",
    y="\\03 Clinical data\\Physical characteristics\\BMI\\",
    c="#5a7dd040",
)

# Give the figure a title and readable axis labels instead of raw concept paths.
plot_m.set(
    title="Male participants: BMI vs. age at enrollment",
    xlabel="Age at enrollment",
    ylabel="BMI",
)

# Explicitly render the figure in case the front end does not display it
# automatically. (The former argument-less plot_m.plot() call drew nothing
# and has been removed.)
matplotlib.pyplot.show()

## Replot the results using a single DataFrame containing both male and female

In [None]:
# Collect the dictionary keys for gender, BMI and age into one list.
d = resource.dictionary()
criteria = []
criteria.extend(d.find("\\Gender\\").keys())
criteria.extend(d.find("\\BMI\\").keys())
criteria.extend(d.find("\\Age at enrollment\\").keys())

# Build a single query that requires all three fields.
query_unified = resource.query()
query_unified.require().add(criteria)
query_unified.filter().add("\\00 Consent groups\\", ["COPD_HMB"]) # MUST ADD CONSENT GROUP

df_mf = query_unified.getResultsDataFrame()

# map a color field for the plot to use
# (gender values missing from this dict map to NaN in the color column)
sex_colors = {'Male':'#5a7dd040', 'Female':'#ffbabb40'}
df_mf['\\sex_color\\'] = df_mf['\\01 Demographics\\Gender\\'].map(sex_colors)


# plot data, coloring each point by the participant's gender
plot_mf = df_mf.plot.scatter(
    x="\\01 Demographics\\Age at enrollment\\",
    y="\\03 Clinical data\\Physical characteristics\\BMI\\",
    c=df_mf['\\sex_color\\'],
)

# Give the figure a title and readable axis labels instead of raw concept paths.
plot_mf.set(
    title="All participants: BMI vs. age at enrollment",
    xlabel="Age at enrollment",
    ylabel="BMI",
)

# Explicitly render the figure in case the front end does not display it
# automatically. (The former argument-less plot_mf.plot() call drew nothing
# and has been removed.)
matplotlib.pyplot.show()

## Replot data but trim outliers

In [None]:
# Use masking to remove outliers: rows with BMI above the 99.99th percentile
# or age outside 45-80 are replaced with NaN and so are not drawn.
q = df_mf["\\03 Clinical data\\Physical characteristics\\BMI\\"].quantile(0.9999)
test1 = df_mf.mask(df_mf["\\03 Clinical data\\Physical characteristics\\BMI\\"] > q)
test2 = test1.mask(df_mf["\\01 Demographics\\Age at enrollment\\"] < 45)
tests = test2.mask(df_mf["\\01 Demographics\\Age at enrollment\\"] > 80 )

# plot data
# The colors are taken from df_mf rather than from `tests`: mask() blanks
# every column of a masked row, including the color column.
plot_mf = tests.plot.scatter(
    x="\\01 Demographics\\Age at enrollment\\",
    y="\\03 Clinical data\\Physical characteristics\\BMI\\",
    c=df_mf['\\sex_color\\'],
)

# Give the figure a title and readable axis labels instead of raw concept paths.
plot_mf.set(
    title="All participants (outliers trimmed): BMI vs. age at enrollment",
    xlabel="Age at enrollment",
    ylabel="BMI",
)

# Explicitly render the figure in case the front end does not display it
# automatically. (The former argument-less plot_mf.plot() call drew nothing
# and has been removed.)
matplotlib.pyplot.show()