In [33]:
import synapseclient
import pandas as pd
import numpy as np

In [34]:
# get model csv file
model = pd.read_csv("AD.model.csv")

In [35]:
# log in to synapse
syn = synapseclient.Synapse()
syn.login()


UPGRADE AVAILABLE

A more recent version of the Synapse Client (3.0.0) is available. Your version (2.7.2) can be upgraded by typing:
    pip install --upgrade synapseclient

Python Synapse Client version 3.0.0 release notes

https://python-docs.synapse.org/build/html/news.html



Welcome, Abby Vander Linden!



In [36]:
# get synapse annotations table
table = syn.tableQuery("select * from syn10242922", includeRowIdAndRowVersion = False)


In [37]:
table_df = pd.read_csv(table.filepath)

In [38]:
# recently added values for study and tissue that aren't in the synapse table
new_vals = model.loc[(model["Parent"] == "ValidValue") & ~model["Attribute"].isin(table_df["value"])]

In [39]:
# filter annotations table to only attributes in AD data model; remove enumerated pec grants
ad_table = table_df.loc[table_df["key"].isin(model["Attribute"])]
ad_table = ad_table.loc[ad_table["module"] != "PsychENCODESpecific"]

In [40]:
d = {
    "key" : ["study", 
             "tissue", 
             "tissue"],
    "description" : ["Study", 
                     "A tissue is a mereologically maximal collection of cells that together perform physiological function.", 
                     "A tissue is a mereologically maximal collection of cells that together perform physiological function."],
    "columnType" : ["string", "string", "string"],
    "value" : ["ROSMAP-IA",
               "entorhinal cortex",
               "angular gyrus"],
    "valueDescription" : ["The ROSMAP iPSC-derived Astrocyte Study (ROSMAP-IA)",
                          "Component of the temporal lobe on the mesial surface. The rostral and caudal boundaries of the entorhinal cortex are the rostral end of the collateral sulcus and the caudal end of the amygdala respectively. The medial boundary is the medial aspect of the temporal lobe and the lateral boundary is the collateral sulcus.",
                          "Part of inferior parietal lobule formed by the cortex surrounding the upturned end of the superior temporal sulcus."],
    "source" : ["https://adknowledgeportal.synapse.org/Explore/Studies/DetailsPage/StudyDetails?Study=syn52383678",
                "http://purl.obolibrary.org/obo/UBERON_0002728",
                "http://purl.obolibrary.org/obo/UBERON_0002686"],
    "module" : ["neuro", "experimentalData", "experimentalData"]
}

add_vals = pd.DataFrame(d)

all_table_terms = pd.concat([ad_table, add_vals], ignore_index=True)

In [41]:
# check that all model valid values are in the table
model.loc[model["Parent"] == "ValidValue"]["Attribute"].isin(all_table_terms["value"]).all()

True

In [42]:
# convert to schematic format and replace valid values
enum_terms = all_table_terms.loc[all_table_terms["value"].notna()]
enum_terms.columns

Index(['key', 'description', 'columnType', 'maximumSize', 'value',
       'valueDescription', 'source', 'module'],
      dtype='object')

In [43]:
# rename columns
enum_terms = enum_terms.rename(columns = {"key" : "Parent",
                             "valueDescription" : "Description",
                             "value" : "Attribute",
                             "source" : "Source"}
                             )

enum_terms = enum_terms[["Attribute", "Description", "Parent", "Source", "columnType", "module"]]

In [44]:
# add data model columns plus the extras
model_header = model.columns.to_list() + list(["columnType", "module"])
model_enums = enum_terms.reindex(columns = model_header)

In [45]:
# add module and columnType columns to the DataProperty attributes
attribs = model.loc[model["Parent"] == "DataProperty"]

# join to annots table
attrib_modules = all_table_terms[["key", "columnType", "module"]]
attrib_modules = attrib_modules.loc[attrib_modules["key"].isin(attribs["Attribute"])].drop_duplicates()

model_attribs = attribs.merge(attrib_modules, how = "left", left_on = "Attribute", right_on = "key").drop(["key"], axis = 1)

In [46]:
# slice and glue time
# this leaves Component, which has no parent
model_dts = model.loc[~model["Parent"].isin(["DataProperty", "ValidValue"])]
model_dts = model_dts.reindex(columns = model_header)

updated_model = pd.concat([model_dts,
                           model_attribs,
                           model_enums],
                           axis = 0)

# set all validation rules to nan for now
updated_model = updated_model.drop(["Validation Rules"], axis = 1)
updated_model = updated_model.reindex(columns = model_header)

  updated_model = pd.concat([model_dts,



In [47]:
# overwrite model csv
updated_model.to_csv("AD.model.csv", index = False)