## Purpose: From the join of the two form structures, eyeGeneDemographics and eyeGeneGenomics, provide the GUIDs with one gene and one or more gene variant types. 

1. Input: Data from API, which includes the eyeGeneGenomics joined on the eyeGeneDemographics, filtered on a gene. 
2. Output: Provides the list of GUIDs that have the gene and gene variant types. Writes two Excel (.xlsx) files to a dated output folder: one with the unique GUIDs and one with the demographics information for those GUIDs.

Please run cells

In [None]:
import pandas as pd # used for creating a dataframe and other methods using pandas
import requests # used to API calls
import json # used for creating json files
import getpass
from io import StringIO
import os
import datetime as dt
import time
import sys

In [None]:
def create_folder(folder_path):
    """Create a new folder at ``folder_path`` and return the path actually used.

    If ``folder_path`` already exists as a directory, ``" (1)"``, ``" (2)"``,
    ... is appended until a name is found that is not an existing directory.

    Args:
        folder_path: Desired directory path (string).

    Returns:
        The path of the directory that was created.

    Raises:
        OSError: Propagated from ``os.mkdir`` (for example when the parent
            directory does not exist).
    """
    candidate = folder_path
    suffix = 0
    # Probe "<path>", "<path> (1)", "<path> (2)", ... until one is free.
    while os.path.isdir(candidate):
        suffix += 1
        candidate = folder_path + " ({})".format(suffix)
    os.mkdir(candidate)
    return candidate

# Timestamp taken once; its date is reused for the output folder and file names.
x = dt.datetime.now()
# Build the dated output folder path under the current working directory.
# os.path.join picks the separator for the host OS; a hard-coded backslash
# only forms a valid path on Windows.
new_dir = os.path.join(os.getcwd(), "EyeGene_" + x.strftime('%Y_%m_%d') + "_outputfiles")
# create_folder appends " (N)" if a folder for today already exists.
created_dir = create_folder(new_dir)

Please enter your login information

In [None]:
## login
# Ask for the username on standard input.
print("Enter your username")
username = input()

# getpass is used for the password prompt instead of input().
password = getpass.getpass(prompt="Enter your password")

In [None]:
# Headers and form payload for the authentication request.
loginheaders = {
    'accept': 'text/plain',
    'Content-Type': 'application/x-www-form-urlencoded',
}

# Credentials collected in the login cell, posted as form fields.
logindata = dict(password=password, username=username)

In [None]:
# Authenticate against the gateway; on success the response body is used as the bearer token.
response = requests.post(
    "https://bricsnei-stage.cit.nih.gov/gateway/authentication/user/login",
    headers=loginheaders,
    data=logindata,
)
# Login check
if response.status_code == 200:
    print("Login Successful")
    # The token is a credential: keep it in memory only and do not echo it,
    # because notebook output is saved and shared along with the notebook.
    token = response.text
else:
    print(response.status_code)
    print("Login not Successful. Please check username and password. If error still occurs reach out to system adminstrator. THIS CODE WILL NOT PROCEED")
    # Actually stop here so "Run All" does not continue into cells that need
    # `token` and fail later with an unrelated NameError.
    raise RuntimeError(f"Login failed with HTTP status {response.status_code}")
    

## Please enter values for gene and gene variant type. 

In [None]:
print("HGNCGeneSymbol")
# Strip accidental leading/trailing whitespace: the symbol is sent verbatim as
# the filter value in the query, so a stray space would not match.
HGNCGeneSymbol = input().strip()

In [None]:
print("GeneVariantInterpretTyp")

print("PVs = benign, likely benign,likely pathogenic, pathogenic,uncertain significance")

print("Enter multiple using a comma (,)")
# Lower-cased so the comparison with the (also lower-cased) data column is case-insensitive.
GeneVariantInterpretTyp = input().lower()
# Trim each entry and drop empty ones: input such as "benign, likely benign"
# would otherwise yield " likely benign", which never matches in isin().
genevariantlist = [
    entry.strip()
    for entry in GeneVariantInterpretTyp.split(',')
    if entry.strip()
]

In [None]:
# Request body for the query API: join the two forms within the study and
# filter the genomics form on the gene symbol entered above.
genomicsfilter = {
    "formStudy": [
        {"form": form_name, "studies": ["EYEGENE-STUDY0000203"]}
        for form_name in ("eyeGENEGenomics", "eyeGENEDemographics")
    ],
    "filter": [
        {
            "dataElement": "HGNCGeneSymbl",
            "form": "eyeGENEGenomics",
            "repeatableGroup": "Genomics Information",
            "value": [HGNCGeneSymbol],
        },
    ],
}

In [None]:
# Endpoint that returns the query result as CSV.
queryurl = "https://bricsnei-stage.cit.nih.gov/gateway/query-api/data/csv"

# JSON request body in, CSV out; authenticated with the login token.
headers = {
    'accept': 'application/csv',
    'Content-type': 'application/json',
    'Authorization': f'Bearer {token}',
}

In [None]:
# Submit the filter as the JSON body of the query request.
query = requests.post(queryurl, headers=headers, json=genomicsfilter)
# Fail fast on an HTTP error (e.g. expired token, rejected filter) instead of
# letting an error body flow into the header lookup and CSV parsing below.
query.raise_for_status()
query

In [None]:
# Show the response object and a slice of its Content-Disposition header.
print(f"Response: {query}")
content_disposition = query.headers["Content-Disposition"]
# NOTE(review): 21 == len("attachment; filename="), so the slice presumably
# skips that prefix and keeps up to 75 characters of the file name - confirm
# against the actual header value.
print("Data received: " + content_disposition[21:96])

In [None]:
# Parse the CSV text of the response into a DataFrame.
dataset = query.text
# Wrap the text in a file-like object so pandas can read it from memory.
texttodf = StringIO(dataset)
# A comma is read_csv's default separator.
nei_data = pd.read_csv(texttodf)
# Preview the first five rows.
nei_data.head(n=5)

In [None]:
# input the data from query tool
# Work on a copy: the next cell overwrites a column of inputdata in place, and
# a plain alias would silently modify the raw query result in nei_data too.
inputdata = nei_data.copy()

In [None]:
# Lower-case the variant interpretation column so it compares equal to the
# lower-cased user input in genevariantlist.
variant_col = "eyeGENEGenomics.Genomics Information.GeneVariantInterpretTyp"
inputdata[variant_col] = inputdata[variant_col].str.lower()

In [None]:
# Rows whose variant interpretation is one of the requested types.
requested_mask = inputdata["eyeGENEGenomics.Genomics Information.GeneVariantInterpretTyp"].isin(genevariantlist)
newdata = inputdata.loc[requested_mask]

In [None]:
# Complement of newdata: rows whose variant interpretation is NOT one of the
# requested types.
not_requested_mask = ~inputdata["eyeGENEGenomics.Genomics Information.GeneVariantInterpretTyp"].isin(genevariantlist)
compdata = inputdata.loc[not_requested_mask]

In [None]:
# Keep only matching rows whose GUID never appears among the non-matching
# rows, i.e. GUIDs that have only the requested variant types.
guids_with_other_types = compdata["eyeGENEGenomics.Main.GUID"]
only_requested_mask = ~newdata["eyeGENEGenomics.Main.GUID"].isin(guids_with_other_types)
updated = newdata.loc[only_requested_mask]

In [None]:
# Take every column from position 25 onwards.
# NOTE(review): presumably the first 25 columns belong to eyeGENEGenomics and
# the rest to eyeGENEDemographics - confirm against the query output.
first_demographics_column = 25
demographicsdata = inputdata.iloc[:, first_demographics_column:]

In [None]:
# Demographics rows for the GUIDs that survived the variant-type filtering.
selected_guids = updated["eyeGENEGenomics.Main.GUID"].unique()
guid_is_selected = demographicsdata["eyeGENEDemographics.Main.GUID"].isin(selected_guids)
demographicsdf = demographicsdata.loc[guid_is_selected]

In [None]:
# Remove duplicated demographics rows, then renumber the index from 0.
deduplicated = demographicsdf.drop_duplicates()
demographics_updated = deduplicated.reset_index(drop=True)

In [None]:
# Preview the first five rows of the de-duplicated demographics table.
demographics_updated.head(n=5)

In [None]:
# Write the unique GUIDs to an Excel file in the output folder.
# os.path.join picks the separator for the host OS; a hard-coded backslash
# only forms a valid path on Windows.
unique_guids_path = os.path.join(created_dir, x.strftime('%Y_%m_%d') + "_UniqueGUIDS.xlsx")
unique_guids_table = pd.DataFrame(
    updated["eyeGENEGenomics.Main.GUID"].unique(),
    columns=["Unique GUIDs"],
)
# to_excel returns None; the name is kept bound as in the original cell.
uniqueGUIDsfile = unique_guids_table.to_excel(unique_guids_path, index=False)

In [None]:
# Write the demographics rows to an Excel file in the output folder.
# os.path.join picks the separator for the host OS; a hard-coded backslash
# only forms a valid path on Windows.
demographics_path = os.path.join(created_dir, x.strftime('%Y_%m_%d') + "_DemographicsData.xlsx")
# to_excel returns None; the name is kept bound as in the original cell.
demographicsfile = demographics_updated.to_excel(demographics_path, index=False)

In [None]:
# Summary of the run: row count, GUID counts and output location.
print("The number of rows from query tool: " + str(len(nei_data)))
print("________________________________________________________")

# Join every requested type instead of indexing [0] and [1]: indexing raises
# IndexError when a single type was entered and omits a third or later type.
# With exactly two types the message is unchanged.
requested_types_label = " and ".join(genevariantlist)
print(f'The number of unique GUIDs that have gene variant type {requested_types_label}:   {len(newdata["eyeGENEGenomics.Main.GUID"].unique())}')

print("________________________________________________________")


print(f'The number of GUIDs that have gene variant type {genevariantlist}, and {HGNCGeneSymbol} : {len(updated["eyeGENEGenomics.Main.GUID"].unique())}')

print("________________________________________________________")

print("The files have been created in your folder  " + str(created_dir))