## Purpose: This script lists the GUIDs that have the selected gene variant type(s), excluding any GUID that also has one of the excluded gene variant types.

1. Input: Data pulled from the API for the eyeGENEGenomics form, filtered to the requested gene variant types.
2. Output: List of GUIDs with their gene symbols and medical condition types.

In [None]:
import pandas as pd # used for creating a dataframe and other methods using pandas
import requests # used to API calls
import json # used for creating json files
import getpass
from io import StringIO
import os
import datetime as dt
import time
import sys

In [None]:
def create_folder(folder_path):
    """Create a new folder, adding a numeric suffix when the name is taken.

    Tries ``folder_path`` first; if something already exists at that path
    (a folder or a regular file), tries ``folder_path + ' (1)'``,
    ``folder_path + ' (2)'``, ... until a folder can be created.

    Args:
        folder_path: Desired path of the new folder.

    Returns:
        The path of the folder that was actually created.

    Raises:
        OSError: If the folder cannot be created for a reason other than
            the name already being in use (e.g. a missing parent folder).
    """
    adjusted_folder_path = folder_path
    counter = 0
    while True:
        try:
            # Attempt the creation directly instead of checking first: this
            # also copes with a regular file occupying the name and with the
            # folder appearing between a check and the mkdir call.
            os.mkdir(adjusted_folder_path)
        except FileExistsError:
            counter += 1
            adjusted_folder_path = f"{folder_path} ({counter})"
        else:
            return adjusted_folder_path

# Build a dated output folder (EyeGene_YYYY_MM_DD_outputfiles) inside the
# current working directory. os.path.join picks the separator for the running
# platform; a hard-coded backslash only yields a sub-folder on Windows.
x = dt.datetime.now()
new_dir = os.path.join(os.getcwd(), "EyeGene_" + x.strftime('%Y_%m_%d') + "_outputfiles")
# create_folder appends " (n)" when the name is already taken, so an earlier
# run from the same day is not reused.
created_dir = create_folder(new_dir)

Enter User Credentials

In [None]:
## login
# Ask for the account name; the prompt is printed on its own line before
# input() reads the reply.
print("Enter your username")
username = input()

# getpass reads the password without echoing it to the screen.
password = getpass.getpass("Enter your password")

In [None]:
# Credentials for the login call, keyed the way the form fields are named.
logindata = dict(password=password, username=username)

# Declare a URL-encoded form body and ask for a plain-text reply.
loginheaders = {
    "accept": "text/plain",
    "Content-Type": "application/x-www-form-urlencoded",
}

In [None]:
# Log in and keep the access token that the later query request needs.
response = requests.post(
    "https://brics.nei.nih.gov/gateway/authentication/user/login",
    headers=loginheaders,
    data=logindata,
)
# login check
if response.status_code == 200:
    print("Login Successful")
    # The response body is the token. It is deliberately not printed: it is a
    # credential and would otherwise be stored in the notebook output.
    token = response.text
else:
    print(response.status_code)
    # Actually stop here: without this, `token` stays undefined and the run
    # only fails later with an unrelated-looking NameError.
    raise RuntimeError("Login not Successful. Please check username and password. If error still occurs reach out to system adminstrator. THIS CODE WILL NOT PROCEED")
    

## Enter values for query

In [None]:
# Variant types are entered as comma-separated text, e.g. benign,likely benign
# (the answers are split on ',' further down).
includelist =input("What variant type(s) to include?")
excludelist = input("What variant type(s) to exclude?")

In [None]:
# Turn the comma-separated answers into clean lists: surrounding spaces are
# stripped, text is lower-cased (the allowed values and the data column are
# compared in lower case) and empty entries -- e.g. from leaving the exclude
# prompt blank -- are dropped instead of becoming a '' variant type.
include = [item.strip().lower() for item in includelist.split(',') if item.strip()]
exclude = [item.strip().lower() for item in excludelist.split(',') if item.strip()]

# Included and excluded types together: rows for both are needed from the
# query so that GUIDs carrying an excluded type can be recognised.
genevariantlist = include + exclude
genevariantlist

In [None]:
#check
# Report each requested variant type in turn and stop at the first one that is
# not among the allowed values.
genePVs = [
    "benign",
    "likely benign",
    "likely pathogenic",
    "pathogenic",
    "uncertain significance",
]

for value in genevariantlist:
    if value not in genePVs:
        print(value + "   is not in list.Please reenter values before proceeding")
        break
    print(value + " is in list")

In [None]:
# Query payload: one form/study pair plus a filter on the variant
# interpretation type using the requested values.
genomicsfilter = dict(
    formStudy=[
        dict(form="eyeGENEGenomics", studies=["EYEGENE-STUDY0000203"]),
    ],
    filter=[
        dict(
            dataElement="GeneVariantInterpretTyp",
            form="eyeGENEGenomics",
            repeatableGroup="Genomics Information",
            value=genevariantlist,
        ),
    ],
)

In [None]:
# Address of the query endpoint used for the CSV download.
queryurl = "https://brics.nei.nih.gov/gateway/query-api/data/csv"

# JSON request body, CSV reply, authenticated with the login token.
headers = {
    "accept": "application/csv",
    "Content-type": "application/json",
    "Authorization": f"Bearer {token}",
}

In [None]:
# Send the filter to the query endpoint as a JSON body.
query = requests.post(queryurl,headers=headers,json=genomicsfilter,stream=True)
# Fail here on an HTTP error status (e.g. a rejected token) rather than
# letting the following cells try to parse an error body as CSV.
query.raise_for_status()
query

In [None]:
print(f"Response: {query}")
# [21:96] skips the first 21 characters of the header -- presumably the
# "attachment; filename=" prefix, which is 21 characters long (TODO confirm) --
# and keeps at most 75 characters of what follows. .get() avoids a KeyError
# when the reply carries no Content-Disposition header.
print("Data received: " + query.headers.get("Content-Disposition", "")[21:96])

In [None]:
# Parse the response text as CSV through an in-memory text buffer.
dataset = query.text
texttodf = StringIO(dataset)
nei_data = pd.read_csv(texttodf)  # comma is read_csv's default separator
nei_data.head()

In [None]:
# input the data from query tool
# NOTE(review): pulldatafile and read_data are not defined or imported in the
# visible part of this file, so this cell raises NameError unless they come
# from elsewhere. inputdata is reassigned from nei_data in the next cell, so
# this looks like a leftover file-based alternative to the API download --
# confirm whether it should be removed or skipped.
inputfile = pulldatafile("**/query_result_eyeGENEGenomics*")
inputdata =read_data(inputfile)
inputfile

In [None]:
# Continue with the API result. This binds a second name to the same DataFrame
# (no copy), so later changes made through inputdata also show up in nei_data.
inputdata =nei_data

In [None]:
# Lower-case the variant type column so that matching against the requested
# values does not depend on the casing used in the data.
variant_col = "eyeGENEGenomics.Genomics Information.GeneVariantInterpretTyp"
inputdata[variant_col] = inputdata[variant_col].str.lower()

In [None]:
# Rows whose variant type is one of the requested "include" values.
newdata = inputdata.loc[
    inputdata["eyeGENEGenomics.Genomics Information.GeneVariantInterpretTyp"].isin(include)
]

In [None]:
# Rows whose variant type is one of the "exclude" values.
is_excluded_type = inputdata[
    "eyeGENEGenomics.Genomics Information.GeneVariantInterpretTyp"
].isin(exclude)
compdata = inputdata[is_excluded_type]

In [None]:
# Keep the included rows only for GUIDs that never appear among the excluded rows.
guids_to_drop = compdata["eyeGENEGenomics.Main.GUID"]
updated = newdata.loc[~newdata["eyeGENEGenomics.Main.GUID"].isin(guids_to_drop)]

In [None]:
# Collect, for every GUID left after the include/exclude filtering, the
# distinct gene symbols and medical condition types found in its rows.
guid_col = "eyeGENEGenomics.Main.GUID"
gene_col = "eyeGENEGenomics.Genomics Information.HGNCGeneSymbl"
cond_col = "eyeGENEGenomics.Main.MedicalCondNEIEnrollTyp"

# uniqueguids was never assigned before this loop (NameError); derive it from
# the filtered rows, in order of first appearance.
uniqueguids = updated[guid_col].unique()

guidlist = []
genelist = []
medicalcond= []
for guid in uniqueguids:
    # Select this GUID's rows and only the two columns that are summarised.
    filtereddata = updated.loc[updated[guid_col] == str(guid), [gene_col, cond_col]]
    genelist.append(filtereddata[gene_col].unique().tolist())
    medicalcond.append(filtereddata[cond_col].unique().tolist())
    guidlist.append(guid)

In [None]:
# Wrap the three parallel lists in Series so they can be joined as columns.
glist, genelist2, medlist = (
    pd.Series(values) for values in (guidlist, genelist, medicalcond)
)

In [None]:
# Place the three Series side by side; ignore_index numbers the columns 0, 1, 2.
ds = pd.concat(
    objs=[glist, genelist2, medlist],
    axis="columns",
    ignore_index=True,
)

In [None]:
# Give the positional columns 0, 1, 2 descriptive names, modifying ds in place.
column_names = {0: "GUID", 1: "HGNCGeneSymbl", 2: "MedicalCondNEIEnrollTyp"}
ds.rename(columns=column_names, inplace=True)

In [None]:
print("The number of rows from query tool: " + str(len(nei_data)))
print("________________________________________________________")

# The count is of GUIDs that have an included type and none of the excluded
# types, so report the two lists separately instead of the combined list.
print(f'The number of unique GUIDs that have gene variant type {include} and none of {exclude}:   {len(updated["eyeGENEGenomics.Main.GUID"].unique())}')

print("________________________________________________________")

# Save the result table to the output folder; previously the message below
# was printed although nothing had been written there.
output_file = os.path.join(created_dir, "EyeGene_GUID_list.csv")
ds.to_csv(output_file, index=False)

print("The files have been created in your folder  " + str(created_dir))
