## Purpose: This script returns the GUIDs that have the requested gene variant type(s), excluding any GUID that also has one of the gene variant types chosen for exclusion.

1. Input: Data retrieved through the API from the eyeGENEGenomics form, filtered by the gene variant types and the two genes entered by the user.
2. Output: List of GUIDs that have both genes and the specified gene variant type(s).

In [None]:
#Import dependencies
import pandas as pd # used for creating a dataframe and other methods using pandas
import requests # used to API calls
import json # used for creating json files
import getpass
from io import StringIO
import os
import datetime as dt
import time
import sys

In [None]:
def create_folder(folder_path):
    """Create a new folder, adding a numeric suffix if the name is taken.

    If ``folder_path`` already exists, the candidates ``folder_path (1)``,
    ``folder_path (2)``, ... are tried in order until an unused name is
    found, and that folder is created.

    Args:
        folder_path: Desired path of the folder to create.

    Returns:
        The path of the folder that was actually created.

    Raises:
        OSError: If the folder cannot be created (for example, the parent
            directory does not exist).
    """
    adjusted_folder_path = folder_path
    counter = 0
    # Check for any existing entry, not only directories: a regular file with
    # the same name would also make os.mkdir fail.
    while os.path.exists(adjusted_folder_path):
        counter += 1
        adjusted_folder_path = f"{folder_path} ({counter})"
    os.mkdir(adjusted_folder_path)
    return adjusted_folder_path

# Create a dated output folder inside the current working directory.
# os.path.join picks the separator for the running platform; a hard-coded
# backslash only forms a valid sub-folder path on Windows.
x = dt.datetime.now()
new_dir = os.path.join(os.getcwd(), "EyeGene_" + x.strftime('%Y_%m_%d') + "_outputfiles")
created_dir = create_folder(new_dir)

Enter Login Information

In [None]:
## login
# Ask for the account name on its own line. Surrounding whitespace (for
# example from copy/paste) is removed so it cannot cause a failed login.
print("Enter your username")
username = input().strip()

# getpass reads the password without echoing it back.
password = getpass.getpass("Enter your password")

In [None]:
# Headers for the login request: the body is sent form-encoded and a
# plain-text response is requested.
loginheaders = {
    "accept": "text/plain",
    "Content-Type": "application/x-www-form-urlencoded",
}

# Form fields carrying the credentials.
logindata = dict(password=password, username=username)

In [None]:
# Authenticate against the gateway. On success the response body is stored as
# the token that the later query request sends as its bearer credential.
response = requests.post(
    "https://brics.nei.nih.gov/gateway/authentication/user/login",
    headers=loginheaders,
    data=logindata,
)

# Login check
if response.status_code == 200:
    print("Login Successful")
    # The token is deliberately not printed: it is a reusable credential and
    # would otherwise be stored in the notebook output.
    token = response.text
else:
    print(response.status_code)
    print("Login not Successful. Please check username and password. If error still occurs reach out to system adminstrator. THIS CODE WILL NOT PROCEED")
    # Stop here: without a token the later cells would fail with a NameError.
    raise RuntimeError(f"Login failed with HTTP status {response.status_code}")
    

## Enter values for genes and gene variant interpretation type. 

In [None]:
# Raw user input; each answer is split on commas further down.
includelist = input("What variant type(s) to include?")
excludelist = input("What variant type(s) to exclude?")
genelist = input("Enter two genes")

In [None]:
# Split the comma-separated answers into clean lists.
# Variant types are stripped and lower-cased so that "Pathogenic, Benign"
# matches the lower-case values used for validation and for filtering the
# returned data. Empty entries (e.g. from a blank answer) are dropped.
include = [item.strip().lower() for item in includelist.split(",") if item.strip()]
exclude = [item.strip().lower() for item in excludelist.split(",") if item.strip()]

# Both groups are requested from the API; the exclusion happens locally.
genevariantlist = include + exclude
print(f'Gene Variant Type: {genevariantlist}')

# Gene symbols keep their case; only surrounding whitespace is removed.
genes = [symbol.strip() for symbol in genelist.split(",") if symbol.strip()]
print(f'Genes: {genes}')

In [None]:
# Verify every requested variant type against the accepted values.
genePVs = [
    "benign",
    "likely benign",
    "likely pathogenic",
    "pathogenic",
    "uncertain significance",
]

for candidate in genevariantlist:
    if candidate not in genePVs:
        # Stop at the first unknown value so the user can correct the input.
        print(f"{candidate}   is not in list.Please reenter values before proceeding")
        break
    print(f"{candidate} is in list")

In [None]:
# Build the filter list for the query: one entry for the variant types,
# followed by one entry per gene symbol.
genefilter = [
    {
        "dataElement": "GeneVariantInterpretTyp",
        "form": "eyeGENEGenomics",
        "repeatableGroup": "Genomics Information",
        "value": genevariantlist,
    },
]
for gene_symbol in genes:
    dataelement = {
        "dataElement": "HGNCGeneSymbl",
        "form": "eyeGENEGenomics",
        "repeatableGroup": "Genomics Information",
        "operator": "OR",
        # strip() removes surrounding whitespace; strip("") removes nothing.
        "value": [gene_symbol.strip()],
    }
    genefilter.append(dataelement)

In [None]:
# Request body for the query: the filters built above, applied to the
# eyeGENEGenomics form of one study.
genomicsfilter = {
    "filter": genefilter,
    "flattened": "false",
    "formStudy": [
        {
            "form": "eyeGENEGenomics",
            "studies": ["EYEGENE-STUDY0000203"],
        },
    ],
}

In [None]:
# Display the assembled request body for a visual check.
genomicsfilter

In [None]:
# Endpoint of the query request.
queryurl = "https://brics.nei.nih.gov/gateway/query-api/data/csv"

# The body is sent as JSON, CSV is requested back, and the login token is
# passed as a bearer credential.
headers = {
    "accept": "application/csv",
    "Content-type": "application/json",
    "Authorization": f"Bearer {token}",
}

In [None]:
# Send the filter as a JSON body to the query endpoint, then show the
# response object.
query = requests.post(
    queryurl,
    headers=headers,
    json=genomicsfilter,
    stream=True,
)
query

In [None]:
# Show the response object and the slice [21:96] of its Content-Disposition
# header.
print(f"Response: {query}")
print(f'Data received: {query.headers["Content-Disposition"][21:96]}')

In [None]:
# Parse the CSV text of the response into a DataFrame and preview it.
dataset = query.text
texttodf = StringIO(dataset)
nei_data = pd.read_csv(texttodf, sep=",")
print("Number of rows of data: " + str(len(nei_data)))
nei_data.head()

In [None]:
# Work on the query result under a second name. NOTE: this is an alias, not a
# copy, so in-place changes made through inputdata also change nei_data.
inputdata =nei_data

In [None]:
# Lower-case the variant type column of the query result.
variant_type_column = "eyeGENEGenomics.Genomics Information.GeneVariantInterpretTyp"
inputdata[variant_type_column] = inputdata[variant_type_column].str.lower()

In [None]:
# Rows whose variant type is one of the types to include.
include_mask = inputdata[
    "eyeGENEGenomics.Genomics Information.GeneVariantInterpretTyp"
].isin(include)
included_df = inputdata.loc[include_mask]

In [None]:
# Rows whose variant type is one of the types to exclude.
exclude_mask = inputdata[
    "eyeGENEGenomics.Genomics Information.GeneVariantInterpretTyp"
].isin(exclude)
excluded_df = inputdata.loc[exclude_mask]

In [None]:
# Keep the rows with an included variant type, then drop every GUID that also
# appears among the rows with an excluded variant type.
datawgenevariant = included_df[
    ~included_df["eyeGENEGenomics.Main.GUID"].isin(excluded_df["eyeGENEGenomics.Main.GUID"])
]

In [None]:
# Split the filtered rows into one DataFrame per requested gene.
name = {}
for gene in genes:
    # Compare against the symbol without surrounding whitespace so an entry
    # such as " USH2A" still matches. The dictionary key stays the entry
    # exactly as it appears in genes, because later cells look it up that way.
    symbol = gene.strip()
    name[gene] = datawgenevariant[
        datawgenevariant["eyeGENEGenomics.Genomics Information.HGNCGeneSymbl"] == symbol
    ]
    print(f'There are {len(name[gene])} rows of data for name[{gene}].')
print(f'There are {len(name)} dataframes created.')

In [None]:
# Collect the per-gene DataFrames in the order the genes were entered.
dfs = []
for gene_key in genes:
    dfs.append(name[gene_key])
    print(f'name{[gene_key]}')

In [None]:
from functools import partial, reduce

# Find the GUIDs present in every per-gene DataFrame.
# Only the de-duplicated GUID column of each frame is merged. Merging the full
# frames would build a row cross product per GUID and pile up suffixed
# duplicate columns as more genes are added, while yielding the same GUIDs.
guid_frames = [
    gene_df[["eyeGENEGenomics.Main.GUID"]].drop_duplicates() for gene_df in dfs
]

merge = partial(pd.merge, on=["eyeGENEGenomics.Main.GUID"], how='inner')
results = reduce(merge, guid_frames)["eyeGENEGenomics.Main.GUID"]
uniqueguids = results.unique()
print(len(uniqueguids))

In [None]:
# Keep only the rows whose GUID is in uniqueguids.
guid_in_all_genes = datawgenevariant["eyeGENEGenomics.Main.GUID"].isin(uniqueguids)
updated = datawgenevariant.loc[guid_in_all_genes]

Output Information

In [None]:
# Summary: rows returned by the query, unique GUIDs left after filtering, and
# the output folder path.
print(f"The number of rows from query tool: {len(nei_data)}")
print("________________________________________________________")

unique_guid_count = len(updated["eyeGENEGenomics.Main.GUID"].unique())
print(f'The number of unique GUIDs that have gene variant type {genevariantlist}:   {unique_guid_count}')

print("________________________________________________________")

print(f"The files have been created in your folder  {created_dir}")
