## Purpose: This script returns all GUIDs that have both of two requested genes and a gene variant indicator of "yes". It works in two parts:

1. Input: User receives data from the API Query that has two genes and a gene indicator of "yes"
2. Output: The second part of the script performs an additional analysis that identifies the GUIDs that have both genes provided in the query request. The script writes a CSV file with the list of those GUIDs and an Excel file with the matching rows.

## Please click "RUN ALL CELLS" from the Run Option

In [None]:
import pandas as pd
import requests
import json
import getpass
from io import StringIO
import os
import datetime as dt
import time
import sys

## Please enter your login information. 

In [None]:
## login
# Ask for the account username: the prompt text is printed first, then the
# value is read from standard input.
print("Enter your username")
username = input()

# getpass prompts for the password without echoing the typed characters.
password = getpass.getpass("Enter your password")

In [None]:
# Request pieces for the API login call.

# Headers: the body is sent form-encoded and a plain-text reply is requested.
loginheaders = {
    "accept": "text/plain",
    "Content-Type": "application/x-www-form-urlencoded",
}

# Form fields carrying the credentials collected from the user.
logindata = {
    "password": password,
    "username": username,
}

In [None]:
# Send the credentials to the gateway login endpoint.
# A timeout is set so that an unresponsive server raises an error instead of
# leaving the notebook waiting forever (requests has no timeout by default).
response = requests.post(
    "https://brics.nei.nih.gov/gateway/authentication/user/login",
    headers=loginheaders,
    data=logindata,
    timeout=60,
)


In [None]:
# Login check: anything other than HTTP 200 is treated as a failed login.
if response.status_code == 200:
    print("Login Successful")
else:
    print(response.status_code)
    print("Login not Successful. Please check username and password. If error still occurs reach out to system adminstrator. THIS CODE WILL NOT PROCEED")
    # Actually stop here: raising halts "Run All" so the later cells do not
    # run with an invalid token.
    raise RuntimeError(
        "Login failed with HTTP status " + str(response.status_code)
    )
    
        

In [None]:
# Get the token from the login response body; it is later sent as the
# Bearer credential on the query request.
# The token is deliberately NOT echoed as cell output: notebook outputs are
# saved with the file and would leak the credential to anyone it is shared with.
token = response.text



## Please enter the genes and gene variant indicator

In [None]:
# Collect the two gene symbols and the gene variant indicator for the query.
# Each typed value is stripped of surrounding whitespace: the values are sent
# verbatim as filter values and the gene symbols are later compared for exact
# equality, so a stray space would silently produce no matches.
print("Enter the value for HGNCGeneSymbl (PVs: )")
print("HGNCSymbol Gene 1")#ABCA4
HGNCGeneSymbl1 = input().strip()

print("HGNCSymbol Gene 2")#PRPH2
HGNCGeneSymbl2 = input().strip()

print("Enter the value for GeneVariantIndicator(PVs: )")
GeneVariantIndicator = input().strip()

In [None]:
def _genomics_criterion(data_element, value, operator=None):
    """Build one filter entry on the eyeGENEGenomics form.

    Keys are inserted in a fixed order (dataElement, form, repeatableGroup,
    optional operator, value) so the serialized request body keeps the same
    layout regardless of which entries carry an operator.
    """
    criterion = {
        "dataElement": data_element,
        "form": "eyeGENEGenomics",
        "repeatableGroup": "Genomics Information",
    }
    if operator is not None:
        criterion["operator"] = operator
    criterion["value"] = [value]
    return criterion


# Query payload: one form/study pair plus three filter entries (both gene
# symbols and the variant indicator).
# NOTE(review): how the API combines the "OR"/"AND" operators is not visible
# here - confirm against the query API documentation.
genomicsfilter = {
    "formStudy": [
        {
            "form": "eyeGENEGenomics",
            "studies": ["EYEGENE-STUDY0000203"],
        },
    ],
    "filter": [
        _genomics_criterion("HGNCGeneSymbl", HGNCGeneSymbl1, operator="OR"),
        _genomics_criterion("HGNCGeneSymbl", HGNCGeneSymbl2, operator="AND"),
        _genomics_criterion("GeneVariantIndicator", GeneVariantIndicator),
    ],
}
 



In [None]:
# Endpoint of the query API that returns the data as CSV.
queryurl = "https://brics.nei.nih.gov/gateway/query-api/data/csv"

# Request headers: CSV is requested back, the payload is sent as JSON, and the
# login token is passed as a Bearer credential.
headers = {
    "accept": "application/csv",
    "Content-type": "application/json",
    "Authorization": f"Bearer {token}",
}

In [None]:
# Submit the filter to the query API. The filter dict is sent as the JSON body.
query = requests.post(queryurl, headers=headers, json=genomicsfilter)
# Fail here with a clear HTTP error if the server rejected the request, rather
# than letting an error body flow into the CSV parsing below.
query.raise_for_status()
query

In [None]:
# Show a fixed slice of the Content-Disposition response header.
# NOTE(review): the 21:96 offsets presumably isolate the file name - confirm
# against an actual header value.
content_disposition = query.headers["Content-Disposition"]
print(f"Data received: {content_disposition[21:96]}")

In [None]:
# Keep the decoded text body of the query response (the request asked for CSV
# through its 'accept' header).
dataset = query.text

In [None]:
# Prepare data: load the CSV text returned by the query into a DataFrame.
dataset = query.text
# read_csv expects a path or file-like object, so wrap the text in a buffer.
texttodf = StringIO(dataset)
# A comma is read_csv's default separator.
genes_data = pd.read_csv(texttodf)
# Preview the first rows.
genes_data.head()

In [None]:
#updated data
# Find the GUIDs that have a row for BOTH requested gene symbols.
guid_col = "eyeGENEGenomics.Main.GUID"
gene_col = "eyeGENEGenomics.Genomics Information.HGNCGeneSymbl"

# Rows reporting the first gene, and the GUIDs of rows reporting the second.
gene1_rows = genes_data[genes_data[gene_col] == HGNCGeneSymbl1]
gene2_guids = genes_data.loc[genes_data[gene_col] == HGNCGeneSymbl2, guid_col]

# Keep the first-gene rows whose GUID also appears with the second gene.
# A membership test replaces the former self-merge, which multiplied rows
# (one copy per matching second-gene row) only to de-duplicate them afterwards.
querytest2 = gene1_rows.loc[
    gene1_rows[guid_col].isin(gene2_guids), [guid_col, gene_col]
]
# Distinct GUIDs, in order of first appearance in the data.
uniqueGUID = querytest2[guid_col].unique()

In [None]:
# Keep every row of the downloaded data whose GUID is in the both-genes list.
guid_in_both = genes_data["eyeGENEGenomics.Main.GUID"].isin(uniqueGUID)
updated = genes_data.loc[guid_in_both]

In [None]:
def create_folder(folder_path: str) -> str:
    """Create a new directory, adding a numeric suffix if the name is taken.

    Tries ``folder_path`` first, then ``folder_path (1)``, ``folder_path (2)``
    and so on, until a directory can be created.

    Args:
        folder_path: Desired directory path. Its parent must already exist.

    Returns:
        The path of the directory that was actually created.

    Raises:
        OSError: If creation fails for a reason other than the name being
            taken (e.g. missing parent directory or no permission).
    """
    candidate = folder_path
    counter = 0
    while True:
        try:
            # Attempt the creation directly instead of checking first: this
            # avoids a check-then-create race and also skips names that are
            # occupied by a regular file rather than a directory.
            os.mkdir(candidate)
        except FileExistsError:
            counter += 1
            candidate = f"{folder_path} ({counter})"
        else:
            return candidate

# Create a dated output folder inside the current working directory.
x = dt.datetime.now()
# os.path.join uses the separator of the running platform; a hard-coded
# backslash only forms a valid path on Windows.
new_dir = os.path.join(
    os.getcwd(), "EyeGene_" + x.strftime('%Y_%m_%d') + "_outputfiles"
)
# create_folder returns the path it really created (it may add a suffix).
created_dir = create_folder(new_dir)
# writer = pd.ExcelWriter(created_dir+'\\'+newFile, engine = 'xlsxwriter')
# newFile = "StudyId_" +str(studyId)+ '_'+formName+'_results.xlsx'



In [None]:
# Write the two result files into the output folder. Paths are built with
# os.path.join so they are valid on any platform, not only on Windows.
date_prefix = x.strftime('%Y_%m_%d')

# Excel workbook with every row belonging to a GUID that has both genes.
# (to_excel/to_csv return None when given a path; the names are kept bound
# as before.)
newfile = updated.to_excel(
    os.path.join(created_dir, date_prefix + "genevariant.xlsx"),
    index=False,
)

# CSV with the list of unique GUIDs that have both genes.
uniqueGUIDsfile = pd.DataFrame(uniqueGUID, columns=["Unique GUIDs"]).to_csv(
    os.path.join(created_dir, date_prefix + "_UniqueGUIDSwBothGenes.csv"),
    index=False,
)

#print("The newfile: " + str(newfile) + "  has been created. Please reach out to your operations for support or questions")

In [None]:
# Print a short run summary: row counts, GUID count and the output location,
# with a horizontal rule between consecutive entries.
summary_rule = "________________________________________________________"
summary_lines = [
    f"The number of rows of data: {len(genes_data)}",
    f"The number of unique GUIDs: {len(uniqueGUID)}",
    f"The number of rows in updated dataset:{len(updated)}",
    f"The files have been created in your folder  {created_dir}",
]

for position, summary_line in enumerate(summary_lines):
    if position > 0:
        print(summary_rule)
    print(summary_line)


