## Purpose:
The purpose of this script is to provide all the GUIDs that have both genes and a gene variant interpretation type.

Input: Queried data on two genes 
Output: All GUIDs with both genes and the gene variant interpretation type

In [None]:
import pandas as pd
import requests
import json
import getpass
from io import StringIO
import os
import datetime as dt
import time
import sys

## Please enter login information

In [None]:
## login
# Prompt for the username on standard input.
print("Enter your username")
username = input()

# getpass prompts for the password without echoing what is typed.
password = getpass.getpass("Enter your password")

In [None]:
# Headers and form fields for the login request.

# Ask for a plain-text response and send the body form-encoded.
loginheaders = {'accept': 'text/plain', 'Content-Type': 'application/x-www-form-urlencoded'}

# Credentials entered above, sent as form fields.
logindata = dict(password=password, username=username)

In [None]:
# Post the credentials to the login endpoint; on success the response body is used as the bearer token.
# The timeout stops the notebook from hanging forever if the server never answers.
response = requests.post("https://brics.nei.cit.nih.gov/gateway/authentication/user/login", headers=loginheaders, data=logindata, timeout=60)


In [None]:
# Login check: keep the returned token on success, otherwise stop here so
# later cells do not run without a token.
if response.status_code == 200:
    print("Login Successful")
    token = response.text
else:
    print(response.status_code)
    print("Login not Successful. Please check username and password. If error still occurs reach out to system adminstrator. THIS CODE WILL NOT PROCEED")
    # Raise so execution really halts, as the message above promises.
    raise RuntimeError("Login failed with HTTP status " + str(response.status_code))
    
        

## Please enter the genes and the gene variant interpretation type.

In [None]:
# Read the two gene symbols and the interpretation type from standard input.
# Surrounding whitespace is stripped so a stray space does not silently
# prevent the values from matching the queried data.
print("Enter the value for HGNCGeneSymbl (PVs: )")
print("HGNCSymbol Gene 1")#ABCA4
HGNCGeneSymbl1 = input().strip()

print("HGNCSymbol Gene 2")#PRPH2
HGNCGeneSymbl2 = input().strip()

print("Enter the value for GeneVariantInterpretTyp(PVs: )")
# The interpretation type is lower-cased before it is compared with the data.
GeneVariantInterpretTyp = input().strip().lower()

In [None]:
# Filter entry for the first gene symbol.
# NOTE(review): "operator": "OR" presumably tells the query API how to combine
# this entry with the next one — confirm against the API documentation.
gene1_filter = {
    "dataElement": "HGNCGeneSymbl",
    "form": "eyeGENEGenomics",
    "repeatableGroup": "Genomics Information",
    "operator": "OR",
    "value": [HGNCGeneSymbl1],
}

# Filter entry for the second gene symbol (no "operator" key).
gene2_filter = {
    "dataElement": "HGNCGeneSymbl",
    "form": "eyeGENEGenomics",
    "repeatableGroup": "Genomics Information",
    "value": [HGNCGeneSymbl2],
}

# Request body for the query: one form/study pair plus the two gene filters.
genomicsfilter = {
    "formStudy": [
        {"form": "eyeGENEGenomics", "studies": ["EYEGENE-STUDY0000203"]},
    ],
    "filter": [gene1_filter, gene2_filter],
}
           
 
 



In [None]:
# CSV endpoint of the query API.
# NOTE(review): this host (bricsnei-stage.cit.nih.gov) differs from the host
# used for the login request (brics.nei.cit.nih.gov) — confirm the token is
# accepted here.
queryurl = "https://bricsnei-stage.cit.nih.gov/gateway/query-api/data/csv"

# JSON request body, CSV response, and the login token as bearer authorization.
headers = {
    'accept': 'application/csv',
    'Content-type': 'application/json',
    'Authorization': f'Bearer {token}',
}

In [None]:
# Send the filter as a JSON body to the query endpoint; the %time IPython magic reports how long the request took.
%time query = requests.post(queryurl,headers=headers,json=genomicsfilter)

In [None]:
# Report the outcome of the data query (not of the earlier login request).
if query.status_code != 200:
    print("Query not successful. Response Status:  " + str(query.status_code))
else:
    # NOTE(review): the fixed slice [21:96] presumably extracts the file name
    # from the Content-Disposition header — confirm the offsets.
    # .get() avoids a KeyError when the header is absent.
    print("Data received: " + query.headers.get("Content-Disposition", "")[21:96])

    

In [None]:
#prepare data
dataset = query.text
texttodf = StringIO(dataset)
genes_data = pd.read_csv(texttodf, sep=",")
genes_data.head()

In [None]:
# The two gene symbols entered above, used to filter the returned rows.
genelist = [HGNCGeneSymbl1, HGNCGeneSymbl2]
genelist

In [None]:
# Keep rows whose gene symbol is one of the two requested genes and whose
# interpretation type equals the entered value.
# NOTE(review): the entered value was lower-cased on input, while this
# comparison is case-sensitive on the data side — confirm the data stores
# lower-case values.
is_requested_gene = genes_data["eyeGENEGenomics.Genomics Information.HGNCGeneSymbl"].isin(genelist)
has_requested_type = genes_data["eyeGENEGenomics.Genomics Information.GeneVariantInterpretTyp"] == GeneVariantInterpretTyp
new_data = genes_data[is_requested_gene & has_requested_type].copy()


In [None]:
# Preview the first rows of the filtered data.
new_data.head()

In [None]:
# Find GUIDs that have rows for both genes: inner-join the gene-1 rows with the
# GUIDs of the gene-2 rows. A GUID with several gene-2 rows repeats its gene-1
# rows in the result.
guid_col = 'eyeGENEGenomics.Main.GUID'
symbol_col = "eyeGENEGenomics.Genomics Information.HGNCGeneSymbl"

gene1_rows = new_data[new_data[symbol_col] == HGNCGeneSymbl1]
gene2_guids = new_data[new_data[symbol_col] == HGNCGeneSymbl2][[guid_col]]

joined = gene1_rows.merge(gene2_guids, on=guid_col, how="inner")
querytest2 = joined[[guid_col, symbol_col]]


In [None]:
# Distinct GUIDs that appear in the join of both genes.
uniqueGUID = querytest2["eyeGENEGenomics.Main.GUID"].unique()

# All filtered rows that belong to one of those GUIDs.
guid_has_both_genes = new_data["eyeGENEGenomics.Main.GUID"].isin(uniqueGUID)
updated = new_data.loc[guid_has_both_genes]

In [None]:
def create_folder(folder_path):
    """Create a new folder, adding a numeric suffix if the name is taken.

    Tries ``folder_path`` first, then ``folder_path + ' (1)'``,
    ``folder_path + ' (2)'`` and so on, until a folder can be created.

    Args:
        folder_path: Desired path of the new folder. Its parent must exist.

    Returns:
        The path of the folder that was actually created.

    Raises:
        OSError: If the folder cannot be created for a reason other than the
            name already existing (e.g. missing parent, no permission).
    """
    candidate = folder_path
    counter = 0
    while True:
        try:
            # Attempt the creation directly instead of checking first: this
            # also copes with an existing regular file of the same name and
            # with another process creating the folder in between.
            os.mkdir(candidate)
        except FileExistsError:
            counter += 1
            candidate = folder_path + ' (' + str(counter) + ')'
        else:
            return candidate

# Timestamp used for the output folder name and for the output file names.
x = dt.datetime.now()
# Build the path with os.path.join so the separator is correct on every
# operating system, not only on Windows.
new_dir = os.path.join(os.getcwd(), "EyeGene_" + x.strftime('%Y_%m_%d') + "_outputfiles")
# create_folder picks a suffixed name if today's folder already exists.
created_dir = create_folder(new_dir)




In [None]:
# Write the rows for GUIDs with both genes to Excel and the list of those GUIDs to CSV.
# Paths are built with os.path.join so they are valid on every operating
# system, not only on Windows.
# NOTE: to_excel and to_csv (when given a path) return None; the variable
# names are kept only for compatibility.
newfile = updated.to_excel(os.path.join(created_dir, x.strftime('%Y_%m_%d') + "genevariant.xlsx"), index=False)

uniqueGUIDsfile = pd.DataFrame(uniqueGUID, columns=["Unique GUIDs"]).to_csv(os.path.join(created_dir, x.strftime('%Y_%m_%d') + "_UniqueGUIDSwBothGenes.csv"), index=False)

#print("The newfile: " + str(newfile) + "  has been created. Please reach out to your operations for support or questions")

In [None]:
# Summary of the run: size of the queried data, number of matching GUIDs,
# size of the exported data, and where the files were written.
print("The number of rows of data: ", len(genes_data), sep="")
print("________________________________________________________")

print("The number of unique GUIDs that have both genes and the variant interpretation type: ", len(uniqueGUID), sep="")
print("________________________________________________________")

print("The number of rows of data for GUIDs with both genes and variant interpretation type dataset:", len(updated), sep="")
print("________________________________________________________")

print("The files have been created in your folder  ", created_dir, sep="")


