# Retrieve Information for Reactome Failed Searches

### Purpose
The notebook processes the failed query terms to get the PMIDs where the term was seen.
Using these PMIDs, MeSH terms and article metadata are extracted and written to a tab-separated file.
### How to Run
Enter MTI credentials in cell tagged "parameters"  
All code cells need to be run sequentially.


### Set Parameters

In [None]:
# MTI credentials, handed to getMeSH for every chunk of terms.
# Register at https://utslogin.nlm.nih.gov/cas/login
mti = dict(
    email_id="example@example.com",
    username="username",
    password="password",
)

# Exported as the INDRA_DB_REST_URL environment variable by the driver cell.
indra_db_rest_url = "SET_INDRA_DB_URL"
# Passed to getPMID together with each chunk of terms.
pmid_threshold = 20

# CSV with the failed search terms; the first line is the column header.
reactome_failed_terms_link = "https://gist.githubusercontent.com/PritiShaw/03ce10747835390ec8a755fed9ea813d/raw/cc72cb5479f09b574e03ed22c8d4e3147e09aa0c/Reactome.csv"
# A row is processed only when its second column is at least this value.
failed_query_hits_threshold = 10
# Maximum number of data rows read from the CSV.
failed_query_threshold = None  # None indicates no limit
# File the merged results are written to by mergeOutputs.
failed_query_output_file_path = "failed_query_analysis_output.tsv"

### Additional Steps to run in Google Colaboratory for Pyjnius

%%capture  
!pip install -r ./dependencies/requirements.txt

!mkdir -p /usr/lib/jvm/java-1.11.0-openjdk-amd64/jre/lib/amd64/server/  
!ln -s /usr/lib/jvm/java-1.11.0-openjdk-amd64/lib/server/libjvm.so /usr/lib/jvm/java-1.11.0-openjdk-amd64/jre/lib/amd64/server/libjvm.so

import os  
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-11-openjdk-amd64"

### Setup

In [None]:
# Pyjnius setup: the classpath is configured BEFORE `jnius` itself is
# imported. Keep this order -- Pyjnius is documented to reject classpath
# changes once the JVM has been started by the `jnius` import.
import jnius_config
jnius_config.add_classpath("./lib/*")
from jnius import autoclass
# Python handle for the Java class "GenericBatchNew"
# (presumably shipped in a jar under ./lib -- verify against the repo).
GenericBatchNew = autoclass("GenericBatchNew")

# Limit log output to warnings and above on the root logger.
import logging
logging.basicConfig()
logging.getLogger().setLevel(logging.WARNING)

### Driver Function

In [None]:
import requests
import os
import datetime
from reactome_query_utils.getPMID import getPMID
from reactome_query_utils.generateOutput import mergeOutputs
from reactome_query_utils.getEUtilsInfo import getEUtilsInfo
from reactome_query_utils.getMeSH import getMeSH

from tqdm import tqdm

# Export the INDRA DB REST URL from the parameters cell as an environment
# variable.
# NOTE(review): this runs after the reactome_query_utils imports above; if any
# of them reads INDRA_DB_REST_URL at import time the value arrives too late --
# confirm.
os.environ["INDRA_DB_REST_URL"] = indra_db_rest_url


# File listing the terms that were already processed; it is appended to by
# saveInHistory and read back by the driver so finished terms are skipped.
history_file_path = "/tmp/history"


def saveInHistory(terms, history_path=None):
    """
    Append processed terms to the history file, one term per line.

    Parameters
    ----------
    terms:  []
        List of processed terms. An empty list is a no-op, so that no blank
        line ends up in the history file.
    history_path:  str, optional
        File to append to. Defaults to the module-level `history_file_path`.
    """
    if not terms:
        # '\n'.join([]) + '\n' would append a bare newline, which is read
        # back later as an empty "term".
        return
    if history_path is None:
        history_path = history_file_path
    with open(history_path, "a") as out_file:
        out_file.write('\n'.join(terms) + '\n')


def selectPendingTerms(lines, hits_threshold, seen, chunk_size=10):
    """
    Pick the input rows that still need processing and group them in chunks.

    A row is kept when it has exactly two comma-separated fields, its second
    field is an integer of at least `hits_threshold`, and the whole row is not
    in `seen`. Rows whose second field is not an integer are skipped instead
    of aborting the run.

    Parameters
    ----------
    lines:  iterable of str
        Data rows of the failed-terms CSV (header row already removed)
    hits_threshold:  int
        Minimum value of the second field for a row to be kept
    seen:  container of str
        Rows that were already processed and must be skipped
    chunk_size:  int
        Maximum number of rows per chunk

    Returns
    -------
    list of list of str
        Kept rows in input order, at most `chunk_size` per chunk. No chunk
        is ever empty.
    """
    chunks = []
    for line in lines:
        parts = line.split(",")
        if len(parts) != 2 or line in seen:
            continue
        try:
            hits = int(parts[1])
        except ValueError:
            continue
        if hits < hits_threshold:
            continue
        # Open a new chunk lazily so no empty chunk is ever produced.
        if not chunks or len(chunks[-1]) == chunk_size:
            chunks.append([])
        chunks[-1].append(line)
    return chunks


if __name__ == "__main__":
    # Rows handled by earlier runs; they are skipped below.
    history = set()
    if os.path.isfile(history_file_path):
        with open(history_file_path, "r") as history_file:
            history.update(line.strip() for line in history_file)

    terms_request = requests.get(reactome_failed_terms_link)
    # Fail loudly instead of parsing an HTTP error page as a term list.
    terms_request.raise_for_status()
    inp_terms = terms_request.text.splitlines()

    # The first line is the column header, hence the +1. The limit is kept in
    # a local name: rebinding `failed_query_threshold` would raise the limit
    # by one every time this cell is re-run.
    last_row = failed_query_threshold + 1 if failed_query_threshold else None

    terms = selectPendingTerms(
        inp_terms[1:last_row], failed_query_hits_threshold, history)

    batch = GenericBatchNew()

    # Intermediate files, removed again after every chunk.
    pmid_list_path = "pmid_list.txt"
    eutils_output_path = "eutils_output.tsv"
    mesh_output_path = "mesh.txt"
    abstract_filepath = "abstract.txt"

    for chunk in tqdm(terms, ascii=True):
        getPMID(chunk, pmid_threshold, pmid_list_path)
        getMeSH(mti['email_id'], mti['username'], mti['password'], batch,
                abstract_filepath=abstract_filepath)
        getEUtilsInfo(pmid_list_path)

        mergeOutputs(failed_query_output_file_path, eutils_output_path,
                     mesh_output_path)

        # Record the chunk right away so an interrupted run can resume.
        history.update(chunk)
        saveInHistory(chunk)

        # Remove the intermediate files without going through a shell; a
        # file that was never created is not an error.
        for path in (eutils_output_path, abstract_filepath,
                     mesh_output_path, pmid_list_path):
            try:
                os.remove(path)
            except FileNotFoundError:
                pass