# Processing Queries

### 1) Reading a configuration file

At this point the module reads a '.cfg' file containing the names of the three files that will be used. Only the first needs to be provided; the other two are created by the module.
The file formats are:

* XML file for reading (provided)
* CSV file with the queries (created)
* CSV file with the results (created)

In [None]:
import logging

# Configure the root logger: show INFO-level records and above, each one
# prefixed with the time at which it was emitted.
logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)

In [13]:
def read_config(config_file):
    """Resolve the three working file paths named in a configuration file.

    The configuration file is looked up under ``../data/`` and is expected
    to hold ``KEY=filename`` lines. Recognised keys are:

    * ``LEIA``      -- XML file to read, resolved under
      ``../data/CysticFibrosis/``
    * ``CONSULTAS`` -- queries CSV, resolved under ``../results/``
    * ``ESPERADOS`` -- expected-results CSV, resolved under ``../results/``

    Blank lines are skipped, whitespace around the key and the filename is
    ignored, and only the first ``=`` separates key from filename, so a
    filename may itself contain ``=``. Lines with an unrecognised key are
    ignored.

    Args:
        config_file: Name of the configuration file inside ``../data/``.

    Returns:
        A ``(read_file, queries_file, expected_file)`` tuple of path strings.
        A key that is absent from the file leaves the corresponding entry as
        its bare directory prefix.

    Raises:
        ValueError: If a non-blank line contains no ``=`` separator.
        OSError: If the configuration file cannot be opened.
    """
    read_file = "../data/CysticFibrosis/"
    queries_file = "../results/"
    expected_file = "../results/"
    cfg_path = "../data/" + config_file

    # Use a distinct handle name so the `config_file` argument is not shadowed.
    with open(cfg_path, "r") as cfg:
        for line in cfg:
            # A trailing or separating empty line is not an error.
            if not line.strip():
                continue

            # partition() splits on the first "=" only, so "=" is allowed
            # inside the filename.
            instruction, separator, filename = line.partition("=")
            if not separator:
                raise ValueError(
                    f"malformed configuration line (missing '='): {line!r}"
                )
            instruction = instruction.strip()
            filename = filename.strip()

            if instruction == "LEIA":
                read_file += filename
            elif instruction == "CONSULTAS":
                queries_file += filename
            elif instruction == "ESPERADOS":
                expected_file += filename

    return (read_file, queries_file, expected_file)

In [14]:
# Resolve the input XML path and the two output CSV paths from ../data/pc.cfg.
read, queries, expected = read_config("pc.cfg")

### 2) Reading the XML file

In [15]:
from xml.etree import ElementTree as ET

def get_xml_root(path):
    """Parse the XML document at ``path`` and return its root element.

    Args:
        path: Location of the XML file, passed straight to ``ET.parse``.

    Returns:
        The root ``Element`` of the parsed tree.
    """
    return ET.parse(path).getroot()

In [16]:
# Parse the XML file named by the configuration; later cells reuse this root.
xml_root = get_xml_root(read)

### 3) Generating files

In [17]:
def get_queries_file(path, xml_root):
    """Write the queries CSV: one ``QueryNumber;QueryText`` row per query.

    Each child of ``xml_root`` is treated as one query. Its ``QueryNumber``
    child is converted to an integer and its ``QueryText`` child is
    upper-cased, stripped of ``;`` (the column separator) and has every run
    of whitespace -- including line breaks -- collapsed to a single space,
    so each query occupies exactly one line of the output file.

    Args:
        path: Destination of the CSV file; an existing file is overwritten.
        xml_root: Root element whose children are the query elements.

    Raises:
        ValueError: If a ``QueryNumber`` element does not hold an integer.
    """
    with open(path, 'w') as queries:
        queries.write("QueryNumber;QueryText\n")

        for query in xml_root:
            query_number = ""
            query_text = ""
            for element in query:
                if element.tag == "QueryNumber":
                    query_number = int(element.text)
                elif element.tag == "QueryText":
                    # An empty <QueryText/> element has text None.
                    raw_text = element.text or ""
                    query_text = raw_text.upper().replace(';', '')
                    # Collapse wrapped lines and indentation so the text
                    # cannot spill onto another CSV row.
                    query_text = " ".join(query_text.split())
            # Terminate the row; without the newline all queries would be
            # concatenated on a single line.
            queries.write(f"{query_number};{query_text}\n")

In [18]:
# Generate the queries CSV at the path taken from the configuration file.
get_queries_file(queries, xml_root)

In [19]:
def get_expected_file(path, xml_root):
    """Write the expected-results CSV, one row per (query, document) pair.

    Every child of ``xml_root`` is scanned in order: a ``QueryNumber`` child
    sets the current query number, and each item inside a ``Records`` child
    produces a ``QueryNumber;DocNumber;DocVotes`` row. ``DocVotes`` is the
    count of characters in the item's ``score`` attribute other than ``'0'``.

    Args:
        path: Destination of the CSV file; an existing file is overwritten.
        xml_root: Root element whose children are the query elements.
    """
    with open(path, 'w') as out:
        out.write("QueryNumber;DocNumber;DocVotes\n")

        for query_node in xml_root:
            current_query = ""
            for child in query_node:
                tag = child.tag
                if tag == "QueryNumber":
                    current_query = int(child.text)
                    continue
                if tag != "Records":
                    continue

                for record in child:
                    doc_id = int(record.text)
                    # Count every score character that is not '0'.
                    votes = sum(1 for digit in record.attrib['score'] if digit != '0')
                    out.write(f"{current_query};{doc_id};{votes}\n")

In [20]:
# Generate the expected-results CSV at the path taken from the configuration file.
get_expected_file(expected, xml_root)