## 1. Importing third-party libraries and the utils module

In [1]:
import pandas as pd
import re

from utils import *

## 2. Reading the paper title and techniques

In [2]:
# Load the table of paper titles and techniques; each row is later read through
# the DataExtraction.PAPER_NAME / PMC_TECHNIQUE / VMS_TECHNIQUE / VMP_TECHNIQUE
# column keys when the synthesis table is generated.
extraction_form = DataExtraction()
title_and_techniques_df = extraction_form.get_paper_title_and_techniques()

## 3. Reading the bibtex

After manually creating the BibTeX file with the references to all papers included in the study, we read it in order to generate our synthesis table.  
A regular expression extracts the citation key and paper title of each entry, and we store them in a dictionary that maps each lowercased title to its citation key.

In [3]:
# Raw BibTeX source, as returned by the utils helper.
bibtex = read_bibtex()

# Group 1: the citation key right after "@<type>{" -- word characters that
#          contain at least one digit with word characters on both sides.
# Group 2: everything between "title = {" and the last "}," on the same line.
# The lazy [\s\S]*? skips the fields that sit between the key and the title.
pattern = re.compile(r'@\w+\{(\w+\d+\w+),[\s\S]*?\btitle\s*=\s*\{(.*)\},')

# Map each lowercased title to its citation key so lookups by paper name are
# case-insensitive; if two entries share a title, the later one wins.
title_map = {}
for citation_key, paper_title in pattern.findall(bibtex):
    title_map[paper_title.lower()] = citation_key

## 4. Creating the synthesis table

To create the synthesis table, we iterate over all rows of the DataFrame containing the titles and techniques.  
At each iteration, we first get the PMC, VMS, and VMP techniques from the Data Extraction Form responses.  
Then, we abbreviate the PMC technique using the `Abbreviations.PMC` dictionary.  
Following that, we replace "Not Available" entries with "(NA)". This allows us to use `vms_pattern` to find the method abbreviations inside parentheses.  
The VMP abbreviations are applied afterwards.  
Finally, we write the row containing the citation and the techniques to the output file and print it.

In [4]:
# Captures the word characters that follow each "(" in the VMS description,
# i.e. the method abbreviations written inside parentheses.
vms_pattern = re.compile(r'\((\w*)\)*')

# NOTE(review): `count` and `problems` are not used anywhere in this cell; they
# are kept in case a later cell reads them -- confirm and remove if not.
count = 0
problems = []

# Minimum widths used to left-align the cells of every generated row.
offset1 = 25  # citation key plus its closing brace
offset2 = 25  # PMC column
offset3 = 35  # VMS and VMP columns
offset4 = 1   # padding inserted before the row terminator

# LaTeX row terminator (two literal backslashes).
new_line = '\\\\'

# Number of rows after which the first table is closed and a second one opened.
split = 45

table_header = (
'''
\\begin{table}[htb!]
\t\\footnotesize
\t\\caption{}
\t\\begin{tabular}{@{}llll@{}}
\t\t\\toprule
\t\tReference                       & PMC                       & VMS                                 & VMP                                   \\\\
\t\t\\midrule\
''')

table_footer = (
'''
\t\t\\bottomrule
\t\\end{tabular}
\\end{table}\
'''
)

with open(f'{OUTPUT_PATH}/SynthesisTable.tex', 'w') as file:
    i = 0
    # Tracks whether a header has been emitted without a matching footer, so
    # that a table is never closed twice (row count == split) and a footer is
    # never written without a header (empty DataFrame).
    table_open = False
    for index, row in title_and_techniques_df.iterrows():

        # Open the first table, and a second one once `split` rows were written.
        if i == 0 or i == split:
            file.write(table_header)
            print(table_header, end='')
            table_open = True

        pmc = row[DataExtraction.PMC_TECHNIQUE]
        vms = row[DataExtraction.VMS_TECHNIQUE]
        vmp = row[DataExtraction.VMP_TECHNIQUE]

        for term, abbreviation in Abbreviations.PMC.items():
            pmc = pmc.replace(term, abbreviation)

        # Wrap "Not available" in parentheses so vms_pattern picks it up as NA
        # together with the other parenthesised abbreviations.
        vms = re.sub(r'Not (A|a)vailable', '(NA)', vms)
        vms = ' + '.join(re.findall(vms_pattern, vms))

        for term, abbreviation in Abbreviations.VMP.items():
            vmp = vmp.replace(term, abbreviation)

        # Resolve the citation key; still a KeyError when the title is missing
        # from the bibtex, but with a message that names the offending paper.
        paper_title = row[DataExtraction.PAPER_NAME]
        try:
            citation = title_map[paper_title.lower()] + '}'
        except KeyError:
            raise KeyError(
                f'No bibtex entry found for paper title: {paper_title!r}'
            ) from None

        formatted_row = f'\n\t\t\\cite{{{citation:<{offset1}} & {pmc:<{offset2}} & {vms:<{offset3}} & {vmp:<{offset3}} {"":<{offset4}} {new_line}'

        file.write(formatted_row)
        print(formatted_row, end='')

        i += 1

        # Close the first table as soon as it holds `split` rows.
        if i == split:
            file.write(table_footer)
            print(table_footer, end='')
            table_open = False

    # Close whichever table is still open after the last row.
    if table_open:
        file.write(table_footer)
        print(table_footer, end='')


\begin{table}[htb!]
	\footnotesize
	\caption{}
	\begin{tabular}{@{}llll@{}}
		\toprule
		Reference                       & PMC                       & VMS                                 & VMP                                   \\
		\midrule
		\cite{tarahomi2019prediction}   & ST + P (LR + WMA)         & MMT + MC + MRR + MDM                & H (HPNBFD) + P (EPA)                  \\
		\cite{alsadie2018dtfa}          & DT + P (ARIMA)            & NA                                  & NA                                    \\
		\cite{zhou2018virtual}          & ST + P (AR)               & UR                                  & R                                     \\
		\cite{daraghmeh2018linear}      & ST + P (LinLogR)          & MC + MMT + MU + RS                  & H (PABFD)                             \\
		\cite{aryania2018energy}        & NA                        & NA                                  & MH (ACS)                              \\
		\cite{wang2018resource}         & ST     