In [2]:
import os
import re
import csv
import uuid
import glob
import shutil

In [3]:
# Variables
# Root folder that holds one sub-directory of LaTeX sources per paper.
source_dir = "source_files/"

# Table outputs: extracted table code and document headers each get their own sub-folder.
table_output_dir = "extracted_tables/"
table_code_dir = f"{table_output_dir}table_code/"
table_header_dir = f"{table_output_dir}table_header/"
table_result_file = "tables_regex.csv"

# Figure outputs.
figure_output_dir = "extracted_figures/"
figure_result_file = "figures_regex.csv"

# Extensions tried, in this order, when a graphic is referenced without one.
possible_extensions = [".pdf", ".png", ".jpg", ".jpeg", ".eps"]

# Make sure every output folder exists before anything is written to it.
for required_dir in (table_output_dir, table_header_dir, table_code_dir, figure_output_dir):
    os.makedirs(required_dir, exist_ok=True)

In [4]:
# Reset directories
# Empty the folders that receive per-table / per-figure files from a previous run.
for output_dir in (table_code_dir, table_header_dir, figure_output_dir):
    for stale_path in glob.glob(output_dir + "*"):
        os.remove(stale_path)

# Drop the result CSVs as well, if they are still present.
for result_path in (
    table_output_dir + table_result_file,
    figure_output_dir + figure_result_file,
):
    if os.path.isfile(result_path):
        os.remove(result_path)

In [16]:
# TODO: Save paper name and authors as metadata


def _extract_caption(environment):
    r"""Return the text of the first ``\caption{...}`` found in ``environment``.

    Braces are matched by nesting depth, so captions that contain groups such
    as ``\caption{\label{tab:x} Some \textbf{bold} text}`` or that span several
    lines are returned in full instead of being cut at the first ``}``.
    A starred form (``\caption*``) and an optional short caption
    (``\caption[short]{long}``) are accepted and skipped over.

    Args:
        environment: LaTeX source of one table/figure environment.

    Returns:
        The caption text without the surrounding braces, or ``None`` when no
        caption is present or its braces never balance.
    """
    opening = re.search(r"\\caption\*?\s*(?:\[[^\]]*\])?\s*\{", environment)
    if opening is None:
        return None

    start = opening.end()
    depth = 1
    position = start
    while position < len(environment):
        char = environment[position]
        if char == "\\":
            # Skip the escaped character so \{ and \} do not affect the depth.
            position += 2
            continue
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return environment[start:position]
        position += 1
    return None


def _resolve_graphic(paper_dir, graphic_name, extensions):
    r"""Locate the file referenced by an ``\includegraphics`` argument.

    The name is tried as written first and then with each entry of
    ``extensions`` appended, always relative to ``paper_dir``.

    Args:
        paper_dir: Directory holding the paper's sources.
        graphic_name: Argument of ``\includegraphics`` (may lack an extension).
        extensions: Extensions (including the dot) to try, in order.

    Returns:
        Path of the first existing candidate, or ``None`` if none exists.
    """
    base_path = paper_dir + "/" + graphic_name
    for candidate in [base_path] + [base_path + ext for ext in extensions]:
        if os.path.isfile(candidate):
            return candidate
    return None


# Both result files are managed by `with` so they are flushed and closed even
# if processing a paper raises.
with open(table_output_dir + table_result_file, 'w', newline='', encoding="utf-8") as csvfile_table, \
        open(figure_output_dir + figure_result_file, 'w', newline='', encoding="utf-8") as csvfile_figure:
    spamwriter_table = csv.writer(csvfile_table, delimiter=';', quotechar='"', quoting=csv.QUOTE_ALL)
    spamwriter_figure = csv.writer(csvfile_figure, delimiter=';', quotechar='"', quoting=csv.QUOTE_ALL)

    # Sorted so that the processing order is reproducible across runs/platforms.
    for paper in sorted(os.listdir(source_dir)):
        paper_path = source_dir + paper
        if not os.path.isdir(paper_path):
            continue
        print("\n\n" + paper)

        # Concatenate every .tex file of the paper into one string.
        tex_files = sorted(x for x in os.listdir(paper_path) if x.endswith('.tex'))
        complete_tex = ""
        for file in tex_files:
            try:
                with open(paper_path + "/" + file, "r", encoding="utf8") as f:
                    complete_tex += f.read()
            except UnicodeDecodeError:
                print("UnicodeDecodeError occurred. File could not be loaded.")
                continue

        # Preamble: from \documentclass (with or without [options]) up to the
        # first \begin{document} that follows it.
        found_document_header = re.search(
            r"\\documentclass[\[{].*?\\begin\{document\}", complete_tex, re.DOTALL
        )
        paper_id = str(uuid.uuid4())
        if not found_document_header:
            print("Document header could not be identified.")
            continue
        paper_header_file = table_header_dir + paper_id + ".txt"
        with open(paper_header_file, "w", encoding="utf-8") as f:
            f.write(found_document_header.group(0))

        found_tables = re.findall(r"\\begin\{table\*?\}.*?\\end\{table\*?\}", complete_tex, re.DOTALL)
        found_figures = re.findall(r"\\begin\{figure\*?\}.*?\\end\{figure\*?\}", complete_tex, re.DOTALL)

        for table in found_tables:
            # Print every source line that references this table's label.
            label_match = re.search(r"\\label\{(.*?)\}", table)
            if label_match:
                found_label = label_match.group(1)
                print(found_label)
                found_paragraphs = re.findall(fr".*\\ref\{{{re.escape(found_label)}}}.*", complete_tex)
                for paragraph in found_paragraphs:
                    print(paragraph)

            # Only tables with a caption are recorded and saved.
            found_caption = _extract_caption(table)
            if found_caption is not None:
                table_id = str(uuid.uuid4())
                spamwriter_table.writerow([table_id, paper_id, found_caption])
                table_file_path = table_code_dir + table_id + ".txt"
                with open(table_file_path, "w", encoding="utf-8") as f:
                    f.write(table)

        for figure in found_figures:
            # Only figures with a caption are recorded.
            found_caption = _extract_caption(figure)
            if found_caption is None:
                continue

            found_graphics = re.findall(r"\\includegraphics(?:\[.*?\])*\{(.*?)\}", figure)
            for graphic_name in found_graphics:
                graphic_name = graphic_name.strip()
                graphic_path = _resolve_graphic(paper_path, graphic_name, possible_extensions)
                if graphic_path is None:
                    # Do not fall back to the bare name: that would be looked
                    # up relative to the current working directory.
                    print(f"File not found: {graphic_name}")
                    continue

                figure_id = str(uuid.uuid4())
                # Take the extension from the resolved file so that graphics
                # referenced without one keep their real extension.
                file_type = os.path.splitext(graphic_path)[-1]
                try:
                    shutil.copy(graphic_path, figure_output_dir + figure_id + file_type)
                except OSError as e:
                    print(f"An error occurred: {e}")
                else:
                    # Write the CSV row only once the image was actually copied.
                    spamwriter_figure.writerow([figure_id, paper_id, found_caption])

        print(f"{len(found_tables)} tables found.")
        print(f"{len(found_figures)} figures found.")



2001.00116v2
tab:fpr_tpr
can help discriminate an AE that is across the decision boundary. In detail, we use an optimal threshold based on the ROC (receiver operating characteristic) curve, to split AEs and benign images distributions. Table~\ref{tab:fpr_tpr} presents the FPR and TPR (i.e., Detection Rate defined in Section~\ref{sec:eval}). \zedit{Note that the results are only for illustrating that E\&R imposes different impacts
tab:cw
As shown in Table~\ref{tab:cw}, the proposed technique achieves very high detection rates (up to 100\% on CIFAR-10, and 99.3\% on ImageNet) 
For another leading $L_2$ AE generation algorithm---DeepFool (see Section~\ref{sec:deepfool}), we observe very similar results as CW-$L_2$. Table~\ref{tab:cw} shows that our detector achieves very high detection rates (up to 99.8\% on CIFAR-10, and 95.0\% on ImageNet) with low FPR values. 
\fedit{For CW-$L_2$ attack, their experiments only examine $\kappa$ = 0.0, which is the default setting, so we also list the 

So far, the anomaly in all types of the local linear transformation of fermion fields\footnote{The local and linear transformation of $\psi(x),\bar{\psi}(x)$ must be $\delta\psi(x)=\alpha(x)\Omega\psi(x),\delta\bar{\psi}(x)=\alpha(x)\bar{\psi}(x)\widetilde{\Omega}$, where $\Omega$ and $\widetilde{\Omega}$ are a linear combination of $\gamma$ matrices and hence of $\mathbbm{1},\gamma^\mu,[\gamma^\mu,\gamma^\nu],\gamma^\mu\gamma_5,\gamma_5$. However, the traces of odd number $\gamma$ matrices are zero; i.e., $\mathrm{tr}[\gamma^\mu]\delta^4(x-x)$ and $\mathrm{tr}[\gamma^\mu,\gamma_5]\delta^4(x-x)$ are zero even after regularization.} (not all symmetry transformations) has been exhausted. There are only three non-zero anomalies; see Table.\ref{tab:1} (in Fujikawa's style for simplicity).
Table.\ref{tab:1} shows that the transverse anomalies have many more types of operators than the trace anomaly and chiral anomaly. In particular, the $C_{abc}F_b^{\mu\rho}F_{c\rho}^{\ \ \nu}$ term may hav

tbl:corpus_info
The dataset used to evaluate the models' performance is the Chatbot Natural Language Understanding (NLU) Evaluation Corpus, introduced by Braun et al.~\cite{braun2017evaluating} to test NLU services. It is a publicly available~\footnote{\url{https://github.com/sebischair/NLU-Evaluation-Corpora}} benchmark and is composed of sentences obtained from a German Telegram chatbot used to answer questions about public transport connections. The dataset has two intents, namely \textit{Departure Time} and \textit{Find Connection} with 100 train and 106 test samples, shown in Table \ref{tbl:corpus_info}. Even though English is the main language of the benchmark, this dataset contains a few German station and street names.
Another interesting observation in the Twitter dataset is that our proposed model more significantly improves the performance of class 1 (positive) with a small decrease of performance of class 0 (negative). However, the same pattern is not observed when we compa

When comparing the improvement in performance in the Twitter and TTS-STT Chatbot datasets, we notice that the former shows major improvements whilst the latter shows only minor improvements. We investigate if this is due to lower noise levels in the Twitter dataset. However, as can be seen in Table~\ref{tbl:noise_level_comparison_performance}, our model’s better performance in the Twitter dataset is not related to it having lower noise levels when compared to the TTS-STT Chatbot corpus. A possible reason as to why our model is able to improve its performance by a larger margin in the Twitter dataset can be due to BERT being trained on the Wikipedia and Book Corpus~\cite{devlin2018bert}. Twitter has arguably more noise when compared to BERT’s original training data due to its highly informal setting and character limitation. However, studies suggest that social media text has relatively small grammatical disparity when compared to edited text such as Wikipedia~\cite{baldwin2013noisy}, a

6 tables found.
19 figures found.


2001.00151v1
0 tables found.
5 figures found.


2001.00152v1
Table 1
%If $\lambda_n$ decays faster than $O_p(n^{-\frac{2m}{4m+d}})$, Theorem \ref{th:improvedrate} is not applicable. In this case, we can still invoke Corollary \ref{Coro:rate} to obtain slower rates of convergence. We summarize the details in Table \ref{Table 1}.
File not found: lexample_fig1 - [Errno 2] No such file or directory: 'lexample_fig1'
1 tables found.
2 figures found.


2001.00153v1
Document header could not be identified.


2001.00154v1
0 tables found.
6 figures found.


2001.00160v1
0 tables found.
5 figures found.


2001.00161v1
tab:parametersBC
Three sets of parameters of the charm and bottom system corresponding to the varying of the interaction width are listed in Table~\ref{tab:parametersBC}.
\caption{\label{tab:massccbar} Masses (in MeV) of the charmonium with $J^{PC}\,=\,0^{-+},\,1^{--},\,0^{++},\,1^{+-},\,1^{++},\,2^{++}$, the normal states in the quark model. $M^{

corr_irHI
Figures \ref{3plots_irHI8} \& \ref{7plots_irHI8} show the gas-dust correlations for the 8 $\mu$m dust band in NGC 3184 and NGC 7793, respectively. The four panels in Figure \ref{3plots_irHI8} correspond to the separated warm/cold HI gas, total HI gas, molecular CO gas and total (atomic+molecular) gas column densities with respect to 8 $\mu$m dust emission in NGC 3184. The spiral arm locations have been marked separately in the total gas-dust correlation plot for NGC 3184 which have been identified using a combination of 3.6 $\mu$m and H$\alpha$ images available in the SINGS data archive \citep{Kennicutt+2003}. Since the CO data was unavailable for NGC 7793 in the public archives, we have only shown the separated warm/cold HI and the total HI gas column density vs 8 $\mu$m dust emission for this galaxy in Figure \ref{7plots_irHI8}. Being a flocculent spiral, the arms in NGC 7793 are distributed throughout the galaxy and hence we haven't been able to separate them from the inte

We firstly calculate the total and partial widths of the open charm two-body strong decays of pure $\psi(6S)$ and $\psi(5D)$ states, which are summarized in Table \ref{tab:table1}. The total width of $\psi(6S)$ is estimated to be 28.5 MeV, which is in full accordance with the fitted width of $29.8\pm8$ MeV of the charmoniumlike $Y_1$ state. After considering the $S$-$D$ mixing, we find the total widths of $\psi'_{6S-5D}$ and $\psi''_{6S-5D}$ are not sensitive to the mixing angle as shown in Fig. \ref{fig:mixing}. Thus, both mass and width of the $\psi'_{6S-5D}$ dominated by the $c\bar{c}$ component of $6^3S_1$ are consistent with the $Y_1$ state. This gives us
When the mixing angle of the $6S$-$5D$ mixture is $\pm34^{\circ}$, the mass and total width of $\psi''_{6S-5D}$ are 4675 and 30 MeV, respectively, where the theoretical mass is close to 4676$\pm$7 MeV of the fitted $Y_2$ state, but the width is smaller than the fitted result of $85.7\pm15$ MeV. However, we should emphasize here t

Different studies have recently evaluated the difficulty of detecting whether faces are real of artificially generated. Table~\ref{table:relatedWorks_entireFaceSynthesis} shows a comparison of the most relevant approaches in this area. For each study, we include information related to the method, classifiers, best performance, and databases considered. We highlight in \textbf{bold} the best results achieved for each public database. It is important to remark that in some cases, different evaluation metrics are considered, e.g., Area Under the Curve (AUC) or Equal Error Rate (EER), which complicates the comparison among the studies.
Fake detection systems inspired in steganalysis have also been studied. Nataraj \textit{et al.} proposed in~\cite{nataraj2019detecting} a detection system based on a combination of pixel co-occurrence matrices and Convolutional Neural Networks (CNN). Their proposed approach was initially tested through a database of various objects and scenes created through

2 tables found.
21 figures found.


2001.00202v3
0 tables found.
0 figures found.


2001.00203v1
tab1
These operators are evaluated using completely symmetric symmetric spin-isospin states $\vert W_B\rangle$~\cite {Clo}. We obtained the quark model matrix elements up to second order corrections (two-body terms) listed in Table~\ref{tab1}, 
To derive from the quark level matrix elements in Table~\ref{tab1} the 
tab2
Table~\ref{tab2} lists the various couplings 
quadmo
In Tables~\ref{quadmo} and ~\ref{transquad}
Tables~\ref{quadmo} and~\ref{transquad}, it is straightforward to verify 
Tables~\ref{quadmo} and~\ref{transquad} can be written down if desirable.
Table~\ref{quadmo} with $B=r_n^2/4$ and $C=0$. 
transquad
In Tables~\ref{quadmo} and ~\ref{transquad}
Tables~\ref{quadmo} and~\ref{transquad}, it is straightforward to verify 
Tables~\ref{quadmo} and~\ref{transquad} can be written down if desirable.
to the analytic expressions in Table~\ref{transquad}.} 
quadmonum
are listed in Tables

In [None]:
# Close both result CSV files.
for result_handle in (csvfile_table, csvfile_figure):
    result_handle.close()