# String processing

**Benchmarking tests**

## Requirements

### Modules

In [1]:
from os import getcwd
from sys import path as sys_path

# Make modules that live next to the notebook importable. The membership check
# keeps the cell idempotent: re-running it no longer piles up duplicate
# entries in sys.path.
if getcwd() not in sys_path:
    sys_path.append(getcwd())

In [2]:
import patternMatching

In [3]:
import scipy as sp
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

## Definitions

In [4]:
def handle_txt(file_name):
    """Return the whole content of ``text_examples/text{file_name}.txt``.

    Parameters
    ----------
    file_name
        Value interpolated into the file name, e.g. ``0`` reads
        ``text_examples/text0.txt``.

    Returns
    -------
    str
        The file content, decoded as UTF-8.

    Raises
    ------
    FileNotFoundError
        If the file does not exist. The message names the exact path that
        was tried.
    """
    # Build the path once so the open() call and the error message cannot drift apart.
    path = f"text_examples/text{file_name}.txt"
    try:
        with open(path, 'r', encoding="utf-8") as file:
            return file.read()
    except FileNotFoundError as error:
        # Keep the original exception chained for easier debugging.
        raise FileNotFoundError(f"No such file or directory has been found: {path}") from error

## Loading the texts

In [5]:
# For each of the six sample files (text0.txt ... text5.txt) build a pair of
# matchers looking for the same pattern: position 0 holds the ExactMatching
# instance, position 1 the FuzzyMatching instance.
texts = {
    i: [matcher(f"text{i}.txt", "sentido")
        for matcher in (patternMatching.ExactMatching, patternMatching.FuzzyMatching)]
    for i in range(6)
}
# Length of the `text` attribute of each exact matcher, in the same order as `texts`.
lengths = [len(matchers[0].text) for matchers in texts.values()]

In [6]:
lengths

[566, 931, 1405, 2022, 4879, 144769]

## Creating the `pd.DataFrame` object

In [7]:
# Labels of the benchmarked algorithms; they become the "Algorithm" index level.
algorithms = [
    "Brute-force",
    "BMH",
    "BMHS",
    "Exact shift-and",
    "Approximate shift-and (k = 1)",
    "Approximate shift-and (k = 2)",
]
# Number of entries in the "Execution" index level for each (text, algorithm) pair.
executions = 100

In [8]:
# Empty results table: one row per (text, algorithm, execution) combination and
# a single column that starts out unfilled.
columns = ["Time elapsed"]
index = pd.MultiIndex.from_product(
    iterables=[list(texts), algorithms, list(range(executions))],
    names=["Text", "Algorithm", "Execution"],
)
results = pd.DataFrame(columns=columns, index=index)

In [9]:
results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Time elapsed
Text,Algorithm,Execution,Unnamed: 3_level_1
0,Brute-force,0,
0,Brute-force,1,
0,Brute-force,2,
0,Brute-force,3,
0,Brute-force,4,
...,...,...,...
5,Approximate shift-and (k = 2),95,
5,Approximate shift-and (k = 2),96,
5,Approximate shift-and (k = 2),97,
5,Approximate shift-and (k = 2),98,
