In [1]:
import pandas as pd
from pathlib import Path

In [12]:
def extract_information(file_path):
    """
    Extract benchmark metadata from a single result file.

    The implementation type and the argument value come from the filename: the
    stem is split on '-' and the second and third parts are used (for example
    ``out-gpu-100`` yields ``'GPU'`` and ``100``). The execution time is read
    from the first line of the file that contains ``"Elapsed time:"``.

    Parameters
    ----------
    file_path : pathlib.Path
        Path to the result file. Only ``.stem`` and ``.open`` are used.

    Returns
    -------
    dict
        ``implementation`` (upper-cased str), ``n_docs`` (int) and
        ``execution_time`` (float). The unit is whatever the file reports;
        presumably milliseconds given the assumed "Elapsed time: X ms" format.

    Raises
    ------
    ValueError
        If the filename stem has fewer than three '-'-separated parts, if the
        number-of-documents part or the time token is not numeric, or if no
        line of the file contains "Elapsed time:".
    """
    # Extract implementation and number of docs from the filename stem
    parts = file_path.stem.split('-')
    if len(parts) < 3:
        raise ValueError(
            f"Unexpected result filename {file_path}: "
            "expected '<prefix>-<implementation>-<n_docs>'"
        )
    implementation = parts[1].upper()  # 'gpu' / 'cpu' -> 'GPU' / 'CPU'
    n_docs = int(parts[2])  # raises ValueError if not an integer

    # Read the execution time from the file; stays None if no line matches,
    # so a missing timing line is reported explicitly instead of surfacing as
    # an UnboundLocalError at the return statement.
    execution_time = None
    with file_path.open('r') as file:
        for line in file:
            if "Elapsed time:" in line:
                # Second-to-last token, assuming the line ends with "X ms"
                execution_time = float(line.split()[-2])
                break

    if execution_time is None:
        raise ValueError(f"No 'Elapsed time:' line found in {file_path}")

    return dict(implementation=implementation, n_docs=n_docs, execution_time=execution_time)


In [13]:
# Lazily enumerate the result files (out-*.txt) in the results directory
files = Path('../results/').glob('out-*.txt')

# Parse every file into a record dict, then assemble the records into a DataFrame
data = list(map(extract_information, files))
df = pd.DataFrame.from_records(data)

print(df)

  implementation  n_docs  execution_time
0            GPU     100         229.766
1            GPU      10         231.431
2            CPU    1000          63.606
3            GPU   10000         256.250
4            CPU      10           0.925
5            CPU     100          10.549
6            CPU   10000         549.289
7            GPU    1000         236.207


In [15]:
# Show the results grouped by implementation, then by ascending document count
df.sort_values(['implementation', 'n_docs'])

Unnamed: 0,implementation,n_docs,execution_time
4,CPU,10,0.925
5,CPU,100,10.549
2,CPU,1000,63.606
6,CPU,10000,549.289
1,GPU,10,231.431
0,GPU,100,229.766
7,GPU,1000,236.207
3,GPU,10000,256.25
