In [4]:
import re
import pandas as pd
from pathlib import Path

from csv_file_processor import CSVFileProcessor

BASE_DIR = Path("/run/user/1000/gvfs/smb-share:server=fs-s-nas01.rrz.uni-hamburg.de,share=gw-gw-sm")

In [None]:
# Per-country input directories below BASE_DIR/techno-csv.
germany_csv_dir = BASE_DIR.joinpath("techno-csv", "germany")
usa_csv_dir = BASE_DIR.joinpath("techno-csv", "usa")

# Result file names; being relative, they resolve against the current working directory.
germany_result_file = "results_bpm_year_germany.csv"
usa_result_file = "results_bpm_year_usa.csv"


def bpm_tempo(file: Path):
    """Return the first entry of the ``bpm`` column of a CSV file.

    Args:
        file: Path of the CSV file; it is parsed with the ``pd.read_csv``
            defaults and must contain a ``bpm`` column.

    Returns:
        The value stored under index label ``0`` of the ``bpm`` column.

    Raises:
        KeyError: If there is no ``bpm`` column or no row labelled ``0``
            (for example when the file has a header but no data rows).
    """
    tempo_column = pd.read_csv(file)["bpm"]
    # Label-based lookup: with the default index created by read_csv,
    # label 0 is the first data row.
    return tempo_column[0]


def extract_year(file: Path):
    """Extract a parenthesised four-digit year from a file name.

    Only the final path component (``file.name``) is searched; the first
    occurrence of ``(DDDD)`` wins.

    Args:
        file: Path whose name may contain a year such as ``(1999)``.

    Returns:
        The four digits as a string, or ``None`` when the name contains no
        such group.
    """
    found = re.search(r"\((\d{4})\)", file.name)
    return found.group(1) if found else None


def bpm_year_extraction(result_file: str, processor: CSVFileProcessor):
    """Write one ``csv_file;audio_file;bpm;year`` row per processed file.

    The result file is created or truncated, a header line is written, and
    then one semicolon-separated line is appended for every item yielded by
    ``processor.process()``.

    Args:
        result_file: Path of the CSV file to write.
        processor: Object whose ``process()`` yields ``(file, (bpm, year))``
            pairs. ``file`` is assumed to be a ``pathlib.Path`` located below
            ``BASE_DIR`` -- TODO confirm against ``CSVFileProcessor``.

    Raises:
        ValueError: If a yielded ``file`` is not located below ``BASE_DIR``
            (raised by ``Path.relative_to``).
    """
    # Explicit UTF-8 so non-ASCII file names are written the same way
    # regardless of the platform's locale encoding.
    with open(result_file, "w", encoding="utf-8") as f:
        f.write("csv_file;audio_file;bpm;year\n")

        for file, (bpm, year) in processor.process():
            csv_file_path = str(file.relative_to(BASE_DIR))
            # The audio file is derived from the CSV path: drop the last
            # suffix and swap the "techno-csv" tree for "techno-audio".
            audio_file_path = str(Path(csv_file_path).with_suffix("")).replace("techno-csv", "techno-audio")

            # The path fields are wrapped in double quotes, so embedded
            # quotes must be doubled to keep the row parseable as CSV.
            csv_field = csv_file_path.replace('"', '""')
            audio_field = audio_file_path.replace('"', '""')
            f.write(f'"{csv_field}";"{audio_field}";{bpm};{year}\n')


def extractor(file: Path):
    """Return the ``(bpm, year)`` pair extracted from one CSV file."""
    return bpm_tempo(file), extract_year(file)


# Run the extraction once per country; the processor and result names are
# kept at module level so later cells can still refer to them.
usa_processor = CSVFileProcessor(usa_csv_dir, extractor)
bpm_year_extraction(usa_result_file, usa_processor)

germany_processor = CSVFileProcessor(germany_csv_dir, extractor)
bpm_year_extraction(germany_result_file, germany_processor)