In [1]:
import os
from pathlib import Path

import pandas as pd

from csv_file_processor import CSVFileProcessor

# Root directory of the data share. The default is the original GVFS SMB mount
# point, which is specific to one user session; set the TECHNO_BASE_DIR
# environment variable to point the notebook at a different mount or local copy.
BASE_DIR = Path(
    os.environ.get(
        "TECHNO_BASE_DIR",
        "/run/user/1000/gvfs/smb-share:server=fs-s-nas01.rrz.uni-hamburg.de,share=gw-gw-sm",
    )
)

In [None]:
# Input directories holding the per-track CSV files, one per country.
germany_csv_dir = BASE_DIR.joinpath("techno-csv", "germany")
usa_csv_dir = BASE_DIR.joinpath("techno-csv", "usa")

# Result files, one per country; relative names, so they are created in the
# current working directory.
germany_result_file = "results_channel_correlation_germany.csv"
usa_result_file = "results_channel_correlation_usa.csv"

# A file whose mean channel correlation is at or above this value is flagged
# as mono.
correlation_threshold = 0.99

def audio_mean_channel_correlation(file: Path):
    """Return the mean of the ``ChannelCorrelation`` column of one CSV file.

    Args:
        file: Path of a CSV file (pandas default comma separator) that has a
            ``ChannelCorrelation`` column.

    Returns:
        The arithmetic mean of that column as computed by ``Series.mean()``.

    Raises:
        KeyError: If the file has no ``ChannelCorrelation`` column.
    """
    frame = pd.read_csv(file)
    channel_correlation = frame["ChannelCorrelation"]
    return channel_correlation.mean()

def channel_correlation_extraction(result_file: str, processor: CSVFileProcessor):
    """Write one semicolon-separated result row per file yielded by ``processor``.

    The output starts with the header ``csv_file;audio_file;correlation;is_mono``.
    For every ``(file, correlation)`` pair from ``processor.process()`` a row is
    written containing the CSV path relative to ``BASE_DIR``, the derived audio
    path (suffix stripped, ``techno-csv`` replaced by ``techno-audio``), the
    correlation value, and whether it reaches the module-level
    ``correlation_threshold``.

    Args:
        result_file: Path of the result file; it is created or overwritten.
        processor: Object whose ``process()`` yields ``(file, correlation)``
            pairs, where ``file`` is a path located under ``BASE_DIR``.

    Raises:
        ValueError: If a yielded ``file`` is not located under ``BASE_DIR``
            (raised by ``Path.relative_to``).
    """

    def _quote_field(text: str) -> str:
        # Double embedded quotes so a path containing '"' still parses as one
        # field (pandas.read_csv uses doublequote=True by default).
        return '"' + text.replace('"', '""') + '"'

    # Explicit UTF-8: pandas.read_csv decodes as UTF-8 by default, so writing
    # with the locale encoding could corrupt non-ASCII file names on read-back.
    with open(result_file, "w", encoding="utf-8") as f:
        f.write("csv_file;audio_file;correlation;is_mono\n")

        for file, correlation in processor.process():
            is_mono = correlation >= correlation_threshold
            csv_file_path = str(file.relative_to(BASE_DIR))
            audio_file_path = str(Path(csv_file_path).with_suffix("")).replace("techno-csv", "techno-audio")
            f.write(f"{_quote_field(csv_file_path)};{_quote_field(audio_file_path)};{correlation};{is_mono}\n")

# USA: build the processor for the USA CSV directory and write its result file.
usa_processor = CSVFileProcessor(usa_csv_dir, audio_mean_channel_correlation)
channel_correlation_extraction(
    result_file=usa_result_file,
    processor=usa_processor,
)

# Germany: same steps for the German CSV directory.
germany_processor = CSVFileProcessor(germany_csv_dir, audio_mean_channel_correlation)
channel_correlation_extraction(
    result_file=germany_result_file,
    processor=germany_processor,
)

In [77]:
def get_mono_files(file: Path):
    """Load a semicolon-separated result file and keep the rows flagged as mono.

    Args:
        file: Path of a result CSV that uses ``;`` as separator and has an
            ``is_mono`` column.

    Returns:
        A DataFrame with only the rows whose ``is_mono`` value equals ``True``;
        the original row index is kept.
    """
    results = pd.read_csv(file, sep=";")
    mono_mask = results["is_mono"].eq(True)
    return results.loc[mono_mask]

# For each country, keep only the files flagged as mono and export their audio
# path and correlation to a comma-separated CSV in the working directory.
usa_mono_files = get_mono_files(Path(usa_result_file))
usa_mono_files[["audio_file", "correlation"]].to_csv("usa_mono_files.csv", index=False)

# The original call here had an unbalanced extra ")" that made the whole cell a
# SyntaxError.
germany_mono_files = get_mono_files(Path(germany_result_file))
germany_mono_files[["audio_file", "correlation"]].to_csv("germany_mono_files.csv", index=False)