In [226]:
import numpy as np
import pandas as pd

# Результаты расчета в скрипте

Согласно README.md запустили следующий скрипт:

```bash
python3 -m venv dsp_env
source dsp_env/bin/activate
poetry install
pre-commit install
pre-commit run -a
python3 dsp_project/hw2_mixer.py
```

Получили следующие метрики:

|    | file          |   SNR, dB |      SDR |      SI-SDR |    PESQ |
|---:|:--------------|----------:|---------:|------------:|--------:|
|  0 | test_file.wav |      None |   inf    | 189.23      | 4.54864 |
|  1 | mixed_-5.wav  |        -5 | -3.01167 |  -5.04481   | 1.03564 |
|  2 | mixed_0.wav   |         0 |  1.2391  |  -0.0251546 | 1.05336 |
|  3 | mixed_5.wav   |         5 |  5.67905 |   4.98588   | 1.10321 |
|  4 | mixed_10.wav  |        10 | 10.321   |   9.99208   | 1.24552 |

Проведем расчет оставшихся метрик, а также заполним результаты MOS.

In [232]:
# Metric values reported by dsp_project/hw2_mixer.py, one record per audio file.
# The clean reference file has no SNR; its SDR is stored as None here
# (the script's own table reports it as inf).
metric_names = ("PESQ", "SI-SDR", "SDR", "file", "SNR, dB")
metric_rows = (
    (4.548638343811035, 189.22959899902344, None, "test_file.wav", None),
    (1.035638451576233, -5.044810771942139, -3.011665105819702, "mixed_-5.wav", -5),
    (1.0533638000488281, -0.025154566392302513, 1.2390981912612915, "mixed_0.wav", 0),
    (1.1032123565673828, 4.985879421234131, 5.679045677185059, "mixed_5.wav", 5),
    (1.2455214262008667, 9.992077827453613, 10.321009635925293, "mixed_10.wav", 10),
)
# Pair every row with the shared header to get the list of per-file records.
script_metric_vals = [dict(zip(metric_names, row)) for row in metric_rows]

In [245]:
# Put the script metrics into a table with a fixed column order and save it
# as CSV (the saved file is what gets handed to the NISQA prediction script).
report_columns = ["file", "SNR, dB", "SDR", "SI-SDR", "PESQ"]
metric_vals_df = pd.DataFrame(script_metric_vals).loc[:, report_columns]
metric_vals_df.to_csv("./metrics.csv")

# Расчет прочих метрик

## NISQA

Склонируем репозиторий:

```bash
git clone https://github.com/gabrielmittag/NISQA.git
```

Воспользуемся скриптом из [репозитория](https://github.com/gabrielmittag/NISQA?tab=readme-ov-file#using-nisqa), предварительно сохранив датафрейм выше в csv-формате:

```bash
python run_predict.py --mode predict_csv --pretrained_model weights/nisqa.tar --csv_file files.csv --csv_deg column_name_of_filepaths --num_workers 0 --bs 10 --output_dir /path/to/dir/with/results
```

In [253]:
# Load the NISQA predictions. The positional slice skips the first three
# columns and the last column of the results file; the metric columns that
# metric_vals_df already holds are then dropped, leaving the "SNR, dB" key
# plus the NISQA prediction columns.
# Chained .drop() returns a new frame instead of mutating a slice in place.
nisqa_results = (
    pd.read_csv("NISQA_results.csv")
    .iloc[:, 3:-1]
    .drop(columns=["SDR", "SI-SDR", "PESQ"])
)
nisqa_results

Unnamed: 0,"SNR, dB",mos_pred,noi_pred,dis_pred,col_pred,loud_pred
0,,4.706658,4.499089,4.600805,4.408635,4.54724
1,-5.0,0.912774,1.302374,3.48432,2.032331,1.818125
2,0.0,1.632286,1.347787,4.297644,2.954377,2.634257
3,5.0,2.057696,1.445465,4.409371,3.516179,3.278281
4,10.0,2.312069,1.590357,4.491841,3.960564,3.528837


In [255]:
# Attach the NISQA predictions to the script metrics. "SNR, dB" is the join
# key; pandas matches null keys with each other, so the reference file
# (SNR is NaN) keeps its row. validate= makes the merge raise instead of
# silently multiplying rows if an SNR value is ever repeated on either side.
metric_vals_df = pd.merge(
    metric_vals_df,
    nisqa_results,
    on="SNR, dB",
    validate="one_to_one",
)
metric_vals_df

Unnamed: 0,file,"SNR, dB",SDR,SI-SDR,PESQ,mos_pred,noi_pred,dis_pred,col_pred,loud_pred
0,test_file.wav,,,189.229599,4.548638,4.706658,4.499089,4.600805,4.408635,4.54724
1,mixed_-5.wav,-5.0,-3.011665,-5.044811,1.035638,0.912774,1.302374,3.48432,2.032331,1.818125
2,mixed_0.wav,0.0,1.239098,-0.025155,1.053364,1.632286,1.347787,4.297644,2.954377,2.634257
3,mixed_5.wav,5.0,5.679046,4.985879,1.103212,2.057696,1.445465,4.409371,3.516179,3.278281
4,mixed_10.wav,10.0,10.32101,9.992078,1.245521,2.312069,1.590357,4.491841,3.960564,3.528837


## DNSMOS

Для локального расчета скопируем файл `dnsmos_local.py` и воспользуемся [инструкцией](https://github.com/microsoft/DNS-Challenge/tree/master/DNSMOS#to-use-the-local-evaluation-method):

```bash
python dnsmos_local.py -t audio_path -o sample.csv
```

In [268]:
# Column labels applied by position to the DNSMOS table below.
dnsmos_columns = [
    "file",
    "len_in_sec",
    "sr",
    "num_hops",
    "OVRL_raw",
    "SIG_raw",
    "BAK_raw",
    "OVRL",
    "SIG",
    "BAK",
    "P808_MOS",
]

# Load the DNSMOS scores; the first CSV column is skipped positionally.
dnsmos_results = pd.read_csv("dnsmos_sample.csv").iloc[:, 1:]
# Keep only the part of the path after the last "/" (the bare file name).
dnsmos_results["filename"] = dnsmos_results["filename"].map(
    lambda path: path.rsplit("/", 1)[-1]
)
# Relabel by position: the leading "filename" column becomes "file".
dnsmos_results.columns = dnsmos_columns
dnsmos_results

Unnamed: 0,file,len_in_sec,sr,num_hops,OVRL_raw,SIG_raw,BAK_raw,OVRL,SIG,BAK,P808_MOS
0,mixed_10.wav,10.0,16000,1,2.547831,4.126624,2.118581,2.448811,3.613215,2.422099,2.857759
1,mixed_5.wav,10.0,16000,1,2.151239,3.94959,1.78025,2.132525,3.517146,2.051356,2.707154
2,mixed_0.wav,10.0,16000,1,1.259222,1.737673,1.180893,1.343354,1.873107,1.320582,2.343981
3,mixed_-5.wav,10.0,16000,1,1.020842,1.03436,1.035168,1.114226,1.178189,1.128608,2.287031
4,test_file.wav,10.0,16000,1,4.041115,4.210765,4.539121,3.448771,3.657029,4.195249,4.146636


In [273]:
# Attach the DNSMOS scores by file name, leaving out the clip-description
# columns (sample rate, length, hop count). validate= makes the merge raise
# instead of silently multiplying rows if a file name is ever duplicated.
metric_vals_df = pd.merge(
    metric_vals_df,
    dnsmos_results.drop(columns=["sr", "len_in_sec", "num_hops"]),
    on="file",
    validate="one_to_one",
)
metric_vals_df

Unnamed: 0,file,"SNR, dB",SDR,SI-SDR,PESQ,mos_pred,noi_pred,dis_pred,col_pred,loud_pred,OVRL_raw,SIG_raw,BAK_raw,OVRL,SIG,BAK,P808_MOS
0,test_file.wav,,,189.229599,4.548638,4.706658,4.499089,4.600805,4.408635,4.54724,4.041115,4.210765,4.539121,3.448771,3.657029,4.195249,4.146636
1,mixed_-5.wav,-5.0,-3.011665,-5.044811,1.035638,0.912774,1.302374,3.48432,2.032331,1.818125,1.020842,1.03436,1.035168,1.114226,1.178189,1.128608,2.287031
2,mixed_0.wav,0.0,1.239098,-0.025155,1.053364,1.632286,1.347787,4.297644,2.954377,2.634257,1.259222,1.737673,1.180893,1.343354,1.873107,1.320582,2.343981
3,mixed_5.wav,5.0,5.679046,4.985879,1.103212,2.057696,1.445465,4.409371,3.516179,3.278281,2.151239,3.94959,1.78025,2.132525,3.517146,2.051356,2.707154
4,mixed_10.wav,10.0,10.32101,9.992078,1.245521,2.312069,1.590357,4.491841,3.960564,3.528837,2.547831,4.126624,2.118581,2.448811,3.613215,2.422099,2.857759


## MOS

- Оценки MOS получили, опросив несколько знакомых (микро-краудсорсинг).
- Аудиодорожки отправляли в том же порядке, в каком они представлены в датафрейме с метриками.
- Каждый респондент присылал ответ одной строкой: оценки по шкале MOS для каждой дорожки, разделённые пробелами.

In [284]:
# Raw survey answers: one line per respondent, one score per audio file.
estimates_raw = """
5 3 4 5 5
5 1 2 3 4
5 1 2 3 4
5 2 2 3 3
5 3 3 4 4
3 1 1 1 1
5 2 2 2 2
5 1 2 3 4
5 3 5 5 5
5 2 3 4 4
5 2 2 3 4
"""

# Strip the newlines that wrap the literal and split it into answer lines.
estimates = estimates_raw.strip().splitlines()

In [292]:
# Parse every respondent's answer line into an integer score vector.
# split() without an argument tolerates repeated or trailing whitespace,
# which would otherwise produce empty tokens and make int() fail.
split_estimates = [
    np.array([int(score) for score in user.split()], dtype=int)
    for user in estimates
]

# Every respondent must have rated the same number of files; otherwise the
# answers cannot be stacked into a rectangular table.
answer_lengths = {len(scores) for scores in split_estimates}
if len(answer_lengths) > 1:
    raise ValueError(
        f"respondents gave different numbers of scores: {sorted(answer_lengths)}"
    )

# Transpose so that each row holds all respondents' scores for one file.
estimates_by_file = np.array(split_estimates).T

In [295]:
# MOS of a file is the mean of all respondents' scores for it (one row per file).
mos_estimates = estimates_by_file.mean(axis=1)
mos_estimates

array([4.81818182, 1.90909091, 2.54545455, 3.27272727, 3.63636364])

In [296]:
# Scores are attached by position: the tracks were sent to respondents in the
# same order as the rows of the metrics table.
metric_vals_df = metric_vals_df.assign(MOS=mos_estimates)

**Итоговые получившиеся оценки:**

In [297]:
# Show the combined table of all collected metrics, one row per audio file.
metric_vals_df

Unnamed: 0,file,"SNR, dB",SDR,SI-SDR,PESQ,mos_pred,noi_pred,dis_pred,col_pred,loud_pred,OVRL_raw,SIG_raw,BAK_raw,OVRL,SIG,BAK,P808_MOS,MOS
0,test_file.wav,,,189.229599,4.548638,4.706658,4.499089,4.600805,4.408635,4.54724,4.041115,4.210765,4.539121,3.448771,3.657029,4.195249,4.146636,4.818182
1,mixed_-5.wav,-5.0,-3.011665,-5.044811,1.035638,0.912774,1.302374,3.48432,2.032331,1.818125,1.020842,1.03436,1.035168,1.114226,1.178189,1.128608,2.287031,1.909091
2,mixed_0.wav,0.0,1.239098,-0.025155,1.053364,1.632286,1.347787,4.297644,2.954377,2.634257,1.259222,1.737673,1.180893,1.343354,1.873107,1.320582,2.343981,2.545455
3,mixed_5.wav,5.0,5.679046,4.985879,1.103212,2.057696,1.445465,4.409371,3.516179,3.278281,2.151239,3.94959,1.78025,2.132525,3.517146,2.051356,2.707154,3.272727
4,mixed_10.wav,10.0,10.32101,9.992078,1.245521,2.312069,1.590357,4.491841,3.960564,3.528837,2.547831,4.126624,2.118581,2.448811,3.613215,2.422099,2.857759,3.636364
