In [7]:
import pandas as pd
pd.set_option('display.max_rows', 6)

import seaborn as sns
import matplotlib.pyplot as plt

plt.rcParams.update({
    'font.family': 'serif',  # 使用衬线字体
    'font.serif': ['Times New Roman'],  # 指定 Times New Roman 字体
    'font.size': 11,
    'text.usetex': False,
    'axes.labelsize': 11,
    'axes.titlesize': 11,
    'xtick.labelsize': 11,
    'ytick.labelsize': 11,
    'legend.fontsize': 9,
    'legend.title_fontsize': 9,
    'savefig.bbox': 'tight',
    'lines.linewidth': 2,
    'lines.markersize': 6,
    'grid.linewidth': 1,
    'axes.linewidth': 1
})
sns.set_style("whitegrid")
flierprops = dict(marker='o', markersize=2, linestyle='none')

table_dir = "/home/zhaoy/asset-fastCAE/results/vvenc/tables"
fig_dir   = "/home/zhaoy/asset-fastCAE/results/vvenc/figs"

In [8]:
def rm_outliers(df, cols, threshold=1.5, lower_only=True):
    for col in cols:
        Q1 = df[col].quantile(0.25)
        Q3 = df[col].quantile(0.75)
        IQR = Q3 - Q1

        lower_bound = Q1 - (threshold * IQR)
        upper_bound = Q3 + (threshold * IQR)

        if lower_only:
            df = df[df[col] >= lower_bound]
        else:
            df = df[(df[col] >= lower_bound) & (df[col] <= upper_bound)]

    return df

#### 2. 预测的是 vmaf

In [9]:
""" 把每个序列所有的 dataframe 叠在一起 """
import os
import re
import pandas as pd
from tqdm import tqdm

seqs = os.listdir(f"{table_dir}/BDBR-fitOnly/rd-vmaf")
convex_df = pd.DataFrame()

for seq in seqs:
    for f in tqdm(os.listdir(f"{table_dir}/BDBR-fitOnly/rd-vmaf/{seq}")):
        convex_df = pd.concat([convex_df, pd.read_csv(f"{table_dir}/BDBR-fitOnly/rd-vmaf/{seq}/{f}")], axis=0)

convex_df = convex_df.reset_index(drop=True) 
display(convex_df)

convex_df.to_csv(f"{table_dir}/BDBR-fitOnly/convex_vmaf.csv", index=False)

100%|██████████| 9/9 [00:00<00:00, 552.59it/s]
100%|██████████| 9/9 [00:00<00:00, 566.58it/s]
100%|██████████| 9/9 [00:00<00:00, 549.49it/s]
100%|██████████| 9/9 [00:00<00:00, 560.34it/s]
100%|██████████| 9/9 [00:00<00:00, 565.37it/s]
100%|██████████| 9/9 [00:00<00:00, 562.19it/s]
100%|██████████| 9/9 [00:00<00:00, 567.27it/s]
100%|██████████| 9/9 [00:00<00:00, 563.08it/s]
100%|██████████| 9/9 [00:00<00:00, 557.00it/s]
100%|██████████| 9/9 [00:00<00:00, 560.74it/s]
100%|██████████| 9/9 [00:00<00:00, 551.59it/s]
100%|██████████| 9/9 [00:00<00:00, 550.42it/s]
100%|██████████| 9/9 [00:00<00:00, 546.96it/s]
100%|██████████| 9/9 [00:00<00:00, 548.79it/s]
100%|██████████| 9/9 [00:00<00:00, 545.38it/s]
100%|██████████| 9/9 [00:00<00:00, 545.46it/s]
100%|██████████| 9/9 [00:00<00:00, 540.93it/s]
100%|██████████| 9/9 [00:00<00:00, 541.28it/s]
100%|██████████| 9/9 [00:00<00:00, 540.81it/s]
100%|██████████| 9/9 [00:00<00:00, 540.48it/s]
100%|██████████| 9/9 [00:00<00:00, 537.43it/s]
100%|████████

Unnamed: 0,seqName,sceneId,func,preset,size,qp,pred_vmaf,log2vmaf,vmaf,pred_log2bitrate,log2bitrate,bitrate,pred_bitrate,convex
0,Lecture-5aee,17,quadratic2,medium,1080P,52,42.270678,5.452189,43.779672,5.466374,5.958641,62.1913,44.212235,pred
1,Lecture-5aee,17,quadratic2,medium,1080P,22,100.179548,6.617565,98.194124,12.495681,12.751314,6894.9026,5775.303574,pred
2,Lecture-5aee,17,quadratic2,medium,540P,22,99.791569,6.619314,98.313284,11.046069,11.674140,3267.8817,2114.452826,pred
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63543,Lecture-2ed0,6,linear,medium,360P,27,93.265134,6.544030,93.314516,6.425991,6.517740,91.6295,85.983703,fixed
63544,Lecture-2ed0,6,linear,medium,540P,27,93.295735,6.550998,93.766336,6.987135,7.199487,146.9811,126.863622,fixed
63545,Lecture-2ed0,6,linear,medium,720P,27,92.686290,6.535793,92.783276,7.555798,7.747359,214.8758,188.157629,fixed


In [10]:
""" 计算 BDBR -> 1) 相较于传统 CAE (会有 loss);  2) 相较于 fixed QP (会有 gain) """
from fastCAE.metrics.bjontegaard import BD_PSNR
import warnings
warnings.filterwarnings("ignore")

pd.set_option('display.max_rows', 8)

grouped = convex_df.groupby(["seqName", "sceneId", "func", "preset"])
rows = []
for i, ((seqName, sceneId, func, preset), group) in tqdm(enumerate(grouped)):
    pred_convex_df   = group[group["convex"] == "pred"].sort_values(by="bitrate").reset_index(drop=True)
    actual_convex_df = group[group["convex"] == "actual"].sort_values(by="bitrate").reset_index(drop=True)
    fixed_convex_df  = group[group["convex"] == "fixed"].sort_values(by="bitrate").reset_index(drop=True)
    
    bd_vmaf_cae = BD_PSNR(
        actual_convex_df["bitrate"].tolist(), actual_convex_df["vmaf"].tolist(), 
        pred_convex_df["bitrate"].tolist(), pred_convex_df["vmaf"].tolist(), piecewise=0
    )
    bd_vmaf_fix = BD_PSNR(
        fixed_convex_df["bitrate"].tolist(), fixed_convex_df["vmaf"].tolist(), 
        pred_convex_df["bitrate"].tolist(), pred_convex_df["vmaf"].tolist(), piecewise=0
    )
    rows.append([seqName, sceneId, func, preset, bd_vmaf_cae, "cae"])
    rows.append([seqName, sceneId, func, preset, bd_vmaf_fix, "fixed"])
    
all_bdbr = pd.DataFrame(rows, columns=["seqName", "sceneId", "func", "preset", "bd_vmaf", "anchor"])
all_bdbr = rm_outliers(all_bdbr, cols=["bd_vmaf"], threshold=0.75)
display(all_bdbr)
display(all_bdbr.describe())                  

all_bdbr.to_csv(f"{table_dir}/BDBR-fitOnly/all_rd_vmaf.csv", index=False)

2682it [00:05, 493.08it/s]


Unnamed: 0,seqName,sceneId,func,preset,bd_vmaf,anchor
0,Lecture-003a,0,linear,faster,0.000000,cae
1,Lecture-003a,0,linear,faster,1.429803,fixed
2,Lecture-003a,0,linear,medium,0.000000,cae
3,Lecture-003a,0,linear,medium,0.752904,fixed
...,...,...,...,...,...,...
5360,Lecture-7f7e,4,quadratic2,medium,0.000000,cae
5361,Lecture-7f7e,4,quadratic2,medium,0.294929,fixed
5362,Lecture-7f7e,4,quadratic2,slower,0.000000,cae
5363,Lecture-7f7e,4,quadratic2,slower,0.298415,fixed


Unnamed: 0,sceneId,bd_vmaf
count,5070.0,5070.0
mean,8.022091,1.536968
std,9.094206,2.472704
min,0.0,-2.387227
25%,1.0,-0.038099
50%,4.0,0.562189
75%,12.0,2.993025
max,39.0,14.966221


In [11]:
# 看一下不同 func 的效果
table_dir = "/home/zhaoy/asset-fastCAE/results/vvenc/tables"
all_bdbr = pd.read_csv(f"{table_dir}/BDBR-fitOnly/all_rd_vmaf.csv")

grouped = all_bdbr.groupby(["anchor", "func", "preset"], as_index=False)

rows = []
for (anchor, func, preset), group in grouped:
    group = group.reset_index(drop=True)
    rows.append([anchor, func, preset, group["bd_vmaf"].mean()])

avg_bdbr = pd.DataFrame(rows, columns=["anchor", "func", "preset", "bd_vmaf"])
avg_bdbr = rm_outliers(avg_bdbr, cols=["bd_vmaf"], threshold=1.25).reset_index(drop=True)
display(avg_bdbr)

avg_bdbr.to_csv(f"{table_dir}/BDBR-fitOnly/avg_rd_vmaf.csv", index=False)

Unnamed: 0,anchor,func,preset,bd_vmaf
0,cae,linear,faster,-0.276595
1,cae,linear,medium,-0.267816
2,cae,linear,slower,-0.237944
3,cae,power,faster,-0.541874
...,...,...,...,...
14,fixed,power,slower,2.643917
15,fixed,quadratic2,faster,3.858496
16,fixed,quadratic2,medium,2.935598
17,fixed,quadratic2,slower,2.959731


In [12]:
display(avg_bdbr[avg_bdbr["anchor"] == "cae"].describe())       # 相比 orig CAE
display(avg_bdbr[avg_bdbr["anchor"] == "fixed"].describe())     # 相比 fixed ladder

Unnamed: 0,bd_vmaf
count,9.0
mean,-0.308274
std,0.153771
min,-0.541874
25%,-0.471722
50%,-0.267816
75%,-0.165944
max,-0.156951


Unnamed: 0,bd_vmaf
count,9.0
mean,3.166893
std,0.5089
min,2.571914
25%,2.935598
50%,3.10925
75%,3.189329
max,4.096882
