In [19]:
import pandas as pd

# Replace with your file path
file_path = "../../../data/final-tor-2.csv"

# Strings that should be read as missing values (NaN) when the CSV is loaded
na_values = ["N/A", "n/a", "NA", "na", "Null", "null", "", " "]
df = pd.read_csv(file_path, na_values=na_values)

# Columns every later step depends on
cols = ["caller_pesq_mos", "caller_jitter_std_dev", "callee_pesq_mos", "callee_jitter_std_dev"]

# Fail fast with a readable message if a required column is absent; otherwise
# the dropna / zero-filter steps below would raise a bare KeyError later on.
missing_cols = [name for name in cols if name not in df.columns]
if missing_cols:
    raise KeyError(f"Required columns missing from {file_path}: {missing_cols}")

# Coerce the key columns to numeric; values that cannot be parsed become NaN
for col in cols:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Drop rows with NaN in any key column
df = df.dropna(subset=cols)

# Drop rows where any key column is exactly zero.
# NOTE(review): presumably a zero marks a failed/absent measurement — confirm.
df = df[(df[cols] != 0).all(axis=1)]

print(f"Remaining rows after cleaning: {len(df)}")


Remaining rows after cleaning: 26289


In [20]:
# For each call side, correlate its PESQ MOS column with its jitter
# standard-deviation column using three different coefficients.
side_columns = {
    "Caller": ("caller_pesq_mos", "caller_jitter_std_dev"),
    "Callee": ("callee_pesq_mos", "callee_jitter_std_dev"),
}
corr_methods = (("Pearson", "pearson"), ("Spearman", "spearman"), ("Kendall", "kendall"))

results = {}
for side, (mos_col, jitter_col) in side_columns.items():
    per_side = {}
    for label, method in corr_methods:
        per_side[label] = df[mos_col].corr(df[jitter_col], method=method)
    results[side] = per_side

# Bare expression so the notebook displays the nested dict
results


{'Caller': {'Pearson': np.float64(-0.4125983799274007),
  'Spearman': np.float64(-0.6593132016742744),
  'Kendall': np.float64(-0.47469690729659225)},
 'Callee': {'Pearson': np.float64(-0.3466228584072197),
  'Spearman': np.float64(-0.581313699883719),
  'Kendall': np.float64(-0.4337983281627414)}}

In [21]:
# Stack the caller values on top of the callee values so both sides share one
# pair of columns; the index is renumbered so the two stacked series line up.
# NOTE(review): every row of df contributes two points to the pooled sample,
# so the pooled observations may not be independent — keep in mind when
# interpreting the coefficients.
stacked_mos = pd.concat(
    [df["caller_pesq_mos"], df["callee_pesq_mos"]],
    ignore_index=True,
)
stacked_jitter = pd.concat(
    [df["caller_jitter_std_dev"], df["callee_jitter_std_dev"]],
    ignore_index=True,
)
combined = pd.DataFrame({"pesq_mos": stacked_mos, "jitter_std_dev": stacked_jitter})

# Same three coefficients as before, now over the pooled data
combined_corrs = {
    label: combined["pesq_mos"].corr(combined["jitter_std_dev"], method=method)
    for label, method in (
        ("Pearson", "pearson"),
        ("Spearman", "spearman"),
        ("Kendall", "kendall"),
    )
}

# Bare expression so the notebook displays the dict
combined_corrs

{'Pearson': np.float64(-0.369755136893797),
 'Spearman': np.float64(-0.6096124580346292),
 'Kendall': np.float64(-0.44380848603235235)}