# Topological paper analysis

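Read the model-calculated values from Table 2 of <https://www.sciencedirect.com/science/article/pii/S0378381205000269#tbl2> and compute, on a log10 scale, the root-mean-square error (RMSE) of each model against the expected values.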

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Parse the tables in the local HTML file. "N.A." cells are read as NaN and
# row 1 of the table is used as the header row.
tables = pd.read_html("thermo-model-results.html", header=[1], na_values=["N.A."])

# Keep the first table and give the two "Empty Cell" columns meaningful names.
column_names = {"Empty Cell": "Component", "Empty Cell.1": "Expected"}
df = tables[0].rename(columns=column_names)

In [3]:
# Drop the row labelled 38 (presumably a trailing non-data row of the scraped
# table -- TODO confirm). errors="ignore" keeps this cell re-runnable: without
# it a second execution raises KeyError because the label is already gone.
df.drop(index=38, inplace=True, errors="ignore")

# Remove rows in which every column is missing.
all_na = df.isna().all(axis=1)
df.drop(index=df.loc[all_na].index, inplace=True)

# Remove rows whose Component text contains a literal "n" (presumably section
# label rows rather than data -- TODO confirm). regex=False matches the plain
# character, and na=False treats a missing Component as "no match" so the mask
# is strictly boolean and can be used for indexing.
all_label = df["Component"].str.contains("n", regex=False, na=False)
df.drop(index=df.loc[all_label].index, inplace=True)

In [4]:
# NOTE: this cell rewrites `df` in place, so run it exactly once after the
# cleaning step; a second run would take log10 of the already-logged values
# and would treat the residual columns as model columns.

# Every column after "Component" holds numbers: coerce them to a numeric dtype
# and replace them with their base-10 logarithm.
float_cols = df.columns[1:]
df[float_cols] = np.log10(df[float_cols].apply(pd.to_numeric))

# Columns from the third onward are the model predictions; pair each one with
# the name of the residual column it produces.
model_cols = df.columns[2:]
resid_cols = [f"{col}-residuals" for col in model_cols]

# Residual = Expected - model, both on the log10 scale.
expected = df["Expected"]
for resid_name, col in zip(resid_cols, model_cols):
    df[resid_name] = expected - df[col]

In [5]:
# Display the cleaned table: log10 values plus one residual column per model.
df

Unnamed: 0,Component,Expected,p-NRTLa,s-UNIQUACb,SAFTc,UNIFAC,m-ADd,p-NRTLa-residuals,s-UNIQUACb-residuals,SAFTc-residuals,UNIFAC-residuals,m-ADd-residuals
1,C4E6,-1.725611,-1.66035,,-1.752763,-2.006299,-1.725611,-0.065261,,0.027152,0.280688,0.0
2,C6E6,-2.896881,-3.093881,,,-2.932557,-2.812761,0.197,,,0.035676,-0.084119
3,C8E6,-3.745694,-4.032125,,-3.718512,-3.855426,-3.837734,0.286431,,-0.027182,0.109732,0.092041
4,C10E6,-4.788612,-4.918653,,-4.823909,-4.776245,-4.844968,0.13004,,0.035296,-0.012368,0.056355
5,C12E6,-5.803547,-5.797512,,-5.87031,-5.694864,-5.854182,-0.006036,,0.066763,-0.108684,0.050635
6,C14E6,-6.743042,-6.674895,,-6.856673,-6.612254,-6.874519,-0.068147,,0.113631,-0.130788,0.131477
7,C16E6,-7.629117,-7.549905,,-7.778847,-7.528122,-7.911864,-0.079212,,0.14973,-0.100995,0.282747
10,C9E8,-4.26536,,-4.280089,-4.271159,-4.27311,-4.348044,,0.014729,0.005799,0.00775,0.082684
11,C10E8,-4.742802,,-4.793444,-4.803271,-4.737549,-4.828566,,0.050642,0.06047,-0.005253,0.085765
12,C11E8,-5.265921,,-5.289713,-5.319755,-5.200384,-5.304256,,0.023793,0.053835,-0.065537,0.038335


In [6]:
# Root-mean-square error of each model's residual column. DataFrame.mean skips
# NaN by default, so a model with no prediction for some components is averaged
# over the components it does cover; no Python-level filtering is needed.
np.sqrt(df[resid_cols].pow(2).mean())

p-NRTLa-residuals       0.180242
s-UNIQUACb-residuals    0.140155
SAFTc-residuals         0.059569
UNIFAC-residuals        0.142914
m-ADd-residuals         0.107594
dtype: float64