In [13]:
import sys  
from pathlib import Path
from io import StringIO

# Put the directory one level above the current working directory at the
# front of the import path, then echo it so the reader can see what was added.
notebook_dir = Path().resolve()
parent_dir = str(notebook_dir.parents[0])
sys.path.insert(0, parent_dir)
print(parent_dir)

/Users/ak2002/Documents/automata/scoring_machine


In [9]:
import pandas as pd
import requests

# Show full cell contents in rendered frames instead of truncating long strings.
pd.options.display.max_colwidth = None

In [110]:
# Article page whose HTML tables are parsed by the following cells.
url = "https://pmc.ncbi.nlm.nih.gov/articles/PMC8200627/"

# Browser-like request headers.
# NOTE(review): presumably needed because the site rejects the default
# requests user agent; confirm before removing.
header = {
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest"
}

# The timeout keeps the cell from hanging forever on a stalled connection;
# raise_for_status() stops here on an HTTP error so that an error page is
# never handed to the table parser downstream.
r = requests.get(url, headers=header, timeout=30)
r.raise_for_status()

In [111]:
# Wrap the downloaded HTML in a file-like buffer and let pandas (BeautifulSoup
# flavor) return one DataFrame per table it finds in the page.
page_html = StringIO(r.text)
tables = pd.read_html(page_html, flavor="bs4")

In [148]:
# Position of the norms table in the list returned by pd.read_html.
# This index depends on the page layout; re-check it if the article's HTML
# changes or a different page is scraped.
NORMS_TABLE_INDEX = 4
norms = tables[NORMS_TABLE_INDEX]

In [186]:
# The first 25 rows are the part of the table this notebook treats as the
# female norms. Name the three header levels, then drop the top one
# ("gender") so only (norms_id, scale) remain.
female_wide = norms.iloc[:25, :]
female_wide.columns = (
    female_wide.columns
    .set_names(["gender", "norms_id", "scale"])
    .droplevel(0)
)

# Wide -> long: one row per (norms_id, scale, raw score), with empty cells
# removed, columns renamed and then put into their final order.
norms_f = (
    female_wide
    .melt(id_vars=[("Age in years", "Sum score")])
    .dropna()
    .set_axis(["raw", "norms_id", "scale", "std"], axis=1)
    .loc[:, ["norms_id", "scale", "raw", "std"]]
)
norms_f

Unnamed: 0,norms_id,scale,raw,std
0,14–29 (n = 179),D,0,42
1,14–29 (n = 179),D,1,60
2,14–29 (n = 179),D,2,65
3,14–29 (n = 179),D,3,74
4,14–29 (n = 179),D,4,78
...,...,...,...,...
588,≥70 (n = 202),A,13,98
589,≥70 (n = 202),A,14,99
590,≥70 (n = 202),A,15,99
591,≥70 (n = 202),A,16,99


In [187]:
# Replace the scraped age-band headers with short norm-group ids.
# Headers present in this part of the table:
#   14–29 (n = 179), 30–39 (n = 174), 40–49 (n = 174),
#   50–59 (n = 172), 60–69 (n = 193), ≥70 (n = 202)
female_norm_ids = {
    "14–29 (n = 179)": "eu_f_1429",
    "30–39 (n = 174)": "eu_f_3039",
    "40–49 (n = 174)": "eu_f_4049",
    "50–59 (n = 172)": "eu_f_5059",
    "60–69 (n = 193)": "eu_f_6069",
    "≥70 (n = 202)": "eu_f_7099",
}
norms_f = norms_f.replace({"norms_id": female_norm_ids})
norms_f

Unnamed: 0,norms_id,scale,raw,std
0,eu_f_1429,D,0,42
1,eu_f_1429,D,1,60
2,eu_f_1429,D,2,65
3,eu_f_1429,D,3,74
4,eu_f_1429,D,4,78
...,...,...,...,...
588,eu_f_7099,A,13,98
589,eu_f_7099,A,14,99
590,eu_f_7099,A,15,99
591,eu_f_7099,A,16,99


In [196]:
# From row 25 onward the table carries a second block: rows 25-27 are used
# as its three header rows and the values start at row 28.
header_rows = norms.iloc[25:28, :].values
columns = pd.MultiIndex.from_tuples(list(zip(*header_rows)))
data = norms.iloc[28:, :].values

# Rebuild the block with only the lower two header levels, then reshape it
# wide -> long the same way as the first block: drop empty cells, rename the
# columns and put them into their final order.
norms_m = (
    pd.DataFrame(data=data, columns=columns.droplevel(0))
    .melt(id_vars=[("Age in years", "Sum Score")])
    .dropna()
    .set_axis(["raw", "norms_id", "scale", "std"], axis=1)
    .loc[:, ["norms_id", "scale", "raw", "std"]]
)
norms_m

Unnamed: 0,norms_id,scale,raw,std
0,14–29 (n = 195),D,0,45
1,14–29 (n = 195),D,1,61
2,14–29 (n = 195),D,2,70
3,14–29 (n = 195),D,3,75
4,14–29 (n = 195),D,4,83
...,...,...,...,...
585,≥70 (n = 99),A,10,94
586,≥70 (n = 99),A,11,96
587,≥70 (n = 99),A,12,99
588,≥70 (n = 99),A,13,99


In [197]:
# Replace the scraped age-band headers with short norm-group ids.
# Labels noted during exploration; the trailing numbers look like per-label
# row counts (unverified):
#   14–29 (n = 195)    94
#   30–39 (n = 142)    88
#   50–59 (n = 156)    82
#   60–69 (n = 198)    79
#   40–49 (n = 182)    79
#   ≥70 (n = 99)       79
male_norm_ids = {
    "14–29 (n = 195)": "eu_m_1429",
    "30–39 (n = 142)": "eu_m_3039",
    "40–49 (n = 182)": "eu_m_4049",
    "50–59 (n = 156)": "eu_m_5059",
    "60–69 (n = 198)": "eu_m_6069",
    "≥70 (n = 99)": "eu_m_7099",
}
norms_m = norms_m.replace({"norms_id": male_norm_ids})
norms_m

Unnamed: 0,norms_id,scale,raw,std
0,eu_m_1429,D,0,45
1,eu_m_1429,D,1,61
2,eu_m_1429,D,2,70
3,eu_m_1429,D,3,75
4,eu_m_1429,D,4,83
...,...,...,...,...
585,eu_m_7099,A,10,94
586,eu_m_7099,A,11,96
587,eu_m_7099,A,12,99
588,eu_m_7099,A,13,99


In [207]:
# Stack both long tables (norms_m first, then norms_f) under a fresh 0..n-1 index.
final = pd.concat([norms_m, norms_f], ignore_index=True)

In [208]:
# The std values must be numeric before they can be compared against the bands.
final["std"] = pd.to_numeric(final["std"])

# available signs ꜜ ꜛ ◦
# Half-open bands [lower, upper) and the marker each one receives.
# Values that fall in none of the bands keep the neutral "◦" default.
interpretation_bands = [
    (0, 5, "ꜜꜜꜜ"),
    (5, 10, "ꜜꜜ"),
    (10, 20, "ꜜ"),
    (80, 90, "ꜛ"),
    (90, 95, "ꜛꜛ"),
    (95, 500, "ꜛꜛꜛ"),
]

final["std_interpretation"] = "◦"
for lower, upper, marker in interpretation_bands:
    in_band = final["std"].between(lower, upper, inclusive="left")
    final.loc[in_band, "std_interpretation"] = marker

# Persist the finished lookup table without the positional index.
final.to_csv("poms_norms.csv", index=False)

In [209]:
# Display the combined table as a visual check of what was written to poms_norms.csv.
final

Unnamed: 0,norms_id,scale,raw,std,std_interpretation
0,eu_m_1429,D,0,45,◦
1,eu_m_1429,D,1,61,◦
2,eu_m_1429,D,2,70,◦
3,eu_m_1429,D,3,75,◦
4,eu_m_1429,D,4,83,ꜛ
...,...,...,...,...,...
1024,eu_f_7099,A,13,98,ꜛꜛꜛ
1025,eu_f_7099,A,14,99,ꜛꜛꜛ
1026,eu_f_7099,A,15,99,ꜛꜛꜛ
1027,eu_f_7099,A,16,99,ꜛꜛꜛ
