In [1]:
import polars as pl
from pathlib import Path
import json

In [5]:
# Loading params keyword dictionary
kw_file = "data/keywords/params_keywords.yaml"
with open(kw_file, 'r') as f:
    data = json.load(f)

# Transfer dictionary to dataframe
rows = []
for param_id, keywords in data.items():
    rows.append({
        'parameter_id': int(param_id),
        'keywords': keywords
    })
keywords_df = pl.DataFrame(rows)

In [9]:
# Dataframe display data settings
pl.Config.set_fmt_str_lengths(1000)
pl.Config.set_tbl_width_chars(1000)
pl.Config.set_fmt_table_cell_list_len(-1)
pl.Config.set_tbl_rows(-1)
keywords_df

parameter_id,keywords
i64,list[str]
1,"[""weight"", ""mass"", ""heavy"", ""light"", ""moving"", ""mobile"", ""dynamic""]"
2,"[""weight"", ""mass"", ""heavy"", ""light"", ""stationary"", ""fixed"", ""static""]"
3,"[""length"", ""angle"", ""long"", ""short"", ""dimension"", ""angular"", ""moving"", ""mobile""]"
4,"[""length"", ""angle"", ""long"", ""short"", ""dimension"", ""angular"", ""stationary"", ""fixed""]"
5,"[""area"", ""surface"", ""coverage"", ""moving"", ""mobile""]"
6,"[""area"", ""surface"", ""coverage"", ""stationary"", ""fixed""]"
7,"[""volume"", ""capacity"", ""size"", ""moving"", ""mobile""]"
8,"[""volume"", ""capacity"", ""size"", ""stationary"", ""fixed""]"
9,"[""shape"", ""form"", ""geometry"", ""configuration"", ""design"", ""contour""]"
10,"[""amount"", ""substance"", ""quantity"", ""material"", ""matter"", ""mass""]"


In [10]:
sem_weights = [0.3, 0.3, 0.3, 0.5, 0.7]
km_weights = [0.3, 0.2, 0.5, 0.2, 0.1]
tfidf_weights = [0.4, 0.5, 0.2, 0.3, 0.2]

method_weights_df = pl.DataFrame({
    "semantic_w": sem_weights,
    "keyword_w": km_weights,
    "tfidf_w": tfidf_weights
})

In [11]:
method_weights_df

semantic_w,keyword_w,tfidf_w
f64,f64,f64
0.3,0.3,0.4
0.3,0.2,0.5
0.3,0.5,0.2
0.5,0.2,0.3
0.7,0.1,0.2


In [12]:
claim_weights = [0.3, 0.3, 0.3, 0.5, 0.7]
abs_weights = [0.3, 0.2, 0.5, 0.3, 0.1]
describe_weights = [0.4, 0.5, 0.2, 0.2, 0.2]

section_weights_df = pl.DataFrame({
    "abstract_w": claim_weights,
    "description_w": abs_weights,
    "claim_w": describe_weights
})

In [13]:
section_weights_df

abstract_w,description_w,claim_w
f64,f64,f64
0.3,0.3,0.4
0.3,0.2,0.5
0.3,0.5,0.2
0.5,0.3,0.2
0.7,0.1,0.2
