To demonstrate the ``SequenceFeature().get_df_pos()`` method, we load the ``DOM_GSEC`` example dataset including its respective features  (see [Breimann24c]_):

In [7]:
import aaanalysis as aa
aa.options["verbose"] = False
df_seq = aa.load_dataset(name="DOM_GSEC", n=20)
labels = df_seq["label"].to_list()
df_feat = aa.load_features(name="DOM_GSEC").head(100)
features = df_feat["feature"].to_list()
sf = aa.SequenceFeature()
df_parts = sf.get_df_parts(df_seq=df_seq)
df_feat = sf.get_df_feat(features=features, labels=labels, df_parts=df_parts)
aa.display_df(df_feat, n_rows=5)

Unnamed: 0,feature,category,subcategory,scale_name,scale_description,abs_auc,abs_mean_dif,mean_dif,std_test,std_ref,p_val_mann_whitney,p_val_fdr_bh,positions
1,TMD_C_JMD_C-...)-KLEP840101,Energy,Charge,Charge,"Net charge (...t al., 1984)",0.301,0.14,0.14,0.112,0.111,0.001,0.004,3132333435
2,TMD_C_JMD_C-...)-FINA910104,Conformation,α-helix (C-cap),α-helix termination,"Helix termin...t al., 1991)",0.295,0.129,0.129,0.111,0.125,0.001,0.005,3132333435
3,TMD_C_JMD_C-...)-LEVM760105,Shape,Side chain length,Side chain length,"Radius of gy...evitt, 1976)",0.335,0.245,0.245,0.177,0.182,0.0,0.003,3233
4,TMD_C_JMD_C-...)-HUTJ700102,Energy,Entropy,Entropy,"Absolute ent...chens, 1970)",0.306,0.156,0.156,0.105,0.136,0.001,0.004,3132333435
5,TMD_C_JMD_C-...)-RADA880106,ASA/Volume,Volume,Accessible s...e area (ASA),"Accessible s...enden, 1988)",0.342,0.181,0.181,0.139,0.145,0.0,0.003,3233


``df_feat`` must be provided to create ``df_pos``, containing an aggregated numerical value (``mean_dif`` column by default) per a selected scale category level (``category`` by default):


In [8]:
df_pos = sf.get_df_pos(df_feat=df_feat)
aa.display_df(df_feat, n_rows=5)

Unnamed: 0,feature,category,subcategory,scale_name,scale_description,abs_auc,abs_mean_dif,mean_dif,std_test,std_ref,p_val_mann_whitney,p_val_fdr_bh,positions
1,TMD_C_JMD_C-...)-KLEP840101,Energy,Charge,Charge,"Net charge (...t al., 1984)",0.301,0.14,0.14,0.112,0.111,0.001,0.004,3132333435
2,TMD_C_JMD_C-...)-FINA910104,Conformation,α-helix (C-cap),α-helix termination,"Helix termin...t al., 1991)",0.295,0.129,0.129,0.111,0.125,0.001,0.005,3132333435
3,TMD_C_JMD_C-...)-LEVM760105,Shape,Side chain length,Side chain length,"Radius of gy...evitt, 1976)",0.335,0.245,0.245,0.177,0.182,0.0,0.003,3233
4,TMD_C_JMD_C-...)-HUTJ700102,Energy,Entropy,Entropy,"Absolute ent...chens, 1970)",0.306,0.156,0.156,0.105,0.136,0.001,0.004,3132333435
5,TMD_C_JMD_C-...)-RADA880106,ASA/Volume,Volume,Accessible s...e area (ASA),"Accessible s...enden, 1988)",0.342,0.181,0.181,0.139,0.145,0.0,0.003,3233


You can change the considered numerical and categorical columns using the ``col_value`` and ``col_cat`` parameters: 

In [9]:
df_pos = sf.get_df_pos(df_feat=df_feat, col_value="abs_auc", col_cat="subcategory")
aa.display_df(df_feat, n_rows=5)

Unnamed: 0,feature,category,subcategory,scale_name,scale_description,abs_auc,abs_mean_dif,mean_dif,std_test,std_ref,p_val_mann_whitney,p_val_fdr_bh,positions
1,TMD_C_JMD_C-...)-KLEP840101,Energy,Charge,Charge,"Net charge (...t al., 1984)",0.301,0.14,0.14,0.112,0.111,0.001,0.004,3132333435
2,TMD_C_JMD_C-...)-FINA910104,Conformation,α-helix (C-cap),α-helix termination,"Helix termin...t al., 1991)",0.295,0.129,0.129,0.111,0.125,0.001,0.005,3132333435
3,TMD_C_JMD_C-...)-LEVM760105,Shape,Side chain length,Side chain length,"Radius of gy...evitt, 1976)",0.335,0.245,0.245,0.177,0.182,0.0,0.003,3233
4,TMD_C_JMD_C-...)-HUTJ700102,Energy,Entropy,Entropy,"Absolute ent...chens, 1970)",0.306,0.156,0.156,0.105,0.136,0.001,0.004,3132333435
5,TMD_C_JMD_C-...)-RADA880106,ASA/Volume,Volume,Accessible s...e area (ASA),"Accessible s...enden, 1988)",0.342,0.181,0.181,0.139,0.145,0.0,0.003,3233


The residue positions can be adjusted using the ``start``, ``tmd_len``, ``jmd_n_len``, and ``jmd_c_len`` parameters:

In [10]:
# Shift positions by 10 residues
df_pos = sf.get_df_pos(df_feat=df_feat, start=11)
aa.display_df(df_feat, n_rows=5)

Unnamed: 0,feature,category,subcategory,scale_name,scale_description,abs_auc,abs_mean_dif,mean_dif,std_test,std_ref,p_val_mann_whitney,p_val_fdr_bh,positions
1,TMD_C_JMD_C-...)-KLEP840101,Energy,Charge,Charge,"Net charge (...t al., 1984)",0.301,0.14,0.14,0.112,0.111,0.001,0.004,3132333435
2,TMD_C_JMD_C-...)-FINA910104,Conformation,α-helix (C-cap),α-helix termination,"Helix termin...t al., 1991)",0.295,0.129,0.129,0.111,0.125,0.001,0.005,3132333435
3,TMD_C_JMD_C-...)-LEVM760105,Shape,Side chain length,Side chain length,"Radius of gy...evitt, 1976)",0.335,0.245,0.245,0.177,0.182,0.0,0.003,3233
4,TMD_C_JMD_C-...)-HUTJ700102,Energy,Entropy,Entropy,"Absolute ent...chens, 1970)",0.306,0.156,0.156,0.105,0.136,0.001,0.004,3132333435
5,TMD_C_JMD_C-...)-RADA880106,ASA/Volume,Volume,Accessible s...e area (ASA),"Accessible s...enden, 1988)",0.342,0.181,0.181,0.139,0.145,0.0,0.003,3233


In [11]:
# Increase TMD length from 20 to 50
df_pos = sf.get_df_pos(df_feat=df_feat, tmd_len=50)
aa.display_df(df_feat, n_rows=5)

Unnamed: 0,feature,category,subcategory,scale_name,scale_description,abs_auc,abs_mean_dif,mean_dif,std_test,std_ref,p_val_mann_whitney,p_val_fdr_bh,positions
1,TMD_C_JMD_C-...)-KLEP840101,Energy,Charge,Charge,"Net charge (...t al., 1984)",0.301,0.14,0.14,0.112,0.111,0.001,0.004,3132333435
2,TMD_C_JMD_C-...)-FINA910104,Conformation,α-helix (C-cap),α-helix termination,"Helix termin...t al., 1991)",0.295,0.129,0.129,0.111,0.125,0.001,0.005,3132333435
3,TMD_C_JMD_C-...)-LEVM760105,Shape,Side chain length,Side chain length,"Radius of gy...evitt, 1976)",0.335,0.245,0.245,0.177,0.182,0.0,0.003,3233
4,TMD_C_JMD_C-...)-HUTJ700102,Energy,Entropy,Entropy,"Absolute ent...chens, 1970)",0.306,0.156,0.156,0.105,0.136,0.001,0.004,3132333435
5,TMD_C_JMD_C-...)-RADA880106,ASA/Volume,Volume,Accessible s...e area (ASA),"Accessible s...enden, 1988)",0.342,0.181,0.181,0.139,0.145,0.0,0.003,3233


You can select a specific parts and normalize results using the ``list_parts`` and ``normalize`` parameters:

In [12]:
df_pos = sf.get_df_pos(df_feat=df_feat, list_parts=["jmd_c"], normalize=True)
aa.display_df(df_feat, n_rows=5)

Unnamed: 0,feature,category,subcategory,scale_name,scale_description,abs_auc,abs_mean_dif,mean_dif,std_test,std_ref,p_val_mann_whitney,p_val_fdr_bh,positions
1,TMD_C_JMD_C-...)-KLEP840101,Energy,Charge,Charge,"Net charge (...t al., 1984)",0.301,0.14,0.14,0.112,0.111,0.001,0.004,3132333435
2,TMD_C_JMD_C-...)-FINA910104,Conformation,α-helix (C-cap),α-helix termination,"Helix termin...t al., 1991)",0.295,0.129,0.129,0.111,0.125,0.001,0.005,3132333435
3,TMD_C_JMD_C-...)-LEVM760105,Shape,Side chain length,Side chain length,"Radius of gy...evitt, 1976)",0.335,0.245,0.245,0.177,0.182,0.0,0.003,3233
4,TMD_C_JMD_C-...)-HUTJ700102,Energy,Entropy,Entropy,"Absolute ent...chens, 1970)",0.306,0.156,0.156,0.105,0.136,0.001,0.004,3132333435
5,TMD_C_JMD_C-...)-RADA880106,ASA/Volume,Volume,Accessible s...e area (ASA),"Accessible s...enden, 1988)",0.342,0.181,0.181,0.139,0.145,0.0,0.003,3233
