To demonstrate the ``CPP().run()`` method, we load the ``DOM_GSEC`` example dataset (see [Breimann25a]_):

In [11]:
# Import the package and set its global "verbose" option to False
import aaanalysis as aa
aa.options["verbose"] = False
# Load the DOM_GSEC example dataset and take its "label" column as a plain list
df_seq = aa.load_dataset(name="DOM_GSEC")
labels = df_seq["label"].to_list()
# Derive the sequence parts from df_seq; they are passed to CPP in the next cell
sf = aa.SequenceFeature()
df_parts = sf.get_df_parts(df_seq=df_seq)

You only need to provide ``df_parts`` to the ``CPP`` object and then run the algorithm with the corresponding labels using the ``CPP().run()`` method:

In [12]:
cpp = aa.CPP(df_parts=df_parts)
# Create >500,000 features and filter them down to 100 features
df_feat = cpp.run(labels=labels)
# Show the first 10 rows together with the DataFrame shape
aa.display_df(df_feat, n_rows=10, show_shape=True)

DataFrame shape: (100, 13)


Unnamed: 0,feature,category,subcategory,scale_name,scale_description,abs_auc,abs_mean_dif,mean_dif,std_test,std_ref,p_val_mann_whitney,p_val_fdr_bh,positions
1,"TMD_C_JMD_C-Seg...2,3)-QIAN880106",Conformation,α-helix,α-helix (middle),"Weights for alp...ejnowski, 1988)",0.387,0.118,0.118,0.068,0.08,0.0,0.0,"27,28,29,30,31,32,33"
2,"TMD_C_JMD_C-Pat...,14)-CRAJ730103",Conformation,β-turn,β-turn,"Normalized freq...d et al., 1973)",0.377,0.285,-0.285,0.164,0.177,0.0,0.0,"27,31"
3,"TMD_C_JMD_C-Seg...6,9)-FAUJ880104",Shape,Side chain length,Steric parameter,"STERIMOL length...e et al., 1988)",0.367,0.263,0.263,0.161,0.168,0.0,0.0,"32,33"
4,"TMD_C_JMD_C-Seg...6,9)-ONEK900101",Others,Unclassified (Others),ΔG values in peptides,"Delta G values ...-DeGrado, 1990)",0.366,0.111,0.111,0.07,0.114,0.0,0.0,"32,33"
5,"TMD_C_JMD_C-Pat...,15)-QIAN880107",Conformation,α-helix,α-helix (middle),"Weights for alp...ejnowski, 1988)",0.363,0.162,0.162,0.091,0.118,0.0,0.0,"24,28,32,35"
6,"TMD_C_JMD_C-Seg...3,4)-HUTJ700103",Energy,Entropy,Entropy,"Entropy of form...Hutchens, 1970)",0.36,0.187,0.187,0.115,0.128,0.0,0.0,"31,32,33,34,35"
7,"TMD_C_JMD_C-Seg...2,3)-WOLS870103",Others,PC 4,Principal Component 3 (Wold),"Principal prope...d et al., 1987)",0.359,0.159,-0.159,0.09,0.13,0.0,0.0,"27,28,29,30,31,32,33"
8,"TMD_C_JMD_C-Pat...,12)-CRAJ730103",Conformation,β-turn,β-turn,"Normalized freq...d et al., 1973)",0.352,0.227,-0.227,0.15,0.17,0.0,0.0,"24,28,32"
9,"TMD_C_JMD_C-Seg...6,9)-MUNV940102",Energy,Free energy (folding),Free energy (α-helix),"Free energy in ...-Serrano, 1994)",0.35,0.129,-0.129,0.079,0.124,0.0,0.0,"32,33"
10,"TMD_C_JMD_C-Seg...3,4)-WOLS870103",Others,PC 4,Principal Component 3 (Wold),"Principal prope...d et al., 1987)",0.341,0.214,-0.214,0.128,0.177,0.0,0.0,"31,32,33,34,35"


Adjust **Parts**, **Splits**, and **Scales** as follows:

In [13]:
# Restrict the parts to "tmd_jmd" and the splits to Segments (n_split_min=1, n_split_max=5)
df_parts = sf.get_df_parts(df_seq=df_seq, list_parts=["tmd_jmd"])
split_kws = sf.get_split_kws(split_types=["Segment"], n_split_min=1, n_split_max=5)
# Load one of the provided top scale datasets
df_scales = aa.load_scales(top60_n=38)
# Create ~700 features and filter them down to 19 features
cpp = aa.CPP(df_parts=df_parts, split_kws=split_kws, df_scales=df_scales)
df_feat = cpp.run(labels=labels)
aa.display_df(df_feat, n_rows=10, show_shape=True)

DataFrame shape: (19, 13)


Unnamed: 0,feature,category,subcategory,scale_name,scale_description,abs_auc,abs_mean_dif,mean_dif,std_test,std_ref,p_val_mann_whitney,p_val_fdr_bh,positions
1,"TMD_JMD-Segment...4,5)-ROBB760113",Conformation,β-turn,β-turn,"Information mea...n-Suzuki, 1976)",0.316,0.137,-0.137,0.102,0.108,0.0,0.0,"25,26,27,28,29,30,31,32"
2,"TMD_JMD-Segment...4,4)-ZIMJ680104",Energy,Isoelectric point,Isoelectric point,"Isoelectric poi...n et al., 1968)",0.312,0.099,0.099,0.069,0.095,0.0,0.0,"31,32,33,34,35,36,37,38,39,40"
3,"TMD_JMD-Segment...4,5)-KANM800103",Conformation,α-helix,α-helix,"Average relativ...sa-Tsong, 1980)",0.297,0.086,0.086,0.077,0.068,0.0,0.0,"25,26,27,28,29,30,31,32"
4,"TMD_JMD-Segment...5,5)-LINS030104",ASA/Volume,Accessible surface area (ASA),ASA (folded protein),"Total median ac...s et al., 2003)",0.295,0.141,0.141,0.115,0.13,0.0,0.0,"33,34,35,36,37,38,39,40"
5,"TMD_JMD-Segment...5,5)-JANJ780102",ASA/Volume,Buried,Buried,"Percentage of b...n et al., 1978)",0.291,0.13,-0.13,0.099,0.124,0.0,0.0,"33,34,35,36,37,38,39,40"
6,"TMD_JMD-Segment...5,5)-ZIMJ680103",Polarity,Hydrophilicity,Polarity (hydrophilicity),"Polarity (Zimme...n et al., 1968)",0.289,0.178,0.178,0.159,0.163,0.0,0.0,"33,34,35,36,37,38,39,40"
7,"TMD_JMD-Segment...4,5)-FUKS010106",Composition,Membrane proteins (MPs),Proteins of mesophiles (INT),"Interior compos...ishikawa, 2001)",0.277,0.123,0.123,0.104,0.127,0.0,0.0,"25,26,27,28,29,30,31,32"
8,"TMD_JMD-Segment...4,4)-WOLR790101",Polarity,Hydrophobicity (surrounding),Hydration potential,"Hydrophobicity ...n et al., 1979)",0.267,0.105,-0.105,0.1,0.113,0.0,1e-06,"31,32,33,34,35,36,37,38,39,40"
9,"TMD_JMD-Segment...2,2)-CEDJ970105",Composition,AA composition,Nuclear proteins,"Composition of ...o et al., 1997)",0.263,0.062,0.062,0.062,0.069,0.0,1e-06,"21,22,23,24,25,...,36,37,38,39,40"
10,"TMD_JMD-Segment...5,5)-MITS020101",Polarity,Amphiphilicity,Amphiphilicity,"Amphiphilicity ...u et al., 2002)",0.262,0.073,0.073,0.071,0.086,0.0,1e-06,"33,34,35,36,37,38,39,40"


The maximum number of final features can be adjusted using the ``n_filter`` (default=100) parameter. The actual number of features may be less, depending on: (a) the initial number of features generated (defined by the ``part-split-scale`` combinations), and (b) the strictness of both pre-filtering and filtering criteria.

In [14]:
# Create ~700 features and filter them down to 10 features
df_feat = cpp.run(labels=labels, n_filter=10)
aa.display_df(df_feat, n_rows=10, show_shape=True)

DataFrame shape: (10, 13)


Unnamed: 0,feature,category,subcategory,scale_name,scale_description,abs_auc,abs_mean_dif,mean_dif,std_test,std_ref,p_val_mann_whitney,p_val_fdr_bh,positions
1,"TMD_JMD-Segment...4,5)-ROBB760113",Conformation,β-turn,β-turn,"Information mea...n-Suzuki, 1976)",0.316,0.137,-0.137,0.102,0.108,0.0,0.0,"25,26,27,28,29,30,31,32"
2,"TMD_JMD-Segment...4,4)-ZIMJ680104",Energy,Isoelectric point,Isoelectric point,"Isoelectric poi...n et al., 1968)",0.312,0.099,0.099,0.069,0.095,0.0,0.0,"31,32,33,34,35,36,37,38,39,40"
3,"TMD_JMD-Segment...4,5)-KANM800103",Conformation,α-helix,α-helix,"Average relativ...sa-Tsong, 1980)",0.297,0.086,0.086,0.077,0.068,0.0,0.0,"25,26,27,28,29,30,31,32"
4,"TMD_JMD-Segment...5,5)-LINS030104",ASA/Volume,Accessible surface area (ASA),ASA (folded protein),"Total median ac...s et al., 2003)",0.295,0.141,0.141,0.115,0.13,0.0,0.0,"33,34,35,36,37,38,39,40"
5,"TMD_JMD-Segment...5,5)-JANJ780102",ASA/Volume,Buried,Buried,"Percentage of b...n et al., 1978)",0.291,0.13,-0.13,0.099,0.124,0.0,0.0,"33,34,35,36,37,38,39,40"
6,"TMD_JMD-Segment...5,5)-ZIMJ680103",Polarity,Hydrophilicity,Polarity (hydrophilicity),"Polarity (Zimme...n et al., 1968)",0.289,0.178,0.178,0.159,0.163,0.0,0.0,"33,34,35,36,37,38,39,40"
7,"TMD_JMD-Segment...4,5)-FUKS010106",Composition,Membrane proteins (MPs),Proteins of mesophiles (INT),"Interior compos...ishikawa, 2001)",0.277,0.123,0.123,0.104,0.127,0.0,0.0,"25,26,27,28,29,30,31,32"
8,"TMD_JMD-Segment...4,4)-WOLR790101",Polarity,Hydrophobicity (surrounding),Hydration potential,"Hydrophobicity ...n et al., 1979)",0.267,0.105,-0.105,0.1,0.113,0.0,0.0,"31,32,33,34,35,36,37,38,39,40"
9,"TMD_JMD-Segment...5,5)-MIYS990104",Composition,MPs (anchor),Partition energy,"Optimized relat...Jernigan, 1999)",0.243,0.103,0.103,0.095,0.126,2e-06,4e-06,"33,34,35,36,37,38,39,40"
10,"TMD_JMD-Segment...4,5)-ANDN920101",Structure-Activity,Backbone-dynamics (-CH),α-CH chemical s...kbone-dynamics),"alpha-CH chemic...n et al., 1992)",0.229,0.102,-0.102,0.097,0.125,9e-06,1.2e-05,"25,26,27,28,29,30,31,32"


In the initial CPP pre-filtering step, you can either set the number of retained features using ``n_pre_filter`` or define a percentage of initial features with ``pct_pre_filter`` (default: 5%). Additionally, you can adjust the maximum standard deviation allowed in the test dataset for each feature via ``max_std_test``:

In [15]:
# Pre-filter by retaining 50% of the initial features and allowing a maximum std of 0.5 in the test set
# Create ~700 features and filter them down to 26 features
df_feat = cpp.run(labels=labels, pct_pre_filter=50, max_std_test=0.5)
aa.display_df(df_feat, n_rows=10, show_shape=True)

DataFrame shape: (26, 13)


Unnamed: 0,feature,category,subcategory,scale_name,scale_description,abs_auc,abs_mean_dif,mean_dif,std_test,std_ref,p_val_mann_whitney,p_val_fdr_bh,positions
1,"TMD_JMD-Segment...4,5)-ROBB760113",Conformation,β-turn,β-turn,"Information mea...n-Suzuki, 1976)",0.316,0.137,-0.137,0.102,0.108,0.0,0.0,"25,26,27,28,29,30,31,32"
2,"TMD_JMD-Segment...4,4)-ZIMJ680104",Energy,Isoelectric point,Isoelectric point,"Isoelectric poi...n et al., 1968)",0.312,0.099,0.099,0.069,0.095,0.0,0.0,"31,32,33,34,35,36,37,38,39,40"
3,"TMD_JMD-Segment...2,2)-ONEK900101",Others,Unclassified (Others),ΔG values in peptides,"Delta G values ...-DeGrado, 1990)",0.31,0.041,0.041,0.028,0.044,0.0,0.0,"21,22,23,24,25,...,36,37,38,39,40"
4,"TMD_JMD-Segment...4,5)-KANM800103",Conformation,α-helix,α-helix,"Average relativ...sa-Tsong, 1980)",0.297,0.086,0.086,0.077,0.068,0.0,0.0,"25,26,27,28,29,30,31,32"
5,"TMD_JMD-Segment...5,5)-LINS030104",ASA/Volume,Accessible surface area (ASA),ASA (folded protein),"Total median ac...s et al., 2003)",0.295,0.141,0.141,0.115,0.13,0.0,1e-06,"33,34,35,36,37,38,39,40"
6,"TMD_JMD-Segment...5,5)-JANJ780102",ASA/Volume,Buried,Buried,"Percentage of b...n et al., 1978)",0.291,0.13,-0.13,0.099,0.124,0.0,1e-06,"33,34,35,36,37,38,39,40"
7,"TMD_JMD-Segment...5,5)-ZIMJ680103",Polarity,Hydrophilicity,Polarity (hydrophilicity),"Polarity (Zimme...n et al., 1968)",0.289,0.178,0.178,0.159,0.163,0.0,1e-06,"33,34,35,36,37,38,39,40"
8,"TMD_JMD-Segment...4,5)-FUKS010106",Composition,Membrane proteins (MPs),Proteins of mesophiles (INT),"Interior compos...ishikawa, 2001)",0.277,0.123,0.123,0.104,0.127,0.0,1e-06,"25,26,27,28,29,30,31,32"
9,"TMD_JMD-Segment...3,4)-WOLR790101",Polarity,Hydrophobicity (surrounding),Hydration potential,"Hydrophobicity ...n et al., 1979)",0.274,0.052,0.052,0.034,0.06,0.0,1e-06,"21,22,23,24,25,26,27,28,29,30"
10,"TMD_JMD-Segment...1,2)-WEBA780101",Others,Mutability,RF value,"RF value in hig...er-Lacey, 1978)",0.268,0.042,0.042,0.039,0.046,0.0,2e-06,"1,2,3,4,5,6,7,8...,16,17,18,19,20"


For the final CPP filtering step, you can use the following three parameters: ``max_overlap`` setting the allowed maximum positional overlap of similar features (the higher, the less strict), ``max_cor`` defining the allowed maximum Pearson correlation for scales of similar features (the higher, the less strict), and ``check_cat`` setting whether redundancy of scale categories should be considered or not (setting it to ``False`` will result in stricter filtering since features across all categories are compared): 

In [16]:
# Disable filtering by setting max_overlap and max_cor to 1
# Create ~700 features and filter them down to 100 features (capped by the default n_filter=100)
df_feat = cpp.run(labels=labels, max_overlap=1, max_cor=1)
aa.display_df(df_feat, n_rows=10, show_shape=True)

DataFrame shape: (100, 13)


Unnamed: 0,feature,category,subcategory,scale_name,scale_description,abs_auc,abs_mean_dif,mean_dif,std_test,std_ref,p_val_mann_whitney,p_val_fdr_bh,positions
1,"TMD_JMD-Segment...4,5)-ROBB760113",Conformation,β-turn,β-turn,"Information mea...n-Suzuki, 1976)",0.316,0.137,-0.137,0.102,0.108,0.0,0.0,"25,26,27,28,29,30,31,32"
2,"TMD_JMD-Segment...4,4)-ZIMJ680104",Energy,Isoelectric point,Isoelectric point,"Isoelectric poi...n et al., 1968)",0.312,0.099,0.099,0.069,0.095,0.0,0.0,"31,32,33,34,35,36,37,38,39,40"
3,"TMD_JMD-Segment...3,3)-ZIMJ680104",Energy,Isoelectric point,Isoelectric point,"Isoelectric poi...n et al., 1968)",0.304,0.069,0.069,0.051,0.073,0.0,0.0,"27,28,29,30,31,...,36,37,38,39,40"
4,"TMD_JMD-Segment...4,5)-KANM800103",Conformation,α-helix,α-helix,"Average relativ...sa-Tsong, 1980)",0.297,0.086,0.086,0.077,0.068,0.0,0.0,"25,26,27,28,29,30,31,32"
5,"TMD_JMD-Segment...5,5)-LINS030104",ASA/Volume,Accessible surface area (ASA),ASA (folded protein),"Total median ac...s et al., 2003)",0.295,0.141,0.141,0.115,0.13,0.0,0.0,"33,34,35,36,37,38,39,40"
6,"TMD_JMD-Segment...2,2)-KANM800103",Conformation,α-helix,α-helix,"Average relativ...sa-Tsong, 1980)",0.292,0.058,0.058,0.045,0.054,0.0,0.0,"21,22,23,24,25,...,36,37,38,39,40"
7,"TMD_JMD-Segment...5,5)-JANJ780102",ASA/Volume,Buried,Buried,"Percentage of b...n et al., 1978)",0.291,0.13,-0.13,0.099,0.124,0.0,0.0,"33,34,35,36,37,38,39,40"
8,"TMD_JMD-Segment...4,4)-LINS030104",ASA/Volume,Accessible surface area (ASA),ASA (folded protein),"Total median ac...s et al., 2003)",0.291,0.127,0.127,0.097,0.121,0.0,0.0,"31,32,33,34,35,36,37,38,39,40"
9,"TMD_JMD-Segment...5,5)-ZIMJ680103",Polarity,Hydrophilicity,Polarity (hydrophilicity),"Polarity (Zimme...n et al., 1968)",0.289,0.178,0.178,0.159,0.163,0.0,0.0,"33,34,35,36,37,38,39,40"
10,"TMD_JMD-Segment...4,4)-ZIMJ680103",Polarity,Hydrophilicity,Polarity (hydrophilicity),"Polarity (Zimme...n et al., 1968)",0.288,0.164,0.164,0.135,0.145,0.0,0.0,"31,32,33,34,35,36,37,38,39,40"


In [17]:
# Perform stricter filtering by setting check_cat=False
# Create ~700 features and filter them down to 11 features
df_feat = cpp.run(labels=labels, check_cat=False)
aa.display_df(df_feat, n_rows=10, show_shape=True)

DataFrame shape: (11, 13)


Unnamed: 0,feature,category,subcategory,scale_name,scale_description,abs_auc,abs_mean_dif,mean_dif,std_test,std_ref,p_val_mann_whitney,p_val_fdr_bh,positions
1,"TMD_JMD-Segment...4,5)-ROBB760113",Conformation,β-turn,β-turn,"Information mea...n-Suzuki, 1976)",0.316,0.137,-0.137,0.102,0.108,0.0,0.0,"25,26,27,28,29,30,31,32"
2,"TMD_JMD-Segment...4,4)-ZIMJ680104",Energy,Isoelectric point,Isoelectric point,"Isoelectric poi...n et al., 1968)",0.312,0.099,0.099,0.069,0.095,0.0,0.0,"31,32,33,34,35,36,37,38,39,40"
3,"TMD_JMD-Segment...4,5)-KANM800103",Conformation,α-helix,α-helix,"Average relativ...sa-Tsong, 1980)",0.297,0.086,0.086,0.077,0.068,0.0,0.0,"25,26,27,28,29,30,31,32"
4,"TMD_JMD-Segment...5,5)-LINS030104",ASA/Volume,Accessible surface area (ASA),ASA (folded protein),"Total median ac...s et al., 2003)",0.295,0.141,0.141,0.115,0.13,0.0,0.0,"33,34,35,36,37,38,39,40"
5,"TMD_JMD-Segment...5,5)-JANJ780102",ASA/Volume,Buried,Buried,"Percentage of b...n et al., 1978)",0.291,0.13,-0.13,0.099,0.124,0.0,0.0,"33,34,35,36,37,38,39,40"
6,"TMD_JMD-Segment...2,2)-CEDJ970105",Composition,AA composition,Nuclear proteins,"Composition of ...o et al., 1997)",0.263,0.062,0.062,0.062,0.069,0.0,1e-06,"21,22,23,24,25,...,36,37,38,39,40"
7,"TMD_JMD-Segment...5,5)-MITS020101",Polarity,Amphiphilicity,Amphiphilicity,"Amphiphilicity ...u et al., 2002)",0.262,0.073,0.073,0.071,0.086,0.0,1e-06,"33,34,35,36,37,38,39,40"
8,"TMD_JMD-Segment...1,2)-SIMZ760101",Polarity,Hydrophobicity,Transfer free e...TFE) to outside,Transfer free e...-Charton (1982),0.259,0.064,-0.064,0.069,0.072,1e-06,2e-06,"1,2,3,4,5,6,7,8...,16,17,18,19,20"
9,"TMD_JMD-Segment...4,5)-ANDN920101",Structure-Activity,Backbone-dynamics (-CH),α-CH chemical s...kbone-dynamics),"alpha-CH chemic...n et al., 1992)",0.229,0.102,-0.102,0.097,0.125,9e-06,1.7e-05,"25,26,27,28,29,30,31,32"
10,"TMD_JMD-Segment...4,4)-YUTK870103",Energy,Free energy (unfolding),Free energy (unfolding),"Activation Gibb...i et al., 1987)",0.201,0.084,-0.084,0.115,0.118,0.000103,0.000143,"31,32,33,34,35,36,37,38,39,40"


The residue positions can be adjusted using the ``start``, ``tmd_len``, ``jmd_n_len``, and ``jmd_c_len`` parameters:

In [18]:
# Shift positions by 10 residues (start=11); only the reported ``positions`` column changes
df_feat = cpp.run(labels=labels, start=11)
aa.display_df(df_feat, n_rows=10, show_shape=True)

DataFrame shape: (19, 13)


Unnamed: 0,feature,category,subcategory,scale_name,scale_description,abs_auc,abs_mean_dif,mean_dif,std_test,std_ref,p_val_mann_whitney,p_val_fdr_bh,positions
1,"TMD_JMD-Segment...4,5)-ROBB760113",Conformation,β-turn,β-turn,"Information mea...n-Suzuki, 1976)",0.316,0.137,-0.137,0.102,0.108,0.0,0.0,"35,36,37,38,39,40,41,42"
2,"TMD_JMD-Segment...4,4)-ZIMJ680104",Energy,Isoelectric point,Isoelectric point,"Isoelectric poi...n et al., 1968)",0.312,0.099,0.099,0.069,0.095,0.0,0.0,"41,42,43,44,45,46,47,48,49,50"
3,"TMD_JMD-Segment...4,5)-KANM800103",Conformation,α-helix,α-helix,"Average relativ...sa-Tsong, 1980)",0.297,0.086,0.086,0.077,0.068,0.0,0.0,"35,36,37,38,39,40,41,42"
4,"TMD_JMD-Segment...5,5)-LINS030104",ASA/Volume,Accessible surface area (ASA),ASA (folded protein),"Total median ac...s et al., 2003)",0.295,0.141,0.141,0.115,0.13,0.0,0.0,"43,44,45,46,47,48,49,50"
5,"TMD_JMD-Segment...5,5)-JANJ780102",ASA/Volume,Buried,Buried,"Percentage of b...n et al., 1978)",0.291,0.13,-0.13,0.099,0.124,0.0,0.0,"43,44,45,46,47,48,49,50"
6,"TMD_JMD-Segment...5,5)-ZIMJ680103",Polarity,Hydrophilicity,Polarity (hydrophilicity),"Polarity (Zimme...n et al., 1968)",0.289,0.178,0.178,0.159,0.163,0.0,0.0,"43,44,45,46,47,48,49,50"
7,"TMD_JMD-Segment...4,5)-FUKS010106",Composition,Membrane proteins (MPs),Proteins of mesophiles (INT),"Interior compos...ishikawa, 2001)",0.277,0.123,0.123,0.104,0.127,0.0,0.0,"35,36,37,38,39,40,41,42"
8,"TMD_JMD-Segment...4,4)-WOLR790101",Polarity,Hydrophobicity (surrounding),Hydration potential,"Hydrophobicity ...n et al., 1979)",0.267,0.105,-0.105,0.1,0.113,0.0,1e-06,"41,42,43,44,45,46,47,48,49,50"
9,"TMD_JMD-Segment...2,2)-CEDJ970105",Composition,AA composition,Nuclear proteins,"Composition of ...o et al., 1997)",0.263,0.062,0.062,0.062,0.069,0.0,1e-06,"31,32,33,34,35,...,46,47,48,49,50"
10,"TMD_JMD-Segment...5,5)-MITS020101",Polarity,Amphiphilicity,Amphiphilicity,"Amphiphilicity ...u et al., 2002)",0.262,0.073,0.073,0.071,0.086,0.0,1e-06,"43,44,45,46,47,48,49,50"


In [19]:
# Increase TMD length from 20 to 50; this changes the reported ``positions`` of each feature
df_feat = cpp.run(labels=labels, tmd_len=50)
aa.display_df(df_feat, n_rows=10, show_shape=True)

DataFrame shape: (19, 13)


Unnamed: 0,feature,category,subcategory,scale_name,scale_description,abs_auc,abs_mean_dif,mean_dif,std_test,std_ref,p_val_mann_whitney,p_val_fdr_bh,positions
1,"TMD_JMD-Segment...4,5)-ROBB760113",Conformation,β-turn,β-turn,"Information mea...n-Suzuki, 1976)",0.316,0.137,-0.137,0.102,0.108,0.0,0.0,"43,44,45,46,47,...,52,53,54,55,56"
2,"TMD_JMD-Segment...4,4)-ZIMJ680104",Energy,Isoelectric point,Isoelectric point,"Isoelectric poi...n et al., 1968)",0.312,0.099,0.099,0.069,0.095,0.0,0.0,"53,54,55,56,57,...,66,67,68,69,70"
3,"TMD_JMD-Segment...4,5)-KANM800103",Conformation,α-helix,α-helix,"Average relativ...sa-Tsong, 1980)",0.297,0.086,0.086,0.077,0.068,0.0,0.0,"43,44,45,46,47,...,52,53,54,55,56"
4,"TMD_JMD-Segment...5,5)-LINS030104",ASA/Volume,Accessible surface area (ASA),ASA (folded protein),"Total median ac...s et al., 2003)",0.295,0.141,0.141,0.115,0.13,0.0,0.0,"57,58,59,60,61,...,66,67,68,69,70"
5,"TMD_JMD-Segment...5,5)-JANJ780102",ASA/Volume,Buried,Buried,"Percentage of b...n et al., 1978)",0.291,0.13,-0.13,0.099,0.124,0.0,0.0,"57,58,59,60,61,...,66,67,68,69,70"
6,"TMD_JMD-Segment...5,5)-ZIMJ680103",Polarity,Hydrophilicity,Polarity (hydrophilicity),"Polarity (Zimme...n et al., 1968)",0.289,0.178,0.178,0.159,0.163,0.0,0.0,"57,58,59,60,61,...,66,67,68,69,70"
7,"TMD_JMD-Segment...4,5)-FUKS010106",Composition,Membrane proteins (MPs),Proteins of mesophiles (INT),"Interior compos...ishikawa, 2001)",0.277,0.123,0.123,0.104,0.127,0.0,0.0,"43,44,45,46,47,...,52,53,54,55,56"
8,"TMD_JMD-Segment...4,4)-WOLR790101",Polarity,Hydrophobicity (surrounding),Hydration potential,"Hydrophobicity ...n et al., 1979)",0.267,0.105,-0.105,0.1,0.113,0.0,1e-06,"53,54,55,56,57,...,66,67,68,69,70"
9,"TMD_JMD-Segment...2,2)-CEDJ970105",Composition,AA composition,Nuclear proteins,"Composition of ...o et al., 1997)",0.263,0.062,0.062,0.062,0.069,0.0,1e-06,"36,37,38,39,40,...,66,67,68,69,70"
10,"TMD_JMD-Segment...5,5)-MITS020101",Polarity,Amphiphilicity,Amphiphilicity,"Amphiphilicity ...u et al., 2002)",0.262,0.073,0.073,0.071,0.086,0.0,1e-06,"57,58,59,60,61,...,66,67,68,69,70"


Multiprocessing can be enabled by using the ``n_jobs`` parameter, which is set to the maximum if ``n_jobs=None``. However, this is only recommended for more than ~1000 features per core due to potential process management overhead.

In [20]:
import time

# Durations are measured with time.perf_counter(): unlike time.time(), it is a
# monotonic, high-resolution clock, so the elapsed time cannot be distorted by
# system clock adjustments between the two readings.

# Run without multiprocessing
time_start = time.perf_counter()
df_feat = cpp.run(labels=labels, n_jobs=1)
time_no_mp = round(time.perf_counter() - time_start, 2)
print(f"Time without multiprocessing: {time_no_mp} seconds")

# Run with multiprocessing (n_jobs=None sets the number of jobs to the maximum)
time_start = time.perf_counter()
df_feat = cpp.run(labels=labels, n_jobs=None)
time_mp = round(time.perf_counter() - time_start, 2)
print(f"Time with multiprocessing. {time_mp} seconds")

Time without multiprocessing: 0.63 seconds
Time with multiprocessing. 0.34 seconds
