In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.feature_selection import RFECV
from sklearn.feature_selection import f_classif
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import mutual_info_classif
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
dataset=pd.read_csv("data.csv")
dataset

Unnamed: 0,chroma_stft,chroma_cqt,chroma_cens,melspectrogram,mfccs,rms,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,...,poly_features,zero_crossing_rate,harmonic_centroid,harmonic_tonnetz,harmonic_rms,harmonic_spectral_flatness,harmonic_spectral_contrast,harmonic_spectral_rolloff,harmonic_zero_crossing_rate,class
0,0.314853,0.449387,0.269628,0.868352,-8.460384,0.050471,2055.744424,2122.464978,18.175618,0.028133,...,0.557306,0.096934,1435.764890,-0.005154,0.019528,0.002812,19.814962,3142.599355,0.045778,assistance off
1,0.333241,0.410405,0.256706,0.847919,-8.759901,0.051537,2203.288489,2101.405569,18.541361,0.042439,...,0.550201,0.122422,1441.055795,0.006465,0.028310,0.003472,20.931377,2942.696027,0.051543,assistance off
2,0.365795,0.491572,0.270528,0.852467,-10.320987,0.043890,1978.270511,2104.966467,17.458287,0.038269,...,0.475075,0.096132,1404.367754,-0.004612,0.015012,0.004198,18.908135,2962.471830,0.051135,assistance off
3,0.366682,0.449307,0.263398,0.392838,-11.028899,0.033162,2010.991748,2108.957274,17.270739,0.034354,...,0.368664,0.095388,1257.770677,0.002987,0.013581,0.002509,19.202183,2729.596096,0.041403,assistance off
4,0.382542,0.491022,0.267234,0.501142,-10.479601,0.035843,1670.864122,1918.899049,17.294483,0.025254,...,0.392079,0.077822,1137.549262,-0.006427,0.015291,0.001925,18.965096,2195.396686,0.039714,assistance off
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1085,0.320585,0.360972,0.214530,1.029822,-16.066755,0.050752,1980.002852,2158.087737,21.337520,0.017429,...,0.337920,0.111399,1347.192344,-0.007183,0.043869,0.005473,24.802369,2525.889587,0.063995,hello
1086,0.406721,0.443279,0.238275,0.781018,-16.405330,0.039501,2419.303388,2325.301043,20.082895,0.028263,...,0.300632,0.136433,1416.064868,0.000448,0.034590,0.007167,23.309083,2732.729117,0.062222,hello
1087,0.365377,0.409115,0.225390,0.821256,-17.146938,0.043208,1930.901676,2044.480949,20.104520,0.018393,...,0.310116,0.104292,1203.158622,-0.004164,0.033083,0.004618,22.891717,2082.013640,0.051918,hello
1088,0.388034,0.443211,0.235701,0.738131,-16.789186,0.039363,1990.073375,2274.138626,19.599763,0.020506,...,0.279512,0.102178,1320.318022,-0.003163,0.030124,0.006198,22.296483,2497.189002,0.056122,hello


# Audio Features Documentation

| Feature Name                  | Description                                                                                                            |
|-------------------------------|------------------------------------------------------------------------------------------------------------------------|
| chroma_stft                   | Chroma short-time Fourier transform: Represents the 12 different pitch classes and their intensities in a segment.  |
| chroma_cqt                    | Chroma constant-Q transform: Similar to chroma_stft but uses a constant-Q transform for more musically relevant bins.   |
| chroma_cens                   | Chroma energy normalized statistics: Normalized version of chroma features, emphasizing energy distribution.           |
| melspectrogram                | Mel spectrogram: Represents the power spectrum of a signal, with emphasis on perceptually relevant frequency bands.     |
| mfccs                         | Mel-frequency cepstral coefficients: Capture the shape of the power spectrum, commonly used in speech and audio analysis.|
| rms                           | Root mean square: Represents the energy of an audio signal.                                                            |
| spectral_centroid             | Center of mass of the spectrum: Indicates where the "center of mass" of the spectrum is located.                        |
| spectral_bandwidth            | Width of the spectral band: Describes the width of the spectral band around the spectral centroid.                        |
| spectral_contrast             | Spectral contrast: Measures the difference in amplitude between peaks and valleys in the spectrum.                     |
| spectral_flatness             | Spectral flatness: Indicates how flat or peaky the spectrum is.                                                          |

### Additional Harmonic Features:

| Feature Name                     | Description                                                                                          |
|----------------------------------|------------------------------------------------------------------------------------------------------|
| harmonic_centroid                | Centroid of the harmonic spectrum: Indicates the center of mass of the harmonic component.         |
| harmonic_tonnetz                 | Tonal centroid features: Captures harmonic content in a way similar to human perception of tonality.|
| harmonic_rms                     | RMS of the harmonic component: Represents the energy of the harmonic part of the signal.            |
| harmonic_spectral_flatness       | Spectral flatness of the harmonic component: Similar to spectral flatness but for the harmonic part.  |
| harmonic_spectral_contrast       | Spectral contrast of the harmonic component: Measures the difference in amplitude for harmonic peaks and valleys.|
| harmonic_spectral_rolloff        | Spectral rolloff of the harmonic component: Frequency below which a specified percentage of the total harmonic energy lies. |
| harmonic_zero_crossing_rate      | Zero-crossing rate of the harmonic component: Measures the rate at which the signal changes its sign in the harmonic part.|

# Importance of Audio Features

| Feature Name                  | Importance                                                                                                          |
|-------------------------------|----------------------------------------------------------------------------------------------------------------------|
| chroma_stft                   | Useful for classification, chord recognition, and audio similarity analysis.                            |
| chroma_cqt                    | Valuable for pitch detection, music transcription, and recognizing musical patterns.                                 |
| chroma_cens                   | Enhances chroma features for improved musical analysis, such as chord and genre recognition.                         |
| melspectrogram                | Fundamental for speech recognition, music genre classification, and audio event detection.                          |
| mfccs                         | Widely used in speech and speaker recognition, audio classification, and emotion detection.                          |
| rms                           | Provides a measure of overall signal energy, crucial for audio segmentation and scene analysis.                      |
| spectral_centroid             | Useful in audio segmentation, speech recognition, and musical instrument classification.                             |
| spectral_bandwidth            | Important for audio classification tasks, such as distinguishing between different musical instruments.              |
| spectral_contrast             | Helps in recognizing sound texture, music genre classification, and environmental sound analysis.                    |
| spectral_flatness             | Indicates whether the spectrum is tonal or noisy, valuable for music genre and mood classification.                   |

### Additional Harmonic Features:

| Feature Name                     | Importance                                                                                                              |
|----------------------------------|--------------------------------------------------------------------------------------------------------------------------|
| harmonic_centroid                | Useful for harmonic content analysis, instrument recognition, and tonal feature extraction.                             |
| harmonic_tonnetz                 | Valuable for harmonic analysis, music key detection, and tonal structure representation.                                  |
| harmonic_rms                     | Important for distinguishing between harmonic and inharmonic components, aiding in audio quality assessment.              |
| harmonic_spectral_flatness       | Helps in characterizing the harmonic content's spectral shape, relevant for harmonic feature analysis and classification.|
| harmonic_spectral_contrast       | Useful for distinguishing harmonic structures in music and recognizing patterns in harmonic components.                   |
| harmonic_spectral_rolloff        | Provides insights into the distribution of harmonic energy, aiding in harmonic content analysis and instrument detection.|
| harmonic_zero_crossing_rate      | Helpful for analyzing harmonic changes in the signal, e.g., detecting transitions in musical notes or chords.             |


# Feactures

X=dataset.iloc[:,:-1].values
X

# Labels

y=dataset.iloc[:,-1].values
y

# Encode the Catagorical Labels into Numeric Labels

from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
y=le.fit_transform(y)

# Splitting the data Set into Training and Testing Set

from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.25,stratify=y,random_state=42)

# Feacture Scaling

from sklearn.preprocessing import StandardScaler
ss=StandardScaler()
x_train=ss.fit_transform(x_train)

x_test=ss.fit_transform(x_test)
x_test

# Feature Selection/Engineering

# Introduction:

Feature selection is a crucial step in developing effective machine learning models, particularly for classification tasks. This process involves selecting a subset of relevant features from the original data set, aiming to improve the model's performance,generalizability, and interpretability. By eliminating irrelevant or redundant features, we reduce overfitting and enhance the model's ability to generalize to unseen data. Additionally, focusing on important features improves the model's interpretability by highlighting the key factors influencing its predictions.

### Methods:

This code implements various feature selection methods commonly used in classification tasks:

### 1. Principal Component Analysis (PCA):

Description: Reduces dimensionality by retaining principal components that explain most of the data variance.
Significance: Reduces noise, improves model performance and interpretability by focusing on key components.
### 2. Forward Feature Selection:

Description: Sequentially adds features that improve the model's performance.
Significance: Identifies the minimum set of features needed for good performance, reducing complexity and training time.
### 3. Recursive Feature Elimination (RFECV):

Description: Eliminates features iteratively based on their importance to the model.
Significance: Prevents overfitting by removing redundant or irrelevant features, improvinggeneralizability.
### 4. Fisher Score:

Description: Ranks features according to their ability to distinguish between different classes.
Significance: Identifies features with high discriminative power, enhancing the model's ability to differentiate between classes.
### 5. Feature Importance for Logistic Regression:

Description: Measures the weight assigned to each feature by the Logistic Regression model.
Significance: Highlights features with significant influence on the model's predictions, improving interpretability.
### 6. Feature Importance for Random Forest:

Description: Measures the importance of each feature in the Random Forest model.
Significance: Identifies features that contribute most to the model's performance, guiding further analysis and optimization.
### 7. Mutual Information:

Description: Quantifies the mutual dependence between features and the target variable.
Significance: Identifies features with strong relationships to the target class, providing valuable insights into the model's decision-making process.
### 8. Correlation:

Description: Analyzes the linear relationship between features.
Significance: Helps identify redundant or irrelevant features based on high correlations, reducing model complexity.

# Into Class Structure

In [3]:
class Data_Preprocessing:  
    
    def __init__(self):
        self.dataset=pd.read_csv("data.csv")
        self.x_train=[],
        self.x_test=[],
        self.y_train=[],
        self.y_test=[],
#         self.X=[],
#         self.y=[]
    
    def get_x_train(self):
        return self.x_train
    def get_y_train(self):
        return self.y_train
    def get_x_test(self):
        return self.x_test
    def get_y_test(self):
        return self.y_test

    def get_X(self):
        temp= self.dataset.iloc[:,:-1].values
        return temp

    def get_y(self):
        temp= self.dataset.iloc[:,-1].values
        return temp
    
    def Encode_labels(self,y):
        le=LabelEncoder()
        temp=le.fit_transform(y)
        return temp
    
    def split_X_and_y(self,X,y):
        self.x_train,self.x_test,self.y_train,self.y_test=train_test_split(X,y,test_size=0.25,stratify=y,random_state=42)
    
    def scaling_data(self):
        ss=StandardScaler()
        self.x_train=ss.fit_transform(self.x_train)
        self.x_test=ss.fit_transform(self.x_test)
        
    def preprocessing(self):
        X=self.get_X()
#         print(X)
        y=self.get_y()
#         print(y)
        en_y=self.Encode_labels(y)
#         print(en_y)
        self.split_X_and_y(X,en_y)
        self.scaling_data()


In [5]:
preprocessor=Data_Preprocessing()
preprocessor.preprocessing()
X=preprocessor.get_x_train()
y=preprocessor.get_y_train()
y.shape
X.shape
X[0:50]

array([[ 8.22544672e-02,  2.79880821e-01,  8.61749178e-02,
        -7.12197241e-01,  1.18129686e+00, -7.60354861e-01,
        -7.82564403e-01, -5.19097392e-01, -1.05121936e+00,
        -8.51424380e-01, -5.40214162e-01, -4.03967822e-01,
        -7.59740973e-01, -4.24029769e-01, -9.63963635e-01,
        -8.21101591e-01, -5.23336969e-01, -1.17880279e+00,
        -3.28928714e-01, -2.82759618e-01],
       [ 1.06872737e+00,  3.36196448e-01, -6.38027219e-01,
        -3.75991912e-01, -9.89588606e-01, -5.98048519e-01,
         6.64902693e-01,  4.02021187e-01,  1.42643650e-01,
        -1.23232293e-01,  3.86148177e-01, -1.01003419e+00,
         1.13962559e+00,  3.80226972e-01,  5.48037961e-01,
        -2.03133997e-01,  3.84419865e-01,  2.29742742e-01,
         2.57288844e-01,  8.70475598e-01],
       [ 1.19654692e+00,  6.53146097e-01,  8.87021341e-01,
        -5.98076967e-01, -4.35542239e-01, -7.56635660e-01,
        -1.36736868e-01, -3.02378281e-01,  1.95616819e-01,
        -2.10280789e-01, -1.7

In [61]:
class feature_selection():
    
#     def __init__(self):
        
#         self.data_preprocesser=Data_Preprocessing()
#         self.data_preprocesser.preprocessing()
#         self.x_train=self.data_preprocesser.get_x_train(),
#         self.x_test=self.data_preprocesser.get_x_test(),
#         self.y_train=self.data_preprocesser.get_y_train(),
#         self.y_test=self.data_preprocesser.get_y_test(),
#         print(self.x_test)
        
    def Dummy_model(self,X,y):
        
        classifier = LogisticRegression()
        classifier.fit(X,y)
        score=classifier.score(X,y)
        return score
    
    def pca_test(self,X):
        pca=PCA(0.95)
        df_pca=pca.fit_transform(X)
        return df_pca
#         df_pca
#         d=pd.DataFrame(df_pca)
#         d
#         pca.explained_variance_ratio_
#         df_pca.shape
    
    def forward_test(self,X,y):
        model = LogisticRegression()
        selector = SequentialFeatureSelector(estimator=model)
        selector.fit(X,y)
        selected_features = selector.get_support()
        return selected_features
    
    def recursive_feature_elimination(self,X,y):
        model = LogisticRegression()  #max_iter=1000
        selector = RFECV(estimator=model, cv=5)
        selector.fit(X,y)
        selected_features = selector.get_support()
        return selected_features
    
    def fisher_score(self,X,y):
        # Perform Fisher score analysis
        f_scores = f_classif(X,y)[0]

        # Select features with high scores (e.g., greater than 0.5)
        threshold = 0.5
        selected_features = f_scores > threshold
        # d=pd.DataFrame(df_pca)
        # Print selected features
        # print(f"Selected features: {list(d.columns[selected_features])}")
#         print(f_scores)
        return selected_features

    
    def feature_importances_LG(self,X,y):
    
        # Train the model with multiclass classification
        model = LogisticRegression(multi_class='multinomial', solver='lbfgs')
        model.fit(X,y)

        # Get feature importance scores
        feature_importances = model.coef_[0]

        # Select important features based on importance scores
        threshold = 0.05
        selected_features = np.where(np.abs(feature_importances) > threshold)[0]

        # # Print selected features
        # print(f"Selected features: {list(X.columns[selected_features])}")
        feature_importances
        return selected_features
    
    def feature_importances_RFT(self,X,y):
        
        # Train the model
        model = RandomForestClassifier(random_state=42)
        model.fit(X,y)

        # Get feature importance scores
        feature_importances = model.feature_importances_

        # Select important features based on importance scores
        threshold = 0.05
        selected_features = np.where(feature_importances > threshold)[0]

        # # Print selected features
        # print(f"Selected features: {list(X.columns[selected_features])}")
        selected_features
        res=[False for i in range(20)]
        for i in range(9):
            res[selected_features[i]]=True
#         print(res,end="")
#         feature_importances
        return res
    
    def mutual_information(self,X,y):

        # Perform mutual information calculation
        information_gain = mutual_info_classif(X,y)

        # Select features with high information gain (e.g., greater than 0.1)
        threshold = 0.1
        selected_features = information_gain > threshold

        # # Print selected features
        # print(f"Selected features: {list(X.columns[selected_features])}")
        return selected_features
    
    def correlation(self,X):
        # x_train
        d=pd.DataFrame(X)
        return d.corr()

In [62]:
fc=feature_selection()

In [41]:
fc.Dummy_model(X,y)
# x=pd.DataFrame(fc.x_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.41370869033047736

In [27]:
fc.correlation(X)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,1.0,0.706786,0.481997,-0.01736,0.404672,-0.134584,0.237649,0.214044,-0.310651,0.358158,0.296733,-0.036161,0.25708,0.37774,0.064008,-0.331375,0.402919,-0.386501,0.426607,0.256717
1,0.706786,1.0,0.84161,-0.061432,0.480922,-0.144588,0.04694,-0.010771,-0.35776,0.088369,0.082478,0.064737,0.139259,0.238149,0.08098,-0.362351,0.152739,-0.403934,0.247382,0.339047
2,0.481997,0.84161,1.0,-0.027161,0.423982,-0.078482,-0.097067,-0.15875,-0.388371,-0.03092,-0.08775,0.171782,0.006068,0.103547,0.057791,-0.358198,-0.012239,-0.469302,0.091703,0.232484
3,-0.01736,-0.061432,-0.027161,1.0,0.217872,0.946192,-0.255928,-0.347102,0.412614,0.134381,-0.25813,0.86477,-0.105058,-0.238857,0.014453,0.817691,0.214996,0.397168,-0.22593,-0.047088
4,0.404672,0.480922,0.423982,0.217872,1.0,0.306532,0.157869,0.11928,-0.315543,0.187664,0.192133,0.477818,0.147716,0.248248,-0.010589,0.168588,0.070853,-0.348034,0.263819,0.205262
5,-0.134584,-0.144588,-0.078482,0.946192,0.306532,1.0,-0.217801,-0.345589,0.427458,0.079146,-0.250891,0.938899,-0.050837,-0.218682,0.028219,0.901588,0.148469,0.41329,-0.230335,0.014328
6,0.237649,0.04694,-0.097067,-0.255928,0.157869,-0.217801,1.0,0.902207,-0.203243,0.614943,0.961995,-0.230111,0.84152,0.929798,-0.018414,-0.184785,0.360327,-0.155718,0.916326,0.579435
7,0.214044,-0.010771,-0.15875,-0.347102,0.11928,-0.345589,0.902207,1.0,-0.333376,0.571556,0.951914,-0.380617,0.557559,0.85273,-0.098872,-0.270135,0.291442,-0.272873,0.870427,0.324427
8,-0.310651,-0.35776,-0.388371,0.412614,-0.315543,0.427458,-0.203243,-0.333376,1.0,-0.273599,-0.258057,0.246025,0.005416,-0.312166,0.277459,0.481636,-0.099965,0.964209,-0.306686,-0.079612
9,0.358158,0.088369,-0.03092,0.134381,0.187664,0.079146,0.614943,0.571556,-0.273599,1.0,0.613216,0.063936,0.534145,0.594418,-0.106905,-0.008035,0.690935,-0.225905,0.595955,0.335966


In [28]:
pca=fc.pca_test(X)
df_pca=pd.DataFrame(pca)
df_pca

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,-0.410785,-2.191436,1.394648,-1.022300,-0.670178,0.225020,-0.024581,-1.071834,-0.018149
1,1.584861,-0.266766,-0.982616,1.330344,0.762910,0.232340,-0.162418,-0.553980,-0.054845
2,1.046790,-1.236435,0.797912,1.801436,0.807447,0.041675,0.080375,-0.026016,-0.193752
3,-2.140034,3.236853,1.240137,-1.424484,3.129279,-0.186590,0.365497,-0.455990,-0.867707
4,0.381144,5.249261,2.408188,-2.060440,-1.215336,-0.115066,0.271699,0.621416,0.386019
...,...,...,...,...,...,...,...,...,...
812,-0.814593,-0.360685,0.114740,1.148315,0.174551,-0.469445,-0.001214,0.310749,-0.291676
813,-0.441297,1.012027,3.111796,-1.602960,-2.227377,0.023538,0.824418,-0.210448,-0.009020
814,-1.414113,-0.293841,-2.183952,2.219516,-0.543520,-0.710765,0.317892,0.326624,0.210462
815,0.551489,-0.400571,-0.769623,1.251549,0.884905,-0.215052,0.247686,-0.271708,-0.660209


In [29]:
fc.Dummy_model(pca,y)

0.2962056303549572

In [36]:
fc.fisher_score(X,y)

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True])

In [50]:
fc.forward_test(X,y)









array([ True,  True,  True,  True,  True, False, False, False,  True,
        True, False,  True, False, False, False,  True, False,  True,
       False, False])

In [51]:
# xx=x_train[:,[0,1,2,3,5,7,8,9,10,17]]
xx=X[:,[0,1,2,3,4,8,9,11,15,17]]
fc.Dummy_model(xx,y)

0.35006119951040393

In [52]:
fc.recursive_feature_elimination(X,y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True, False,  True,
       False, False])

In [53]:
xx=X[:,[0,1,2,3,4,5,6,7,8,9,11,12,13,15,17]]
fc.Dummy_model(xx,y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.40024479804161567

In [55]:
fc.feature_importances_LG(X,y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 15, 16, 17,
       18, 19], dtype=int64)

In [56]:
xx=X[:,[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 15, 16, 17,
       18, 19]]
fc.Dummy_model(xx,y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.408812729498164

In [58]:
results=fc.feature_importances_RFT(X,y)
print(results,end="")

[True, True, True, True, True, False, False, True, True, False, False, True, False, False, False, False, False, True, False, False]

In [59]:
xx=X[:,[ 0,  1,  2,  3,  4,  7,  8, 11, 17]]
fc.Dummy_model(xx,y)

0.32313341493268055

In [63]:
fc.mutual_information(X,y)

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False,  True, False,  True,  True,  True,
        True,  True])

In [65]:
xx=X[:,[ 0,  1,  2,  3,  4, 5,6, 7,  8, 9, 10, 11, 13, 15, 16, 17, 18, 19]]
fc.Dummy_model(xx,y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.4039167686658507