## **Exercise 17.03**
### Classification Model after Automated Feature Generation

### Importing modules

In [2]:
import pandas as pd

### Loading data

In [None]:
# Remote CSV holding the banknote authentication data set.
DATA_URL = 'https://raw.githubusercontent.com/fenago/datasets/main/BankNote_Authentication.csv'

# Read the CSV straight from the URL and preview the first rows.
auth = pd.read_csv(DATA_URL)
auth.head()

### Popping target variable

In [4]:
# Detach the label column from `auth` and keep it as the target vector.
# pop() removes the column in place, so `auth` holds only the remaining
# columns afterwards; re-running this cell without reloading the data
# raises a KeyError because the column is already gone.
target_column = 'class'
Y = auth.pop(target_column)

### Train-test Split

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed random_state makes
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    auth,
    Y,
    test_size=0.3,
    random_state=123,
)

### Using pipelines to transform categorical and numeric variables

In [6]:

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Numeric columns are standardised with StandardScaler.
scaling_steps = [('scaler', StandardScaler())]
numeric_transformer = Pipeline(steps=scaling_steps)

# Categorical columns are one-hot encoded; handle_unknown='ignore' lets the
# encoder accept categories at predict time that were not seen during fit.
encoding_steps = [('onehot', OneHotEncoder(handle_unknown='ignore'))]
categorical_transformer = Pipeline(steps=encoding_steps)

### Defining data types for numeric and categorical features

In [7]:

# Split the column names of `auth` by dtype so each group can be routed to
# its own transformer.
numeric_dtypes = ['int64', 'float64']
categorical_dtypes = ['object']

numeric_features = auth.select_dtypes(include=numeric_dtypes).columns
categorical_features = auth.select_dtypes(include=categorical_dtypes).columns


### Defining preprocessor

In [8]:

from sklearn.compose import ColumnTransformer

# Each entry is (name, transformer, columns the transformer is applied to).
column_transformers = [
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_transformers)

### Defining the estimator for processing and classification

In [9]:

from sklearn.linear_model import LogisticRegression

# Chain preprocessing and the classifier so both are fitted together and the
# same transformations are applied at predict time.
classifier = LogisticRegression(random_state=123)
estimator = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier),
    ]
)

### Fit the estimator on the training set

In [None]:

# Fit the full pipeline on the training split, then report its score on the
# held-out test split.
estimator.fit(X_train, y_train)
test_score = estimator.score(X_test, y_test)
print("model score: %.2f" % test_score)

### Predict on the test set

In [11]:

# Predicted labels for the held-out test rows, produced by the fitted pipeline.
pred = estimator.predict(X_test)

### Generating classification report

In [None]:

from sklearn.metrics import classification_report

# classification_report expects (y_true, y_pred). Passing the predictions
# first swaps precision with recall and computes support from the predicted
# labels instead of the actual ones.
print(classification_report(y_test, pred))

In [12]:
# Build a string key per row ('notes' followed by the row's index label).
# It is used later as the index of the parent dataframe in the entity set.
row_labels = pd.Series(auth.index.values, index=auth.index)
auth['notesID'] = 'notes' + row_labels.astype(str)

### Creating skewnessId

In [13]:
# Start every row at id 0, then set the id to 1 where `skewness` equals the
# string '1'.
# NOTE(review): the comparison is against the *string* '1'. If `skewness` is
# numeric (as it presumably is when read from the CSV - TODO confirm), no row
# matches and `skewnessId` stays 0 for every row. Confirm the intended rule.
auth['skewnessId'] = 0
auth.loc[auth.skewness == '1','skewnessId']= 1

### Creating curtosisId

In [14]:
# Start every row at id 0, then set the id to 1 where `curtosis` equals the
# string '1'.
# NOTE(review): the comparison is against the *string* '1'. If `curtosis` is
# numeric (as it presumably is when read from the CSV - TODO confirm), no row
# matches and `curtosisId` stays 0 for every row. Confirm the intended rule.
auth['curtosisId'] = 0
auth.loc[auth.curtosis == '1','curtosisId']= 1

### Creating Entropy ID

In [15]:
# Start every row at id 0, then set the id to 1 where `entropy` equals the
# string '1'.
# NOTE(review): the comparison is against the *string* '1'. If `entropy` is
# numeric (as it presumably is when read from the CSV - TODO confirm), no row
# matches and `entropyID` stays 0 for every row. Confirm the intended rule.
auth['entropyID'] = 0
auth.loc[auth.entropy == '1','entropyID']= 1

### Displaying the new data frame after adding the ids

In [None]:
# Preview the first rows to check the newly added id columns.
auth.head()

### Installing feature tools

In [None]:
!pip3  install featuretools

### Importing necessary libraries

In [None]:
import featuretools as ft
import numpy as np

In [19]:
# Create an empty entity set with id 'Notes'; dataframes and the
# relationships between them are added to it in the following cells.
notesEntities = ft.EntitySet(id = 'Notes')

### Mapping a dataframe to the entityset to form the parent entity

In [None]:
# Register `auth` as the dataframe named 'Variance', keyed by the string id
# column `notesID`.
notesEntities.add_dataframe(
    dataframe_name='Variance',
    dataframe=auth,
    index='notesID',
)

### Mapping Skewness and setting the relationship

In [None]:
# Derive the 'Skewness' dataframe from 'Variance', keyed by `skewnessId`, and
# move the `skewness` column into it.
# The column moved must be `skewness`, not `curtosis`: `curtosis` has to stay
# in 'Variance' so the later 'Curtosis' normalisation can move it into its
# own dataframe.
notesEntities.normalize_dataframe(
    base_dataframe_name='Variance',
    new_dataframe_name='Skewness',
    index='skewnessId',
    additional_columns=['skewness'],
)

### Mapping Curtosis and Entropy entities

In [None]:
# Derive 'Curtosis' from 'Variance', keyed by `curtosisId`, moving the
# `curtosis` column into the new dataframe.
notesEntities.normalize_dataframe(
    base_dataframe_name='Variance',
    new_dataframe_name='Curtosis',
    index='curtosisId',
    additional_columns=['curtosis'],
)

# Derive 'Entropy' from 'Variance' the same way, keyed by `entropyID`.
notesEntities.normalize_dataframe(
    base_dataframe_name='Variance',
    new_dataframe_name='Entropy',
    index='entropyID',
    additional_columns=['entropy'],
)

### Creating feature sets using Deep Feature Synthesis

In [None]:
# Run Deep Feature Synthesis with the library's default primitives, targeting
# the 'Variance' dataframe. Returns the feature matrix and the list of
# feature definitions.
feature_set, feature_names = ft.dfs(
    entityset=notesEntities,
    target_dataframe_name='Variance',
    max_depth=2,
    verbose=1,
    n_jobs=1,
)


### Reindexing the feature_set

In [24]:
# Put the feature matrix in the same row order as `auth` (by notesID), then
# turn the index back into an ordinary column.
feature_set = feature_set.reindex(index=auth['notesID']).reset_index()

### Verifying the shape of the features and original banknote data

In [None]:

# Print (rows, columns) for the generated features and then for the original
# frame, so the row counts can be compared.
for frame in (feature_set, auth):
    print(frame.shape)

### Printing head of the feature set

In [None]:

# Preview the first rows of the generated feature matrix.
feature_set.head()

### Printing the list of all features

In [None]:

# Display the feature definitions returned by the dfs call.
feature_names

### Creating aggregation and transformation primitives

In [28]:
# Aggregation primitives passed to dfs as `agg_primitives`.
aggPrimitives = ['std', 'min', 'max', 'mean', 'last', 'count']

# Transformation primitives passed to dfs as `trans_primitives`.
tranPrimitives = ['percentile', 'subtract_numeric', 'divide_numeric']

### Defining the new set of features

In [None]:

# Re-run Deep Feature Synthesis on 'Variance', this time restricted to the
# explicitly chosen aggregation and transformation primitives. This
# overwrites the `feature_set` / `feature_names` produced by the earlier run.
feature_set, feature_names = ft.dfs(
    entityset=notesEntities,
    target_dataframe_name='Variance',
    agg_primitives=aggPrimitives,
    trans_primitives=tranPrimitives,
    max_depth=2,
    verbose=1,
    n_jobs=1,
)