# RFECV
* __RFECV (Recursive Features Elimination Cross Validation)__ 
* __It is used for selection of best features using cross validation and recursive features elimination (iterative approach)__

In [2]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFECV

## Loading Dataset

In [3]:
dataset = pd.read_csv("../../../Datasets/car_evaluation.txt", sep=",")
dataset.columns = ['buying','maintenance','doors','persons','lug_boot', 'safety', 'target']

In [4]:
samples_count, features_count = dataset.shape
samples_count, features_count

(1727, 7)

## Separating target column from dataset

In [5]:
target = dataset['target']
dataset = dataset.drop('target',axis=1)

## One-Hot Encoding of Dataset & Target column

### Dataset

In [6]:
ohe_dataset = preprocessing.OneHotEncoder()
ohe_dataset.fit(dataset.values) 
dataset_encoded = ohe_dataset.transform(dataset.values).toarray()

### Target Column

In [7]:
le_target = preprocessing.LabelEncoder()
target_encoded = le_target.fit_transform(target)

## Applying Random Forest using RFECV

In [8]:
x = dataset_encoded
y = target_encoded

In [9]:
random_forest = RandomForestClassifier()
rfecv = RFECV(estimator=random_forest, step=1, cv=4, scoring='accuracy')
rfecv.fit(x, y)

RFECV(cv=4,
      estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                       class_weight=None, criterion='gini',
                                       max_depth=None, max_features='auto',
                                       max_leaf_nodes=None, max_samples=None,
                                       min_impurity_decrease=0.0,
                                       min_impurity_split=None,
                                       min_samples_leaf=1, min_samples_split=2,
                                       min_weight_fraction_leaf=0.0,
                                       n_estimators=100, n_jobs=None,
                                       oob_score=False, random_state=None,
                                       verbose=0, warm_start=False),
      min_features_to_select=1, n_jobs=None, scoring='accuracy', step=1,
      verbose=0)

In [10]:
print("Optimal number of features : %d" % rfecv.n_features_) 
print("Accuracy::",rfecv.score(x,y))

Optimal number of features : 2
Accuracy:: 0.7776491024898669


## Prediction by using input data from user

In [11]:
buying = input("Enter buying category(vhigh,high,med,low)::")
maintenance = input("Enter buying maintenance(vhigh,high,med,low)::")
doors = input("Enter no of doors(2,3,4,5more)::")
persons = input("Enter no of persons(2,4,more)::")
lug_boot = input("Enter lug_boot category(small,med,big)::")
safety = input("Enter safety category(low,med,high)::")

Enter buying category(vhigh,high,med,low)::low
Enter buying maintenance(vhigh,high,med,low)::low
Enter no of doors(2,3,4,5more)::3
Enter no of persons(2,4,more)::4
Enter lug_boot category(small,med,big)::small
Enter safety category(low,med,high)::med


In [12]:
user_sample = [[buying, maintenance, doors, persons, lug_boot, safety]]
user_sample_encoded = ohe_dataset.transform(user_sample).toarray()
result_encoded = rfecv.predict(user_sample_encoded)
result_decoded = le_target.inverse_transform(result_encoded)
result = result_decoded[0]

In [13]:
if result == 'unacc':
    print("Your car is unaccurate")
elif result == 'acc':
    print("Your car is accurate")
elif result == 'good':
    print("Your car is good")
else:
    print("Your car is very good")

Your car is accurate
