### Check Model Performance

#### Without Feature Selection

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.feature_selection import chi2
from sklearn.feature_selection import SelectKBest

In [2]:
data = pd.read_csv("Dataset/train.csv")

In [3]:
X = data.iloc[:,0:20]
Y = data.iloc[:,-1]

In [4]:
rfc = RandomForestClassifier(max_depth=10)

In [5]:
scores = cross_val_score(rfc, X, Y, cv=10)

In [6]:
print(scores.mean())

0.873


#### With Feature Selection

In [7]:
best_features = SelectKBest(score_func=chi2, k=10)

In [8]:
fit = best_features.fit(X, Y)

In [9]:
dfscores = pd.DataFrame(fit.scores_)
dfcolumns = pd.DataFrame(X.columns)

In [10]:
featureScores = pd.concat([dfcolumns, dfscores], axis=1)

In [11]:
featureScores.columns = ["Feature", "Score"]

In [12]:
ten_features = featureScores.sort_values(by="Score", ascending=False).head(10)['Feature'].values

In [13]:
print(ten_features)

['ram' 'px_height' 'battery_power' 'px_width' 'mobile_wt' 'int_memory'
 'sc_w' 'talk_time' 'fc' 'sc_h']


In [14]:
X_fs = X[ten_features]
X.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi
0,842,0,2.2,0,1,0,7,0.6,188,2,2,20,756,2549,9,7,19,0,0,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,6,905,1988,2631,17,3,7,1,1,0
2,563,1,0.5,1,2,1,41,0.9,145,5,6,1263,1716,2603,11,2,9,1,1,0
3,615,1,2.5,0,0,0,10,0.8,131,6,9,1216,1786,2769,16,8,11,1,0,0
4,1821,1,1.2,0,13,1,44,0.6,141,2,14,1208,1212,1411,8,2,15,1,1,0


In [15]:
scores_new = cross_val_score(rfc, X_fs, Y, cv=10)

In [16]:
print(scores_new.mean())

0.8950000000000001
