In [188]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier


In [169]:
pip install ucimlrepo



In [170]:
from ucimlrepo import fetch_ucirepo
connectionist_bench_sonar_mines_vs_rocks = fetch_ucirepo(id=151)
X = connectionist_bench_sonar_mines_vs_rocks.data.features
y = connectionist_bench_sonar_mines_vs_rocks.data.targets

# Using Decision Tree with default Settings

In [171]:
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=42)

In [172]:
one_hot_encoder = OneHotEncoder(sparse_output=False,handle_unknown='ignore')
y_train = one_hot_encoder.fit_transform(y_train)
y_test = one_hot_encoder.transform(y_test)

In [173]:
dt = DecisionTreeClassifier()
dt.fit(X_train,y_train)

In [174]:
dt.score(X_test,y_test)

0.7115384615384616

# Using Grid Search CV

In [175]:
path = dt.cost_complexity_pruning_path(X_train,y_train)

In [176]:
params = {
    'max_depth' : [2,3,5,10],
    'ccp_alpha' : path.ccp_alphas,
}
grid_cv = GridSearchCV(DecisionTreeClassifier(random_state=42),param_grid=params,cv=5,scoring='accuracy')

In [177]:
grid_cv.fit(X_train,y_train)


In [178]:
score = {}
for i in path.ccp_alphas:
  dt = DecisionTreeClassifier(ccp_alpha=i)
  dt.fit(X_train,y_train)
  score[i] = dt.score(X_test,y_test)

# Without alpha

In [179]:
dt = DecisionTreeClassifier(ccp_alpha= 0.00961538, max_depth= 3,random_state=42)
dt.fit(X_train,y_train)
dt.score(X_test,y_test)

0.8076923076923077

# With grid search parameters

In [180]:
dt = DecisionTreeClassifier(ccp_alpha= 0.00961538,max_depth=5,random_state=42)
dt.fit(X_train,y_train)
dt.score(X_test,y_test)

0.75

# Using Random Forest

In [181]:
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train,y_train)

In [182]:
rf.score(X_test,y_test)

0.8269230769230769

# Using Random Forest With Grid Search Cv

In [183]:
params_ = {

'n_estimators' : [100,500,1000],
'bootstrap' : [True],
'max_features' : ['sqrt', 'log2', None],
'max_samples' : [None,0.25,0.30,0.50,1]

}

my_cv = GridSearchCV(RandomForestClassifier(random_state=42),param_grid=params_,cv=5,scoring='accuracy')


In [184]:
my_cv.fit(X_train,y_train)

In [185]:
my_cv.best_estimator_

In [186]:
my_cv.best_score_

np.float64(0.8334677419354838)

In [187]:
rf = RandomForestClassifier(random_state=42,max_features='log2')
rf.fit(X_train,y_train)
rf.score(X_test,y_test)

0.8461538461538461

# Result With Extra Trees Classifier

In [196]:
from sklearn.ensemble import ExtraTreesClassifier
extreme_trees = ExtraTreesClassifier(random_state=42,max_features='log2')
extreme_trees.fit(X_train,y_train)
extreme_trees.score(X_test,y_test)

0.9230769230769231