In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.utils import all_estimators


In [2]:
data=pd.read_csv('mushrooms.csv')
data.shape

(8124, 23)

In [3]:
X = data.drop('class', axis=1)
y = data['class']
print(y)

0       p
1       e
2       e
3       p
4       e
       ..
8119    e
8120    e
8121    e
8122    p
8123    e
Name: class, Length: 8124, dtype: object


In [4]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y).reshape(-1,1)
y

array([[1],
       [0],
       [0],
       ...,
       [0],
       [1],
       [0]])

In [5]:
X_encoded = pd.get_dummies(X)
X_encoded

Unnamed: 0,cap-shape_b,cap-shape_c,cap-shape_f,cap-shape_k,cap-shape_s,cap-shape_x,cap-surface_f,cap-surface_g,cap-surface_s,cap-surface_y,...,population_s,population_v,population_y,habitat_d,habitat_g,habitat_l,habitat_m,habitat_p,habitat_u,habitat_w
0,0,0,0,0,0,1,0,0,1,0,...,1,0,0,0,0,0,0,0,1,0
1,0,0,0,0,0,1,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
2,1,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,0
3,0,0,0,0,0,1,0,0,0,1,...,1,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,1,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,0,0,0,1,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
8120,0,0,0,0,0,1,0,0,1,0,...,0,1,0,0,0,1,0,0,0,0
8121,0,0,1,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
8122,0,0,0,1,0,0,0,0,0,1,...,0,1,0,0,0,1,0,0,0,0


In [6]:
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.3)


In [7]:
classifiers = all_estimators(type_filter='classifier')
classifiers


[('AdaBoostClassifier', sklearn.ensemble._weight_boosting.AdaBoostClassifier),
 ('BaggingClassifier', sklearn.ensemble._bagging.BaggingClassifier),
 ('BernoulliNB', sklearn.naive_bayes.BernoulliNB),
 ('CalibratedClassifierCV', sklearn.calibration.CalibratedClassifierCV),
 ('CategoricalNB', sklearn.naive_bayes.CategoricalNB),
 ('ClassifierChain', sklearn.multioutput.ClassifierChain),
 ('ComplementNB', sklearn.naive_bayes.ComplementNB),
 ('DecisionTreeClassifier', sklearn.tree._classes.DecisionTreeClassifier),
 ('DummyClassifier', sklearn.dummy.DummyClassifier),
 ('ExtraTreeClassifier', sklearn.tree._classes.ExtraTreeClassifier),
 ('ExtraTreesClassifier', sklearn.ensemble._forest.ExtraTreesClassifier),
 ('GaussianNB', sklearn.naive_bayes.GaussianNB),
 ('GaussianProcessClassifier',
  sklearn.gaussian_process._gpc.GaussianProcessClassifier),
 ('GradientBoostingClassifier',
  sklearn.ensemble._gb.GradientBoostingClassifier),
 ('HistGradientBoostingClassifier',
  sklearn.ensemble._hist_gradi

In [8]:
evaluation_metrics = {
    'Accuracy': accuracy_score,
    'Precision': precision_score,
    'Recall': recall_score,
    'F1-score': f1_score
}


In [9]:
X_train.shape

(5686, 117)

In [10]:
y_train.shape

(5686, 1)

In [11]:
results = []
for name, mod in classifiers:
    try:
        model = mod.fit(X_train,y_train)
        y_pred = model.predict(X_test_scaled)
        metrics_scores = {metric_name: metric_func(y_test, y_pred) for metric_name,
                          metric_func in evaluation_metrics.items()}
        results.append({'Classifier': name, **metrics_scores})
    except Exception as e:
        print("Unable to Import",name)
        print(e)

Unable to Import AdaBoostClassifier
fit() missing 1 required positional argument: 'y'
Unable to Import BaggingClassifier
fit() missing 1 required positional argument: 'y'
Unable to Import BernoulliNB
fit() missing 1 required positional argument: 'y'
Unable to Import CalibratedClassifierCV
fit() missing 1 required positional argument: 'y'
Unable to Import CategoricalNB
fit() missing 1 required positional argument: 'y'
Unable to Import ClassifierChain
fit() missing 1 required positional argument: 'Y'
Unable to Import ComplementNB
fit() missing 1 required positional argument: 'y'
Unable to Import DecisionTreeClassifier
fit() missing 1 required positional argument: 'y'
Unable to Import DummyClassifier
fit() missing 1 required positional argument: 'y'
Unable to Import ExtraTreeClassifier
fit() missing 1 required positional argument: 'y'
Unable to Import ExtraTreesClassifier
fit() missing 1 required positional argument: 'y'
Unable to Import GaussianNB
fit() missing 1 required positional argu

In [12]:
results_df = pd.DataFrame(results)

In [13]:
print(results_df)

Empty DataFrame
Columns: []
Index: []


In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import sklearn.linear_model as lm

In [15]:
df=pd.read_csv('mushrooms.csv')
display(df.head())
df.shape


Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g


(8124, 23)

In [16]:
import sklearn.preprocessing as pre_process
import numpy as np
ordinal_encoding=pre_process.OrdinalEncoder()
standered_scalling=pre_process.StandardScaler()
one_hot=pre_process.OneHotEncoder()

In [17]:
cols=df.columns

In [18]:
from sklearn.compose import make_column_transformer
transform_x=make_column_transformer((one_hot , cols[1:]))
transform_x

ColumnTransformer(transformers=[('onehotencoder', OneHotEncoder(),
                                 Index(['cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor',
       'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
       'stalk-shape', 'stalk-root', 'stalk-surface-above-ring',
       'stalk-surface-below-ring', 'stalk-color-above-ring',
       'stalk-color-below-ring', 'veil-type', 'veil-color', 'ring-number',
       'ring-type', 'spore-print-color', 'population', 'habitat'],
      dtype='object'))])

In [19]:
X=df.iloc[:,0:23]
Y=df.iloc[:,0]
Y.head(4)

0    p
1    e
2    e
3    p
Name: class, dtype: object

In [20]:
processed_x=transform_x.fit_transform(X)
pd.DataFrame(processed_x)

Unnamed: 0,0
0,"(0, 5)\t1.0\n (0, 8)\t1.0\n (0, 14)\t1.0\n..."
1,"(0, 5)\t1.0\n (0, 8)\t1.0\n (0, 19)\t1.0\n..."
2,"(0, 0)\t1.0\n (0, 8)\t1.0\n (0, 18)\t1.0\n..."
3,"(0, 5)\t1.0\n (0, 9)\t1.0\n (0, 18)\t1.0\n..."
4,"(0, 5)\t1.0\n (0, 8)\t1.0\n (0, 13)\t1.0\n..."
...,...
8119,"(0, 3)\t1.0\n (0, 8)\t1.0\n (0, 14)\t1.0\n..."
8120,"(0, 5)\t1.0\n (0, 8)\t1.0\n (0, 14)\t1.0\n..."
8121,"(0, 2)\t1.0\n (0, 8)\t1.0\n (0, 14)\t1.0\n..."
8122,"(0, 3)\t1.0\n (0, 9)\t1.0\n (0, 14)\t1.0\n..."


In [21]:
ordinal_encoder=pre_process.OrdinalEncoder()
processed_y=ordinal_encoder.fit_transform(np.c_[Y])
processed_y

array([[1.],
       [0.],
       [0.],
       ...,
       [0.],
       [1.],
       [0.]])

In [22]:
ordinal_encoder.inverse_transform(processed_y)

array([['p'],
       ['e'],
       ['e'],
       ...,
       ['e'],
       ['p'],
       ['e']], dtype=object)

In [23]:
train_x,test_x,train_y,test_y = train_test_split( processed_x , processed_y,test_size=30,
    train_size=70)

In [24]:
from sklearn.utils import all_estimators
estimators = all_estimators(type_filter='classifier')
for name, get_model in estimators:
    try:
         print(name)
    except:
         print('Unable to import', name)

AdaBoostClassifier
BaggingClassifier
BernoulliNB
CalibratedClassifierCV
CategoricalNB
ClassifierChain
ComplementNB
DecisionTreeClassifier
DummyClassifier
ExtraTreeClassifier
ExtraTreesClassifier
GaussianNB
GaussianProcessClassifier
GradientBoostingClassifier
HistGradientBoostingClassifier
KNeighborsClassifier
LabelPropagation
LabelSpreading
LinearDiscriminantAnalysis
LinearSVC
LogisticRegression
LogisticRegressionCV
MLPClassifier
MultiOutputClassifier
MultinomialNB
NearestCentroid
NuSVC
OneVsOneClassifier
OneVsRestClassifier
OutputCodeClassifier
PassiveAggressiveClassifier
Perceptron
QuadraticDiscriminantAnalysis
RadiusNeighborsClassifier
RandomForestClassifier
RidgeClassifier
RidgeClassifierCV
SGDClassifier
SVC
StackingClassifier
VotingClassifier


In [31]:
import sklearn.metrics as sm
from sklearn.utils import all_estimators
estimators = all_estimators(type_filter='classifier')
report={}
model_name=[]
model_precision=[]
get_code=[]
for name, get_model in estimators:
    try:
        model = get_model()
        model.fit(trai.toarray(),train_y)
        pred_y=model.predict(test_x)            
        report["Precision"]=sm.precision_score(test_y, pred_y)
        model_precision.append(sm.precision_score(test_y, pred_y))
        model_name.append(name)
        get_code.append(get_model)
    except Exception as e:
        print('Unable to import', name)
        print(e)

Unable to import AdaBoostClassifier
name 'trai' is not defined
Unable to import BaggingClassifier
name 'trai' is not defined
Unable to import BernoulliNB
name 'trai' is not defined
Unable to import CalibratedClassifierCV
name 'trai' is not defined
Unable to import CategoricalNB
name 'trai' is not defined
Unable to import ClassifierChain
__init__() missing 1 required positional argument: 'base_estimator'
Unable to import ComplementNB
name 'trai' is not defined
Unable to import DecisionTreeClassifier
name 'trai' is not defined
Unable to import DummyClassifier
name 'trai' is not defined
Unable to import ExtraTreeClassifier
name 'trai' is not defined
Unable to import ExtraTreesClassifier
name 'trai' is not defined
Unable to import GaussianNB
name 'trai' is not defined
Unable to import GaussianProcessClassifier
name 'trai' is not defined
Unable to import GradientBoostingClassifier
name 'trai' is not defined
Unable to import HistGradientBoostingClassifier
name 'trai' is not defined
Unable to

In [26]:
res={"Model Name":model_name ,"Code": get_code, "Model Precision":model_precision}
results=pd.DataFrame(res)
display(results)
best=results[results["Model Precision"]>=1]
best_algs=pd.DataFrame(best["Code"])
display(best_algs)

Unnamed: 0,Model Name,Code,Model Precision
0,AdaBoostClassifier,<class 'sklearn.ensemble._weight_boosting.AdaB...,0.944444
1,BaggingClassifier,<class 'sklearn.ensemble._bagging.BaggingClass...,0.894737
2,BernoulliNB,<class 'sklearn.naive_bayes.BernoulliNB'>,0.941176
3,CalibratedClassifierCV,<class 'sklearn.calibration.CalibratedClassifi...,0.944444
4,ComplementNB,<class 'sklearn.naive_bayes.ComplementNB'>,0.944444
5,DecisionTreeClassifier,<class 'sklearn.tree._classes.DecisionTreeClas...,1.0
6,DummyClassifier,<class 'sklearn.dummy.DummyClassifier'>,0.0
7,ExtraTreeClassifier,<class 'sklearn.tree._classes.ExtraTreeClassif...,0.894737
8,ExtraTreesClassifier,<class 'sklearn.ensemble._forest.ExtraTreesCla...,0.944444
9,GradientBoostingClassifier,<class 'sklearn.ensemble._gb.GradientBoostingC...,1.0


Unnamed: 0,Code
5,<class 'sklearn.tree._classes.DecisionTreeClas...
9,<class 'sklearn.ensemble._gb.GradientBoostingC...
10,<class 'sklearn.neighbors._classification.KNei...


In [27]:
precision=[]
names=[]
for get_model in best_algs:
    try:
        model = get_model()
        model.fit(train_x,train_y)
        pred_y=model.predict(test_x)            
        precision.append(sm.precision_score(test_y, pred_y))
    except Exception as e:
        print('Unable to import', name)
        print(e)

Unable to import VotingClassifier
'str' object is not callable
