In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.utils import all_estimators


In [26]:
data=pd.read_csv('mushrooms.csv')

In [27]:
X = data.drop('class', axis=1)
y = data['class']

In [28]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [29]:
X_encoded = pd.get_dummies(X)

In [43]:
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)


In [31]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [32]:
classifiers = all_estimators(type_filter='classifier')

In [33]:
evaluation_metrics = {
    'Accuracy': accuracy_score,
    'Precision': precision_score,
    'Recall': recall_score,
    'F1-score': f1_score
}

In [52]:
results = []
for name, mod in classifiers:
    model = mod.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    metrics_scores = {metric_name: metric_func(y_test, y_pred) for metric_name, metric_func in evaluation_metrics.items()}
    results.append({'Classifier': name, **metrics_scores})


TypeError: fit() missing 1 required positional argument: 'y'

In [35]:
results_df = pd.DataFrame(results)

In [36]:
print(results_df)

Empty DataFrame
Columns: []
Index: []


In [37]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import sklearn.linear_model as lm

In [18]:
df=pd.read_csv('mushrooms.csv')
display(df.head())
df.shape


Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g


(8124, 23)

In [4]:
import sklearn.preprocessing as pre_process
import numpy as np
ordinal_encoding=pre_process.OrdinalEncoder()
standered_scalling=pre_process.StandardScaler()
one_hot=pre_process.OneHotEncoder()

In [22]:
cols=df.columns

In [23]:
from sklearn.compose import make_column_transformer
transform_x=make_column_transformer((one_hot , cols[1:]))
transform_x

ColumnTransformer(transformers=[('onehotencoder', OneHotEncoder(),
                                 Index(['cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor',
       'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
       'stalk-shape', 'stalk-root', 'stalk-surface-above-ring',
       'stalk-surface-below-ring', 'stalk-color-above-ring',
       'stalk-color-below-ring', 'veil-type', 'veil-color', 'ring-number',
       'ring-type', 'spore-print-color', 'population', 'habitat'],
      dtype='object'))])

In [27]:
X=df.iloc[:,0:23]
Y=df.iloc[:,0]
Y.head(4)

0    p
1    e
2    e
3    p
Name: class, dtype: object

In [31]:
processed_x=transform_x.fit_transform(X)
pd.DataFrame(processed_x)

Unnamed: 0,0
0,"(0, 5)\t1.0\n (0, 8)\t1.0\n (0, 14)\t1.0\n..."
1,"(0, 5)\t1.0\n (0, 8)\t1.0\n (0, 19)\t1.0\n..."
2,"(0, 0)\t1.0\n (0, 8)\t1.0\n (0, 18)\t1.0\n..."
3,"(0, 5)\t1.0\n (0, 9)\t1.0\n (0, 18)\t1.0\n..."
4,"(0, 5)\t1.0\n (0, 8)\t1.0\n (0, 13)\t1.0\n..."
...,...
8119,"(0, 3)\t1.0\n (0, 8)\t1.0\n (0, 14)\t1.0\n..."
8120,"(0, 5)\t1.0\n (0, 8)\t1.0\n (0, 14)\t1.0\n..."
8121,"(0, 2)\t1.0\n (0, 8)\t1.0\n (0, 14)\t1.0\n..."
8122,"(0, 3)\t1.0\n (0, 9)\t1.0\n (0, 14)\t1.0\n..."


In [32]:
ordinal_encoder=pre_process.OrdinalEncoder()
processed_y=ordinal_encoder.fit_transform(np.c_[Y])
processed_y

array([[1.],
       [0.],
       [0.],
       ...,
       [0.],
       [1.],
       [0.]])

In [33]:
ordinal_encoder.inverse_transform(processed_y)

array([['p'],
       ['e'],
       ['e'],
       ...,
       ['e'],
       ['p'],
       ['e']], dtype=object)

In [34]:
train_x,test_x,train_y,test_y = train_test_split( processed_x , processed_y,test_size=30,
    train_size=70)

In [39]:
from sklearn.utils import all_estimators
estimators = all_estimators(type_filter='classifier')
for name, get_model in estimators:
    try:
         print(name)
    except:
         print('Unable to import', name)

AdaBoostClassifier
BaggingClassifier
BernoulliNB
CalibratedClassifierCV
CategoricalNB
ClassifierChain
ComplementNB
DecisionTreeClassifier
DummyClassifier
ExtraTreeClassifier
ExtraTreesClassifier
GaussianNB
GaussianProcessClassifier
GradientBoostingClassifier
HistGradientBoostingClassifier
KNeighborsClassifier
LabelPropagation
LabelSpreading
LinearDiscriminantAnalysis
LinearSVC
LogisticRegression
LogisticRegressionCV
MLPClassifier
MultiOutputClassifier
MultinomialNB
NearestCentroid
NuSVC
OneVsOneClassifier
OneVsRestClassifier
OutputCodeClassifier
PassiveAggressiveClassifier
Perceptron
QuadraticDiscriminantAnalysis
RadiusNeighborsClassifier
RandomForestClassifier
RidgeClassifier
RidgeClassifierCV
SGDClassifier
SVC
StackingClassifier
VotingClassifier


In [88]:
import sklearn.metrics as sm
from sklearn.utils import all_estimators
estimators = all_estimators(type_filter='classifier')
report={}
model_name=[]
model_precision=[]
get_code=[]
for name, get_model in estimators:
    try:
        model = get_model()
        model.fit(train_x,train_y)
        pred_y=model.predict(test_x)            
        report["Precision"]=sm.precision_score(test_y, pred_y)
        model_precision.append(sm.precision_score(test_y, pred_y))
        model_name.append(name)
        get_code.append(get_model)
    except Exception as e:
        print('Unable to import', name)
        print(e)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Unable to import CategoricalNB
A sparse matrix was passed, but dense data is required. Use X.toarray() to convert to a dense numpy array.
Unable to import ClassifierChain
__init__() missing 1 required positional argument: 'base_estimator'
Unable to import GaussianNB
A sparse matrix was passed, but dense data is required. Use X.toarray() to convert to a dense numpy array.
Unable to import GaussianProcessClassifier
A sparse matrix was passed, but dense data is required. Use X.toarray() to convert to a dense numpy array.
Unable to import HistGradientBoostingClassifier
A sparse matrix was passed, but dense data is required. Use X.toarray() to convert to a dense numpy array.
Unable to import LabelPropagation
A sparse matrix was passed, but dense data is required. Use X.toarray() to convert to a dense numpy array.
Unable to import LabelSpreading
A sparse matrix was passed, but dense data is required. Use X.toarray() to convert to a dense numpy array.
Unable to import LinearDiscriminantAnalys

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  model.fit(train_x,train_y)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  return self._fit(X, y)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Unable to import MultiOutputClassifier
__init__() missing 1 required positional argument: 'estimator'
Unable to import OneVsOneClassifier
__init__() missing 1 required positional argument: 'estimator'
Unable to import OneVsRestClassifier
__init__() missing 1 required positional argument: 'estimator'
Unable to import OutputCodeClassifier
__init__() missing 1 required positional argument: 'estimator'
Unable to import QuadraticDiscriminantAnalysis
A sparse matrix was passed, but dense data is required. Use X.toarray() to convert to a dense numpy array.
Unable to import RadiusNeighborsClassifier
No neighbors found for test samples array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], dtype=int64), you can try using larger radius, giving a label for outliers, or considering removing them from your dataset.


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  self._fit(X, y)
  model.fit(train_x,train_y)


Unable to import StackingClassifier
__init__() missing 1 required positional argument: 'estimators'
Unable to import VotingClassifier
__init__() missing 1 required positional argument: 'estimators'


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [120]:
res={"Model Name":model_name ,"Code": get_code, "Model Precision":model_precision}
results=pd.DataFrame(res)
best=results[results["Model Precision"]==1]
best_algs=best["Code"]
np.array(best_algs)


array([<class 'sklearn.ensemble._forest.ExtraTreesClassifier'>,
       <class 'sklearn.svm._classes.NuSVC'>,
       <class 'sklearn.ensemble._forest.RandomForestClassifier'>,
       <class 'sklearn.linear_model._ridge.RidgeClassifierCV'>,
       <class 'sklearn.svm._classes.SVC'>], dtype=object)

In [127]:
precision=[]
names=[]
for get_model in best_algs:
    try:
        model = get_model()
        model.fit(train_x,train_y)
        pred_y=model.predict(test_x)            
        precision.append(sm.precision_score(test_y, pred_y))
        
    except Exception as e:
        print('Unable to import', name)
        print(e)

  model.fit(train_x,train_y)
  y = column_or_1d(y, warn=True)
  model.fit(train_x,train_y)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [126]:
result={,"Model Precision":precision}
result

{'Model Name': ['VotingClassifier',
  'VotingClassifier',
  'VotingClassifier',
  'VotingClassifier',
  'VotingClassifier'],
 'Model Precision': [0.9375, 1.0, 1.0, 1.0, 1.0]}