In [50]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 

In [51]:
df = sns.load_dataset('iris')

In [52]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [53]:
df['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [54]:
from sklearn.model_selection import train_test_split

In [55]:
X = df.drop('species', axis =1)
y = df['species']

In [56]:
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42)

In [57]:
from sklearn.neighbors import KNeighborsClassifier

In [58]:
model_knn = KNeighborsClassifier(n_neighbors = 4 )

In [59]:
model_knn.fit(X_train,y_train)

0,1,2
,n_neighbors,4
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [60]:
model_knn.score(X_test,y_test)

0.98

In [61]:
from sklearn.svm import SVC

In [62]:
model_svc = SVC(C=30, kernel='rbf', gamma='scale')

In [63]:
model_svc.fit(X_train,y_train)

0,1,2
,C,30
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


# Grid Search CV # 

In [64]:
from sklearn.model_selection import GridSearchCV

In [65]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

model_svc = SVC()   # clean model (NO gamma='to')

param_grid = {
    'C': [1, 10, 30, 50],
    'kernel': ['rbf', 'linear'],
    'gamma': ['scale', 'auto']
}

classifier = GridSearchCV(model_svc, param_grid, cv=5)
classifier.fit(X_train, y_train)

print(classifier.best_params_)
print(classifier.best_score_)


{'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
0.95


In [66]:
classifier.fit(X_train,y_train)

0,1,2
,estimator,SVC()
,param_grid,"{'C': [1, 10, ...], 'gamma': ['scale', 'auto'], 'kernel': ['rbf', 'linear']}"
,scoring,
,n_jobs,
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,C,1
,kernel,'linear'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [67]:
classifier.cv_results_

{'mean_fit_time': array([0.00616641, 0.00358624, 0.00298553, 0.00378451, 0.00374842,
        0.00416965, 0.00277524, 0.00345936, 0.00354829, 0.00335436,
        0.0031599 , 0.00370636, 0.00377731, 0.00418463, 0.003689  ,
        0.00339394]),
 'std_fit_time': array([0.00255296, 0.00047288, 0.00107703, 0.00069278, 0.00080752,
        0.00145129, 0.0004387 , 0.00082615, 0.00039284, 0.00139182,
        0.00095722, 0.00061451, 0.00128328, 0.00146509, 0.00041576,
        0.00102292]),
 'mean_score_time': array([0.00510993, 0.00280557, 0.00215979, 0.00268507, 0.00279517,
        0.0027905 , 0.00318804, 0.00272341, 0.00317612, 0.00198331,
        0.00185008, 0.00311246, 0.00238729, 0.00238876, 0.00277681,
        0.00249262]),
 'std_score_time': array([0.00203157, 0.000749  , 0.00071836, 0.00076084, 0.00116941,
        0.00114854, 0.00074238, 0.00065693, 0.00075371, 0.00064712,
        0.0013663 , 0.00101671, 0.00048419, 0.00101801, 0.00019547,
        0.00043389]),
 'param_C': masked_array(d

In [68]:
results = pd.DataFrame(classifier.cv_results_)

In [69]:
results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.006166,0.002553,0.00511,0.002032,1,scale,rbf,"{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}",1.0,0.85,0.9,1.0,0.9,0.93,0.06,10
1,0.003586,0.000473,0.002806,0.000749,1,scale,linear,"{'C': 1, 'gamma': 'scale', 'kernel': 'linear'}",1.0,0.9,0.9,1.0,0.95,0.95,0.044721,1
2,0.002986,0.001077,0.00216,0.000718,1,auto,rbf,"{'C': 1, 'gamma': 'auto', 'kernel': 'rbf'}",0.95,0.9,0.9,1.0,0.95,0.94,0.037417,3
3,0.003785,0.000693,0.002685,0.000761,1,auto,linear,"{'C': 1, 'gamma': 'auto', 'kernel': 'linear'}",1.0,0.9,0.9,1.0,0.95,0.95,0.044721,1
4,0.003748,0.000808,0.002795,0.001169,10,scale,rbf,"{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}",0.95,0.9,0.9,1.0,0.95,0.94,0.037417,3
5,0.00417,0.001451,0.00279,0.001149,10,scale,linear,"{'C': 10, 'gamma': 'scale', 'kernel': 'linear'}",0.95,0.85,0.9,1.0,0.95,0.93,0.05099,14
6,0.002775,0.000439,0.003188,0.000742,10,auto,rbf,"{'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}",0.95,0.8,0.9,1.0,1.0,0.93,0.074833,10
7,0.003459,0.000826,0.002723,0.000657,10,auto,linear,"{'C': 10, 'gamma': 'auto', 'kernel': 'linear'}",0.95,0.85,0.9,1.0,0.95,0.93,0.05099,14
8,0.003548,0.000393,0.003176,0.000754,30,scale,rbf,"{'C': 30, 'gamma': 'scale', 'kernel': 'rbf'}",0.95,0.85,0.9,1.0,1.0,0.94,0.05831,5
9,0.003354,0.001392,0.001983,0.000647,30,scale,linear,"{'C': 30, 'gamma': 'scale', 'kernel': 'linear'}",0.95,0.85,0.9,1.0,1.0,0.94,0.05831,5


In [70]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC

model_svc = SVC()   # clean SVC model

param_dist = {
    'C': [1, 10, 30, 50],
    'kernel': ['rbf', 'linear']
}

classifier_r = RandomizedSearchCV(
    model_svc,
    param_distributions=param_dist,
    n_iter=4,
    cv=5,
    return_train_score=False,
    random_state=42
)

classifier_r.fit(X_train, y_train)

print("Best parameters:", classifier_r.best_params_)
print("Best score:", classifier_r.best_score_)


Best parameters: {'kernel': 'linear', 'C': 1}
Best score: 0.95


In [71]:
classifier_r.fit(X,y)

0,1,2
,estimator,SVC()
,param_distributions,"{'C': [1, 10, ...], 'kernel': ['rbf', 'linear']}"
,n_iter,4
,scoring,
,n_jobs,
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,random_state,42

0,1,2
,C,1
,kernel,'linear'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [72]:
results = pd.DataFrame(classifier_r.cv_results_)

In [73]:
results[['param_C', 'param_kernel', 'mean_test_score']]


Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,linear,0.98
1,30,linear,0.96
2,1,rbf,0.966667
3,50,linear,0.966667


# Boosting # 

In [74]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import seaborn as sns

In [75]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_encoded = le.fit_transform(y)

In [76]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

In [77]:
base_learners = [
    ('dt', DecisionTreeClassifier(random_state=42)),
    ('svc', SVC(probability=True, kernel='rbf', random_state=42)),
    ('lr', LogisticRegression(max_iter=1000))
]

In [78]:
meta_learner = LogisticRegression(max_iter=1000)

In [79]:
stacking_clf = StackingClassifier(
    estimators=base_learners,
    final_estimator=meta_learner,
    cv=5
)

In [80]:
stacking_clf.fit(X_train,y_train)

0,1,2
,estimators,"[('dt', ...), ('svc', ...), ...]"
,final_estimator,LogisticRegre...max_iter=1000)
,cv,5
,stack_method,'auto'
,n_jobs,
,passthrough,False
,verbose,0

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,True
,tol,0.001
,cache_size,200
,class_weight,

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [81]:
y_pred = stacking_clf.predict( X_test)

In [82]:
accuracy = accuracy_score(y_test, y_pred)

In [83]:
accuracy

0.9666666666666667

# Random Forest Tree # 

In [84]:
from sklearn.ensemble import RandomForestClassifier

# Initialize the Random Forest Classifier model
rf_model = RandomForestClassifier(
    n_estimators=100,       # number of trees in the forest
    max_depth=None,         # let trees grow fully (until leaves are pure or all leaves contain less than min_samples_split samples)
    random_state=42         # ensures reproducibility
)

In [85]:
rf_model.fit(X_train,y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [86]:
y_pred = rf_model.predict( X_test)

In [87]:
accuracy = accuracy_score(y_test, y_pred)

In [88]:
accuracy

0.9

# Adda Boost # 

In [94]:
ada_model = AdaBoostClassifier(n_estimators=100, random_state=42)

ada_model.fit(X_train, y_train)

0,1,2
,estimator,
,n_estimators,100
,learning_rate,1.0
,algorithm,'deprecated'
,random_state,42


In [95]:
y_pred = ada_model.predict(X_test)

In [96]:
accuracy = accuracy_score(y_test, y_pred)

In [97]:
accuracy

0.9333333333333333

# Gradient Boost # 

In [98]:
gb_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)

In [100]:
gb_model.fit(X_train, y_train)

0,1,2
,loss,'log_loss'
,learning_rate,0.1
,n_estimators,100
,subsample,1.0
,criterion,'friedman_mse'
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_depth,3
,min_impurity_decrease,0.0


In [101]:
y_pred = gb_model.predict(X_test)

In [102]:
accuracy = accuracy_score(y_test, y_pred)

In [103]:
accuracy

0.9666666666666667