# Q8

In [19]:
from sklearn.datasets import fetch_openml
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, VotingClassifier, StackingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
import pandas as pd

In [13]:
mnist = fetch_openml('mnist_784', as_frame= True)
X, y= mnist.data, mnist.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 2/7, random_state= 42)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size= 0.5, random_state= 43)

In [14]:
rf_clf = RandomForestClassifier(n_estimators= 100,
                                max_depth= 10,
                                random_state= 45).fit(X_train, y_train)
yrf_val_hat = rf_clf.predict(X_val)
print(f"Random forrest accuracy = {accuracy_score(y_val, yrf_val_hat) * 100:.2f}%")

et_clf = ExtraTreesClassifier(n_estimators= 100,
                              random_state= 46).fit(X_train, y_train)
yet_val_hat = et_clf.predict(X_val)
print(f"ExtraTrees accuracy = {accuracy_score(y_val, yet_val_hat) * 100:.2f}%")

knn_clf = KNeighborsClassifier().fit(X_train, y_train)
knn_vat_hat = knn_clf.predict(X_val) 
print(f"kNN accuracy = {accuracy_score(y_val, knn_vat_hat) * 100:.2f}%")

Random forrest accuracy = 94.59%
ExtraTrees accuracy = 96.80%
kNN accuracy = 96.74%


In [16]:
ens = VotingClassifier(estimators= [('rf', rf_clf),
                                    ('et', et_clf),
                                    ('kNN', knn_clf)],
                       voting= 'soft').fit(X_train, y_train)
print(f"Soft voting accuracy = {accuracy_score(y_val, ens.predict(X_val)) * 100:.2f}%")

Soft voting accuracy = 97.12%


# Q9

In [20]:
blender_data = pd.DataFrame({'rf': yrf_val_hat,
                            'et': yet_val_hat,
                            'kNN': knn_vat_hat,
                            'y': y_val})

blender_model = SVC(probability= True).fit(blender_data[['rf', 'et', 'kNN']], blender_data['y'])

## Testing

In [21]:
yrf_test = rf_clf.predict(X_test)
print(f"Random forrest accuracy = {accuracy_score(y_test, yrf_test) * 100:.2f}%")

yet_test = et_clf.predict(X_test)
print(f"ExtraTrees accuracy = {accuracy_score(y_test, yet_test) * 100:.2f}%")

knn_test = knn_clf.predict(X_test)
print(f"kNN accuracy = {accuracy_score(y_test, knn_test) * 100:.2f}%")

Random forrest accuracy = 94.30%
ExtraTrees accuracy = 96.85%
kNN accuracy = 96.92%


In [22]:
blender_test = pd.DataFrame({'rf': yrf_test,
                            'et': yet_test,
                            'kNN': knn_test})

blender_test_yhat = blender_model.predict(blender_test[['rf', 'et', 'kNN']])
print(f"Blender accuracy = {accuracy_score(y_test, blender_test_yhat) * 100:.2f}%")

Blender accuracy = 96.04%


In [23]:
stk_clf = StackingClassifier(estimators= [('rf', rf_clf),
                                        ('et', et_clf),
                                        ('kNN', knn_clf)],
                             final_estimator= SVC(probability= True)).fit(X_train, y_train)

stk_test_yhat = stk_clf.predict(X_test)
print(f"Stacking accuracy = {accuracy_score(y_test, stk_test_yhat) * 100:.2f}%")

Stacking accuracy = 97.93%
