<a href="https://colab.research.google.com/github/jmatt724/VotingClassifier/blob/main/RandomForrest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [None]:
# Load the drug-classification table from CSV into a DataFrame.
# NOTE(review): the path sits under /content/drive — presumably a mounted
# Google Drive in Colab; no mount step is visible in this notebook, confirm.
dataset = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/Colab Notebooks/Chapter 7/drug200 (1).csv"
)

In [None]:
# Count missing values per column (isna is the pandas alias of isnull).
dataset.isna().sum()

Age            0
Sex            0
BP             0
Cholesterol    0
Na_to_K        0
Drug           0
dtype: int64

In [None]:
# Features are every column except the 'Drug' target; y is the target itself.
X, y = dataset.drop(columns=['Drug']), dataset['Drug']

# Encoding categorical data in independent variables (X)

In [None]:
from sklearn.preprocessing import OrdinalEncoder

# Replace the 'BP' column with integer-like codes from an OrdinalEncoder.
# NOTE(review): with default settings the encoder picks the category order
# itself (sorted values), which may not match the real ordinal order of the
# BP levels — confirm, or pass `categories=` explicitly.
encoder = OrdinalEncoder()
X = X.assign(BP=encoder.fit_transform(X[['BP']]))

In [None]:
# Encode 'Cholesterol' the same way. fit_transform refits the shared
# `encoder`, so afterwards it only holds the Cholesterol categories.
X = X.assign(Cholesterol=encoder.fit_transform(X[['Cholesterol']]))

In [None]:
# One-hot encode the 'Sex' column into Sex_* indicator columns.
# `columns=` has to be passed by keyword: the second positional parameter of
# pd.get_dummies is `prefix`, so the earlier positional ['Sex'] only set the
# prefix and left pandas to encode whichever non-numeric columns it found.
X = pd.get_dummies(X, columns=['Sex'])

In [None]:
# Preview the first five rows of the encoded feature matrix.
X.head(n=5)

Unnamed: 0,Age,BP,Cholesterol,Na_to_K,Sex_F,Sex_M
0,23,0.0,0.0,25.355,1,0
1,47,1.0,0.0,13.093,0,1
2,47,1.0,0.0,10.114,0,1
3,28,2.0,0.0,7.798,1,0
4,61,1.0,0.0,18.043,1,0


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The split is stratified on y and uses
# a fixed seed so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
sc_x = StandardScaler().fit(X_train)
X_train, X_test = sc_x.transform(X_train), sc_x.transform(X_test)

# Voting Classifier

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC

In [None]:
# Base estimators that are later combined in the voting ensemble.

# Multinomial logistic regression; max_iter is raised well above the default
# to give lbfgs room to converge.
# NOTE(review): recent scikit-learn releases deprecate `multi_class` —
# confirm against the installed version.
log_clf = LogisticRegression(solver = 'lbfgs', 
                             multi_class='multinomial', 
                             max_iter = 10000)

# Random forests are stochastic (bootstrap samples, feature sub-sampling), so
# the seed is fixed to make the reported metrics reproducible on re-run.
rnd_clf = RandomForestClassifier(random_state = 42)

# probability=True enables predict_proba, which soft voting relies on. The
# probability estimates involve random shuffling, so the seed is fixed here
# as well.
svm_clf = SVC(gamma='auto', probability = True, random_state = 42)

In [None]:
# Ensemble of the three base estimators; 'soft' voting combines their
# predicted class probabilities rather than their hard labels.
voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting='soft',
)

In [None]:
# Train the ensemble on the scaled training split (the cell displays its repr).
voting_clf.fit(X=X_train, y=y_train)

VotingClassifier(estimators=[('lr',
                              LogisticRegression(max_iter=10000,
                                                 multi_class='multinomial')),
                             ('rf', RandomForestClassifier()),
                             ('svc', SVC(gamma='auto', probability=True))],
                 voting='soft')

In [None]:
from sklearn.metrics import accuracy_score

# Fit each model on the training split and print its test-set accuracy.
# voting_clf is fitted again here along with the three individual models.
for clf in [log_clf, rnd_clf, svm_clf, voting_clf]:
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))


LogisticRegression 0.875
RandomForestClassifier 0.975
SVC 0.825
VotingClassifier 0.975


In [None]:
# Test-set predictions from the fitted voting ensemble, used by the reports below.
y_pred_voting = voting_clf.predict(X=X_test)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Confusion matrix for the ensemble: rows are true classes, columns are
# predicted classes.
print(confusion_matrix(y_true=y_test, y_pred=y_pred_voting))

[[17  0  0  0  1]
 [ 0  5  0  0  0]
 [ 0  0  3  0  0]
 [ 0  0  0  3  0]
 [ 0  0  0  0 11]]


In [None]:
# Per-class precision, recall and F1 for the ensemble on the test split.
print(classification_report(y_true=y_test, y_pred=y_pred_voting))

              precision    recall  f1-score   support

       DrugY       1.00      0.94      0.97        18
       drugA       1.00      1.00      1.00         5
       drugB       1.00      1.00      1.00         3
       drugC       1.00      1.00      1.00         3
       drugX       0.92      1.00      0.96        11

    accuracy                           0.97        40
   macro avg       0.98      0.99      0.99        40
weighted avg       0.98      0.97      0.98        40

