In [1]:
#load libraries
from sklearn.ensemble import AdaBoostClassifier
from sklearn import datasets
#import train_test_split function
from sklearn.model_selection import train_test_split
#import scikit-learn metrics module for accuracy calculation
from sklearn import metrics

In [2]:
# Load the iris dataset bundled with scikit-learn and unpack its
# `data` and `target` attributes into X and y.
iris = datasets.load_iris()
X, y = iris.data, iris.target

In [3]:
# Split the dataset into a training set (70%) and a test set (30%).
# random_state pins the shuffle so that Restart & Run All yields the same
# partition, and therefore the same accuracy, on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [4]:
# Create the AdaBoost classifier (at most 50 boosting rounds, learning rate 1).
# random_state fixes the estimator's internal randomness so that repeated runs
# train the same model.
abc = AdaBoostClassifier(n_estimators=50, learning_rate=1, random_state=42)
# Train the classifier; the value returned by fit() is kept as `model`.
model = abc.fit(X_train, y_train)
# Predict the class labels of the held-out test samples.
y_pred = model.predict(X_test)

In [5]:
# Report the accuracy score of the test-set predictions against the true labels.
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

Accuracy: 0.9777777777777777


In [6]:
# Clone the ML-From-Scratch repository into the current working directory so
# that its `mlfromscratch` package can be imported by later cells.
# NOTE(review): the clone is not pinned to a commit, and re-running this cell
# once the directory exists only makes git print an "already exists" error.
!git clone https://github.com/eriklindernoren/ML-From-Scratch

Cloning into 'ML-From-Scratch'...
remote: Enumerating objects: 2558, done.
remote: Total 2558 (delta 0), reused 0 (delta 0), pack-reused 2558
Receiving objects: 100% (2558/2558), 553.45 KiB | 4.69 MiB/s, done.
Resolving deltas: 100% (1960/1960), done.


In [7]:
import os
import sys

# Make the cloned ML-From-Scratch checkout importable. `git clone` places the
# repository in the current working directory, so resolve the path from there
# instead of hard-coding the Colab-specific '/content' prefix (on Colab the
# working directory is /content, so the resulting path is the same).
repo_dir = os.path.abspath('ML-From-Scratch')
# Guard against duplicate entries when this cell is executed more than once.
if repo_dir not in sys.path:
    sys.path.append(repo_dir)

In [8]:
from __future__ import print_function, division
import numpy as np
from mlfromscratch.utils import calculate_covariance_matrix, normalize, standardize

class LDA():
    """Two-class Linear Discriminant Analysis, also known as Fisher's linear discriminant.

    Besides classification, the learned direction can be used to reduce the
    dimensionality of a dataset by projecting it onto a single axis.

    Only samples labelled 0 and 1 take part in fitting; samples carrying any
    other label are ignored.

    Attributes:
        w: projection vector learned by ``fit`` (``None`` until fitted).
        threshold: decision boundary on the projected axis. ``fit`` sets it to
            the projection of the midpoint between the two class means; the
            initial value 0.0 is the cut-off used when ``w`` is assigned by hand.
    """
    def __init__(self):
        self.w = None
        self.threshold = 0.0

    def transform(self, X, y):
        """Fit the model on (X, y) and return X projected onto ``self.w``.

        Note that this refits the model on every call.
        """
        self.fit(X, y)
        # Project data onto vector
        return X.dot(self.w)

    def fit(self, X, y):
        """Learn the discriminant direction and decision threshold.

        Args:
            X: samples, one row per sample.
            y: class labels aligned with the rows of X; labels 0 and 1 are used.

        Raises:
            ValueError: if either class 0 or class 1 has no samples, because
                the class statistics are undefined in that case.
        """
        # Accept plain sequences of labels: comparing a list with 0 would not
        # produce an element-wise mask.
        y = np.asarray(y)

        # Separate data by class
        X1 = X[y == 0]
        X2 = X[y == 1]
        if len(X1) == 0 or len(X2) == 0:
            raise ValueError(
                "LDA.fit needs at least one sample of class 0 and one of class 1"
            )

        # Sum of the covariance matrices of the two classes
        cov_tot = calculate_covariance_matrix(X1) + calculate_covariance_matrix(X2)

        # Class means and their difference
        mean1 = X1.mean(0)
        mean2 = X2.mean(0)
        mean_diff = np.atleast_1d(mean1 - mean2)

        # Direction which, when X is projected onto it, best separates the
        # classes: w = pinv(cov1 + cov2) . (mean1 - mean2)
        self.w = np.linalg.pinv(cov_tot).dot(mean_diff)

        # Class 0 projects above class 1 along w, so the boundary is placed at
        # the projection of the midpoint between the two class means. A fixed
        # cut-off of 0 is only right when that midpoint happens to project to 0.
        self.threshold = (0.5 * (mean1 + mean2)).dot(self.w)

    def predict(self, X):
        """Return a list with the predicted label (0 or 1) of each sample in X.

        A sample is assigned to class 1 when its projection onto ``self.w``
        falls below the decision threshold, and to class 0 otherwise.
        """
        # Fall back to 0.0 for instances created without running __init__.
        cutoff = getattr(self, "threshold", 0.0)
        return [1 * (sample.dot(self.w) < cutoff) for sample in X]
