In [1]:
import numpy as np


class LogisticRegression:
    """
    1. Initialise weight and bias with random.randn according to number of features and number of classes
    2. Encode the target feature using one hot encoder (Convert 1D into multi dimension)
    3. Calculate the gradient descent and update weight matrix:
        1. Project Z
        2. With some activation function calculate probability of Z (Softmax):
            Softmax e^x/ sum(e^x)
        3. Calculate Error
        4. CAlculate with dot product of error of X with weight
        5. Db with summation of error in row
    4. Predict using updated weight and bias , calculate probability with sigmoid function and take argmax
    """
    def __init__(self, learning_rate=0.01, max_iters=1000):
        self.weights = None
        self.bias = None
        self.learning_rate = learning_rate
        self.max_iters = max_iters

    def softmax(self, vector):
        """
        Helper class to calculate sigmoid value
        :param vector:
        :return:
        """
        e = np.exp(vector)
        return (e / np.sum(e, axis=1, keepdims=True))

    def oneHotEncoder(self, nSamples, y):
        """
        Helper function for one hot encoding
        :param nSamples:
        :param y:
        :return:
        """
        y_encode = np.zeros((nSamples, len(np.unique(y))))
        for j in range(nSamples):
            y_encode[j][y[j]] = 1
        return y_encode

    def fit(self, X, y):
        """
        Update weight and bias according to Logistic regression function
        :param X:
        :param y:
        :return:
        """
        nSamples, nFeatures = X.shape
        num_classes = len(np.unique(y))
        loss = []
        # np.random.randn(123)
        self.bias = np.random.randn(num_classes)  # initializing random normalized bias (For each classes)
        self.weights = np.random.randn(nFeatures, num_classes)  # initializing random normalized weights (For Each Num features, Class)
        y_encode = self.oneHotEncoder(nSamples, y)  # Encode to one hot encoder
        # iterating to max_epochs
        for i in range(self.max_iters):
            Z = np.dot(X, self.weights) + self.bias # calculate Z
            prob_y = self.softmax(Z) # get probability using softmax
            error = prob_y - y_encode # residual error
            dw = (1 / nSamples) * np.dot(X.T, error) # dw of weight
            db = (1 / nSamples) * np.sum(error, axis=0) # db for bias
            self.weights -= self.learning_rate * dw # update weight 
            self.bias -= self.learning_rate * db # undate bias
            loss.append(-np.mean(np.dot(y, np.log(prob_y))) - np.dot((1 - y) ,np.log(1-prob_y))) # calculate loss

    def predict(self, X):
        """
        Predicting the value taking the final best update weight and bias
        :param X: input
        :return: return predicted value
        """
        Z = np.dot(X, self.weights) + self.bias # get z
        prob_y = self.softmax(Z) # get prob
        return np.argmax(prob_y, axis=1) # get highest  prob clas

    def score(self, y_pred, y):
        """
        Get MSE score btw actual and predicted
        :param y_pred:
        :param y:
        :return:
        """
        return np.mean(y_pred == y) # get mse

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt

# Load Iris dataset
iris = load_iris()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1, stratify=iris.target)
# Preprocess data
scaler = StandardScaler() # use standar scaler
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train logistic regression model
logreg = LogisticRegression() # get model
logreg.fit(X_train[:,2:4], y_train)

# Evaluate on test set
y_pred = logreg.predict(X_test[:,2:4]) # Petal Feature

acc = logreg.score(y_pred,y_test)
acc = acc * 100

print(f'Test accuracy: {acc:.2f}') # get accuaracy

# Figure Plot
fig, ax = plt.subplots(figsize=(10,5))
plot_decision_regions(X_train[:,2:4], y_train, clf=logreg, legend=1,ax=ax)
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
plt.title('Logistic Regression on Iris - Petal')
plt.show()


In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt

# Load Iris dataset
iris = load_iris()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1, stratify=iris.target)
# Preprocess data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train logistic regression model
logreg = LogisticRegression()
logreg.fit(X_train[:,:2], y_train) # sepal Features 

# Evaluate on test set
y_pred = logreg.predict(X_test[:,:2])

acc = logreg.score(y_pred,y_test) # get Accuaracy
acc = acc * 100
print(f'Test accuracy: {acc:.2f}')

# plot decission boundary
fig, ax = plt.subplots(figsize=(10,5))
plot_decision_regions(X_train[:,:2], y_train, clf=logreg, legend=1,ax=ax)
plt.xlabel('Sepal length (cm)')
plt.ylabel('Sepal width (cm)')
plt.title('Logistic Regression on Iris - Sepal')
plt.show()

In [11]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1, stratify=iris.target)
# Preprocess data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train logistic regression model
logreg = LogisticRegression()
logreg.fit(X_train, y_train) # ALl Features

# Evaluate on test set
y_pred = logreg.predict(X_test)

acc = logreg.score(y_pred,y_test) # get MSE
acc = acc*100
print(f'Test accuracy: {acc:.2f}')


Test accuracy: 80.00


In [13]:
class LDA:
    """
    Powerfull tool for dimensionality reduction and classification
    1.Calculate Mean Vector for each class
    2. Calculate covariance vector  to measure the spread of the data around the mean vector
    3. Compute between-class scatter matrix (Variance between mean vector of each class) Sum of difference of each class and overall mean vactor
    4. Compute within-class scatter matrix (Variance between within class) sum of covariance matrix for each class
    5. Compute eigenvectors (in the direction where the data varies most)  and eigenvalues (Amount of variance in each of those direction) of (Sw^-1)Sb (product of inverse of within class scatter matrix and between class scatter matrix
    6. Sort these eigenvectors in descending order. select top k eigen vectors of new vector space. L is the number of new features
    7. create Projection Matrix which transform original into new feature
    8. Project samples in this new feature space .(Multiply centered feature matrix transpose with projection matrix)
    9. Use threshold to classify each sample. Threshold determined by maximizing separation between mean of two classes
    """
    def __init__(self):
        self.w = None
        self.b = None
        self.shared_covariance = None
        self.class_means = None

    def fit(self, X, y):
        """
        This Is the fit function which create mean vector, shared covariance, class shared scatter matrix, 
        within class scatter matrix eigen values and eigen vectors
         :param X: :param y: :return: 
        """
        # Compute class means
        self.class_means = [np.mean(X[y == c], axis=0) for c in np.unique(y)] # for each Class C
        # Compute shared covariance matrix
        self.shared_covariance = np.cov(X.T) # get Cov using Np. cov
        # Compute between-class scatter matrix
        Sb = np.zeros((X.shape[1], X.shape[1])) # initialize to to store Sb
        for i, mean_vec in enumerate(self.class_means):
            n = X[y == i + 1, :].shape[0] 
            mean_vec = mean_vec.reshape(X.shape[1], 1) # mean vector
            overall_mean = np.mean(X, axis=0).reshape(X.shape[1], 1)
            Sb += n * (mean_vec - overall_mean).dot((mean_vec - overall_mean).T) #  Variance between mean vector of each class
        # Compute within-class scatter matrix
        Sw = self.shared_covariance # store within class scatter matrix
        # Compute eigenvectors and eigenvalues of (Sw^-1)Sb
        eig_vals, eig_vecs = np.linalg.eig(np.linalg.inv(Sw).dot(Sb)) # get direction using linalag of eigen values
        # Sort eigenvectors in descending order of eigenvalues
        eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:, i]) for i in range(len(eig_vals))] # make PAir
        eig_pairs.sort(key=lambda x: x[0], reverse=True) # sort it in decending order
        # Choose the first eigenvector as the projection vector
        self.w = eig_pairs[0][1].reshape(X.shape[1], 1) # choose first as projection
        # Compute the bias term
        self.b = -self.w.T.dot(np.mean(self.class_means, axis=0).reshape(X.shape[1], 1)) # Bias Term

    def predict(self, X):
        """
        Predict the y value using the update weight matrix and bias matrix
        :param X:
        :return:
        """
        y_pred = []
        for x in X:
            y_pred.append(np.sign(self.w.T.dot(x.reshape(X.shape[1], 1)) + self.b)) # project X in using projection vector and bias
        return np.array(y_pred).flatten() # flatten the array

    def score(self, y_pred, y):
        """
        MSE of the output and actual y
        :param y_pred:
        :param y:
        :return:
        """
        return np.mean(y_pred == y)


In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn.pipeline import make_pipeline



# Load Iris dataset
iris = load_iris()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1, stratify=iris.target)
# Preprocess data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

pipeline = make_pipeline(scaler, LDA())
# Train Linear Discriminant analysis model
lda = LDA()
lda.fit(X_train[:,2:4], y_train)

# Evaluate on test set
y_pred = lda.predict(X_test[:,2:4])

# cross validation score 
scores =  cross_val_score(pipeline, X_train[:,2:4], y_train,cv=5, scoring='accuracy')

print('LDA CV acc', np.mean(scores), '+/-' , np.std(scores))
acc = lda.score(y_pred,y_test)
acc  = acc*100
print(f'Test accuracy: {acc:.2f}')
# Plot the figure
fig, ax = plt.subplots(figsize=(10,5))
plot_decision_regions(X_train[:,2:4], y_train, clf=lda, legend=1,ax=ax)
plt.xlabel('petal length (cm)')
plt.ylabel('petal width (cm)')
plt.title('LDA on Iris - Petal')
plt.show()


In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn.pipeline import make_pipeline



# Load Iris dataset
iris = load_iris()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1, stratify=iris.target)
# Preprocess data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

pipeline = make_pipeline(scaler, LDA()) # make pipeline
# Train Linear Discriminant analysis model
lda = LDA()
lda.fit(X_train[:,:2], y_train)

# Evaluate on test set
y_pred = lda.predict(X_test[:,:2])

# cross validation score 
scores =  cross_val_score(pipeline, X_train[:,:2], y_train,cv=5, scoring='accuracy')

print('LDA CV acc', np.mean(scores), '+/-' , np.std(scores))
acc = lda.score(y_pred,y_test)
acc = acc*100
print(f'Test accuracy: {acc:.2f}')
fig, ax = plt.subplots(figsize=(10,5))
plot_decision_regions(X_train[:,:2], y_train, clf=lda, legend=1,ax=ax)
plt.xlabel('Sepal length (cm)')
plt.ylabel('Sepal width (cm)')
plt.title('LDA on Iris - Sepal')
plt.show()


In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn.pipeline import make_pipeline



# Load Iris dataset
iris = load_iris()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1, stratify=iris.target)
# Preprocess data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

pipeline = make_pipeline(scaler, LDA())
# Train Linear Discriminant analysis model
lda = LDA()
lda.fit(X_train, y_train)

# Evaluate on test set
y_pred = lda.predict(X_test)

# cross validation score 
scores =  cross_val_score(pipeline, X_train, y_train,cv=5, scoring='accuracy')

print('LDA CV acc', np.mean(scores), '+/-' , np.std(scores))
acc = lda.score(y_pred,y_test)
acc = acc*100
print(f'Test accuracy: {acc:.2f}')


      Model Features      Logistic Regression MSE	 Linear Discriminant Analysis MSE
    1	Petal Features	      93.33	33.33
    2	Sepal Features	      80.00	26.67
    3	All Features	        93.33	33.33