## Logistic Regression (LR) using NumPy

In [1]:
import numpy as np

In [2]:
#Fix the global RNG seed so the synthetic data set is reproducible
np.random.seed(0)
m = 1000  #number of samples

#Column of ones that plays the role of the bias (intercept) term
X0 = np.ones((m, 1))

#Two standard-normal features, drawn in the order X1 then X2
X1 = np.random.randn(m, 1)
X2 = np.random.randn(m, 1)

#Design matrix with columns [bias, X1, X2]
X = np.concatenate([X0, X1, X2], axis=1)

In [3]:
#Inspect the design matrix (columns: bias, X1, X2)
X

array([[ 1.        ,  1.76405235,  0.55596268],
       [ 1.        ,  0.40015721,  0.89247389],
       [ 1.        ,  0.97873798, -0.42231482],
       ...,
       [ 1.        ,  0.0941923 ,  0.15843385],
       [ 1.        , -1.14761094, -1.14190142],
       [ 1.        , -0.35811408, -1.31097037]])

In [5]:
#Ground-truth coefficients, ordered as [bias, X1, X2]
true_weights = np.array((-1, 2, -1))

In [6]:
#Linear score of every sample under the true weights
logits = np.dot(X, true_weights)

In [7]:
#Map the logits to class-1 probabilities, then hard-threshold them into labels.
#NOTE: the labels are a deterministic threshold of the probabilities (in effect
#logits > 0), not random draws from them.
probabilities = 1/(1+np.exp(-logits))
y = (probabilities > 0.5).astype(int)

In [12]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy scalar or ndarray
        Element-wise sigmoid of ``z``; same shape as ``z``.
    """
    z = np.asarray(z)
    #exp(-|z|) never exceeds 1, so it cannot overflow the way exp(-z)
    #does for large negative z.
    e = np.exp(-np.abs(z))
    #z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- the same value.
    out = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
    #Indexing with () turns a 0-d result back into a scalar and leaves
    #arrays of any other rank unchanged.
    return out[()]

In [8]:
def compute_cost(X, y, theta):
    """Mean binary cross-entropy (log-loss) of the logistic model.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Design matrix, one row per sample.
    y : array-like of shape (m,) or (m, 1)
        0/1 labels.
    theta : ndarray of shape (n,) or (n, 1)
        Model weights.

    Returns
    -------
    float
        Average cost over the m samples.

    Raises
    ------
    ValueError
        If the number of predictions does not match the number of labels.
    ZeroDivisionError
        If ``y`` is empty.
    """
    #Work on flat vectors so row labels (m,) and column labels (m, 1) behave
    #the same; indexing the result with [0,0] only worked for column inputs.
    y = np.asarray(y).reshape(-1)
    h = np.asarray(sigmoid(X.dot(theta))).reshape(-1)
    if h.shape != y.shape:
        raise ValueError(
            "got {} predictions for {} labels".format(h.size, y.size)
        )
    m = len(y)
    epsilon = 1e-5 #Keeps log() away from log(0) when h is exactly 0 or 1
    cost = (-1 / m) * (y.dot(np.log(h+epsilon)) + (1-y).dot(np.log(1-h+epsilon)))
    return float(cost)

In [9]:
#Random starting weights (coefficients): a column vector with one entry per column of X
theta = np.random.standard_normal(size=(3, 1))

In [10]:
#Gradient-descent settings
num_epochs = 1000    #number of full-batch updates
learning_rate = 0.1  #step size applied to each gradient

In [13]:
#Batch gradient descent on the logistic loss
y_col = y.reshape(-1,1)  #labels as a column so they line up with the predictions
for epoch in range(num_epochs):
    #Difference between predicted probabilities and labels, one row per sample
    residual = sigmoid(X.dot(theta)) - y_col
    #Average the per-sample gradient contributions over the m samples
    gradient = (1/m) * X.T.dot(residual)
    #Step against the gradient, updating theta in place
    theta -= learning_rate * gradient

In [14]:
#Score the training samples with the fitted weights, then threshold at 0.5
predicted_probabilities = sigmoid(np.dot(X, theta))
predicted_classes = np.greater(predicted_probabilities, 0.5).astype(int)

In [15]:
#Accuracy = fraction of samples whose predicted class matches the label
accuracy = np.mean(predicted_classes == y.reshape(-1,1))

In [16]:
#Report the fitted weights and the accuracy measured on the training data
print('Trained coefficient (weight)', theta)
print("Accuracy:", accuracy)

Trained coefficient (weight) [[-2.133043  ]
 [ 4.36885825]
 [-2.10666418]]
Accuracy: 0.998


## Logistic Regression (LR) using scikit-learn

In [17]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [18]:
#Fetch the bundled iris data set and unpack its features and class labels
iris = load_iris()
X, y = iris.data, iris.target

In [19]:
#Inspect the iris feature matrix
X

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [20]:
#Inspect the iris class labels
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [21]:
#Hold out 20% of the samples for testing; random_state is fixed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2
)

In [22]:
#Create the classifier and fit it on the training split in one step
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
model

In [23]:
#Predict a class label for every sample in the test split
y_pred = model.predict(X_test)

In [24]:
#Fraction of test samples whose predicted label matches the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [25]:
#Show the accuracy measured on the test split
print('Accuracy', accuracy)

Accuracy 0.9666666666666667


In [26]:
#Refit the same classifier with a 10x larger iteration budget
model = LogisticRegression(max_iter=10000).fit(X_train, y_train)
model

In [27]:
#Predict a class label for every sample in the test split
y_pred = model.predict(X_test)

In [28]:
#Fraction of test samples whose predicted label matches the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [29]:
#Show the accuracy measured on the test split
print('Accuracy', accuracy)

Accuracy 0.9666666666666667


## Training the model with 80%, 70%, 60%, and 50% of the data

In [3]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [4]:
#Fetch the bundled iris data set and unpack its features and class labels
iris = load_iris()
X, y = iris.data, iris.target

In [5]:
#Hold-out fractions to compare (the rest of the data is used for training)
test_sizes = [0.2, 0.3, 0.4, 0.5]

for test_size in test_sizes:
    #Split with the requested hold-out fraction, using the same seed every time
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=2
    )

    #Fit a fresh classifier on the training part
    model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

    #Score its predictions on the held-out part
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print('Accuracy with test size {}'.format(test_size), accuracy)

Accuracy with test size 0.2 0.9666666666666667
Accuracy with test size 0.3 0.9777777777777777
Accuracy with test size 0.4 0.95
Accuracy with test size 0.5 0.9466666666666667
