In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
df = sns.load_dataset('iris')

In [4]:
df['species'].value_counts()

Unnamed: 0_level_0,count
species,Unnamed: 1_level_1
setosa,50
versicolor,50
virginica,50


In [5]:
for i in range(100,150):
  df.drop(index=i,axis=0,inplace=True)

In [6]:
df['species'].value_counts()

Unnamed: 0_level_0,count
species,Unnamed: 1_level_1
setosa,50
versicolor,50


In [7]:
X = df.drop('species',axis=1)
y = df['species']

In [8]:
X.shape,y.shape

((100, 4), (100,))

## Splitting into train,validation

In [9]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

In [10]:
X.shape

(100, 4)

In [11]:
X_train

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
36,5.5,3.5,1.3,0.2
78,6.0,2.9,4.5,1.5
16,5.4,3.9,1.3,0.4
49,5.0,3.3,1.4,0.2
43,5.0,3.5,1.6,0.6
...,...,...,...,...
81,5.5,2.4,3.7,1.0
72,6.3,2.5,4.9,1.5
22,4.6,3.6,1.0,0.2
39,5.1,3.4,1.5,0.2


In [12]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

In [13]:
y_train,y_test

(array([0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1,
        0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0,
        0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1,
        1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1]),
 array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0]))

## Logistic Regression
Basically we take Linear regression model then apply sigmoid function to get the output and probabality value and classify

In [14]:
class LogisticRegression():
  def __init__(self,lr=0.001,num_iter=1000):
    self.lr = lr
    self.num_iter = num_iter
    self.w = None
    self.b = None

  def fit(self,x,y):
    x,y = np.array(x),np.array(y)
    n_samples, n_features = x.shape
    self.w = np.zeros(n_features)
    self.b = 0
    # gradient descent
    for _ in range(self.num_iter):
      linear_model = np.dot(self.w,x.T) +self.b
      y_pred = self._sigmoid(linear_model)
      w_grad = (1/n_samples)*np.dot(x.T,(y_pred-y))
      b_grad = (1/n_samples)*np.sum((y_pred-y))
      self.w -= self.lr * w_grad
      self.b -= self.lr * b_grad

  def predict(self,x):
    x = np.array(x)
    n_samples, n_features = x.shape
    linear_model = np.dot(self.w,x.T) + self.b
    y_pred = self._sigmoid(linear_model)
    predictions = [1 if i > 0.5 else 0 for i in y_pred]
    return predictions

  def _sigmoid(self,x):
    return 1/(1+np.exp(-x))

In [15]:
model = LogisticRegression()

model.fit(X_train,y_train)

In [16]:
preds = model.predict(X_test)
preds

[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0]