In [161]:
import numpy as np 
from sklearn import datasets
import matplotlib.pyplot as plt 
import cupy as cp 

In [162]:
data=datasets.load_iris()

In [163]:
X=data["data"][:,3:]
Y=(data["target"]==2).astype(np.int32)
Y


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [164]:
from sklearn.model_selection import train_test_split

In [165]:
x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.1)


### Description

#### Logistic Regression

Logistic regression is named for the function used at the core of the method, the [logistic function](https://en.wikipedia.org/wiki/Logistic_function).

The logistic function, also called the **Sigmoid function** was developed by statisticians to describe properties of population growth in ecology, rising quickly and maxing out at the carrying capacity of the environment. It’s an S-shaped curve that can take any real-valued number and map it into a value between 0 and 1, but never exactly at those limits.

$$\frac{1}{1 + e^{-x}}$$

$e$ is the base of the natural logarithms and $x$ is value that you want to transform via the logistic function.

The logistic regression equation has a very simiar representation like linear regression. The difference is that the output value being modelled is binary in nature.

$$\hat{y}=\frac{e^{\beta_0+\beta_1x_1}}{1+\beta_0+\beta_1x_1}$$

or

$$\hat{y}=\frac{1.0}{1.0+e^{-\beta_0-\beta_1x_1}}$$

$\beta_0$ is the intecept term

$\beta_1$ is the coefficient for $x_1$

$\hat{y}$ is the predicted output with real value between 0 and 1. To convert this to binary output of 0 or 1, this would either need to be rounded to an integer value or a cutoff point be provided to specify the class segregation point.

# Learning with Stochastic Gradient Descent

Logistic Regression uses gradient descent to update the coefficients.

Each gradient descent iteration, the coefficients are updated using the equation:

$$\beta=\beta+\textrm{learning rate}\times (y-\hat{y}) \times \hat{y} \times (1-\hat{y}) \times x $$

In [166]:
def sigmoid(x):
    return 1.0/(1.0+np.exp(-x))
def loss(yh,y):
    return yh-y


In [167]:
class Logistic_regression():
    def __init__(self,shapes,lr=0.01):
        self.w=np.random.randn(shapes[1],1)
        self.b=np.random.randn(shapes[0],1)
        self.lr=lr
    def fit(self,x,y):
        k=np.dot(x,self.w)+self.b
        yh=sigmoid(k)
        l=loss(yh,y)
        dw=np.mean(l.dot(x))
        db=2*np.sum(yh-y)
        self.w-=self.lr*dw
        self.b-=self.lr*db
        return l
    def predict(self,x):
        k=np.dot(x,self.w)
        yh=sigmoid(k)
        return yh
    
        


In [168]:
lg=Logistic_regression(shapes=(135,1),lr=0.001)
for _ in range(2):
    l=lg.fit(x_train,y_train)
    print(l)

[[-0.43812422  0.56187578  0.56187578 ...  0.56187578 -0.43812422
   0.56187578]
 [-0.25459919  0.74540081  0.74540081 ...  0.74540081 -0.25459919
   0.74540081]
 [-0.96365582  0.03634418  0.03634418 ...  0.03634418 -0.96365582
   0.03634418]
 ...
 [-0.32239167  0.67760833  0.67760833 ...  0.67760833 -0.32239167
   0.67760833]
 [-0.48496287  0.51503713  0.51503713 ...  0.51503713 -0.48496287
   0.51503713]
 [-0.40141262  0.59858738  0.59858738 ...  0.59858738 -0.40141262
   0.59858738]]
[[-9.98921225e-01  1.07877547e-03  1.07877547e-03 ...  1.07877547e-03
  -9.98921225e-01  1.07877547e-03]
 [-9.97569428e-01  2.43057221e-03  2.43057221e-03 ...  2.43057221e-03
  -9.97569428e-01  2.43057221e-03]
 [-9.99968597e-01  3.14025402e-05  3.14025402e-05 ...  3.14025402e-05
  -9.99968597e-01  3.14025402e-05]
 ...
 [-9.98243589e-01  1.75641091e-03  1.75641091e-03 ...  1.75641091e-03
  -9.98243589e-01  1.75641091e-03]
 [-9.99106970e-01  8.93029662e-04  8.93029662e-04 ...  8.93029662e-04
  -9.99106970

In [169]:
print(lg.predict(x_test))
print(y_test)

[[0.6116469 ]
 [0.51135405]
 [0.50567776]
 [0.60623852]
 [0.58986133]
 [0.58435572]
 [0.50567776]
 [0.62770323]
 [0.58435572]
 [0.51135405]
 [0.60080398]
 [0.62770323]
 [0.61702792]
 [0.60080398]
 [0.56771737]]
[1 0 0 1 0 0 0 1 0 0 1 1 1 1 0]


In [170]:
from sklearn.linear_model import LogisticRegression

In [171]:
clf_LR = LogisticRegression(C=1.0, tol=0.0001)
#0.1<c<0.5 
# 1<c<10

In [172]:
clf_LR.fit(x_train,y_train)

LogisticRegression()

In [174]:
print(clf_LR.predict(x_test))
print(clf_LR.predict_proba(x_test))
print(y_test)

[1 0 0 1 0 0 0 1 0 0 1 1 1 1 0]
[[0.20199685 0.79800315]
 [0.99761177 0.00238823]
 [0.9984163  0.0015837 ]
 [0.27642441 0.72357559]
 [0.56770923 0.43229077]
 [0.664654   0.335346  ]
 [0.9984163  0.0015837 ]
 [0.06858449 0.93141551]
 [0.664654   0.335346  ]
 [0.99761177 0.00238823]
 [0.36570787 0.63429213]
 [0.06858449 0.93141551]
 [0.14363107 0.85636893]
 [0.36570787 0.63429213]
 [0.87201374 0.12798626]]
[1 0 0 1 0 0 0 1 0 0 1 1 1 1 0]


## Using Penalties 
Using lasso and Tikhonov functions to regulize intersect and slope margins

In [177]:
clf_LR = LogisticRegression(C=1.0,penalty="l2",tol=0.0001)
clf_LR.fit(x_train,y_train)
print(clf_LR.predict(x_test))
print(clf_LR.predict_proba(x_test))
print(y_test)

[1 0 0 1 0 0 0 1 0 0 1 1 1 1 0]
[[0.20199685 0.79800315]
 [0.99761177 0.00238823]
 [0.9984163  0.0015837 ]
 [0.27642441 0.72357559]
 [0.56770923 0.43229077]
 [0.664654   0.335346  ]
 [0.9984163  0.0015837 ]
 [0.06858449 0.93141551]
 [0.664654   0.335346  ]
 [0.99761177 0.00238823]
 [0.36570787 0.63429213]
 [0.06858449 0.93141551]
 [0.14363107 0.85636893]
 [0.36570787 0.63429213]
 [0.87201374 0.12798626]]
[1 0 0 1 0 0 0 1 0 0 1 1 1 1 0]


## Using Multiclass classification 

Softmax 

$$\frac{e^{x}} {\sum_{i=1}^{N}e^{xi}}$$

$e$ is the base of the natural logarithms and $x$ is value that you want to transform via the logistic function.
$N$ is the number of classes 


In [180]:
X = data["data"][:, (2, 3)] # petal length, petal width
y = data["target"]

In [184]:
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.1)


In [185]:
softmax_reg = LogisticRegression(multi_class="multinomial",solver="lbfgs", C=10)
softmax_reg.fit(x_train,y_train)

LogisticRegression(C=10, multi_class='multinomial')

In [186]:
softmax_reg.predict(x_test)

array([1, 2, 2, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2])

In [187]:
y_test

array([1, 2, 2, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2])