# Implementing logistic regression
An implementation of logistic regression using TensorFlow. It is tested on the Iris dataset.

In [29]:
# Load the autotime extension; the "time: ..." line shown under each cell's
# output in this notebook comes from it.
%load_ext autotime

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
time: 926 µs


In [69]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score

time: 2.16 ms


In [52]:
# Load the Iris bunch once and reuse it, instead of calling load_iris()
# separately for the values, the column names and the labels.
iris = load_iris()
# Feature matrix as a DataFrame, one column per measurement.
data = pd.DataFrame(iris.data, columns=iris.feature_names)
# Integer class label for each row of `data` (same positional index).
target = pd.Series(iris.target)

time: 7.17 ms


In [53]:
# Preview the first five rows of the feature table.
data.head(n=5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


time: 23.2 ms


In [54]:
# Count how many samples carry each class label.
target.value_counts()

2    50
1    50
0    50
dtype: int64

time: 14.4 ms


### Transforming the data into a binary problem

In [65]:
# Drop every sample labelled 2 so that only two classes (0 and 1) remain.
target_bin = target.loc[target.ne(2)]
# Select the feature rows that share an index label with the kept targets.
data_bin = data.loc[target_bin.index]

time: 9.21 ms


### Fitting the logistic regression

In [66]:
# Feature scaler used by the training cell; centering and unit-variance
# scaling are spelled out explicitly (these are the defaults).
scaler = StandardScaler(with_mean=True, with_std=True)

time: 600 µs


In [67]:
# (rows, columns) of the binary subset of the feature table.
data_bin.shape

(100, 4)

time: 6.63 ms


In [132]:
%%time
n_epochs = 10001
learning_rate = 0.01

scaled_X = np.c_[np.ones((data_bin.shape[0],1)), np.array(data_bin)]
scaled_X[:, 1:] = scaler.fit_transform(scaled_X[:, 1:])
y = np.array(target_bin).reshape(-1, 1)

X = tf.constant(scaled_X, dtype=tf.float32, name="X")
y = tf.constant(y, dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([data_bin.shape[1] + 1, 1], -1.0, 1.0), name="theta")
net_sum = tf.matmul(X, theta, name="net_sum")
y_pred = 1 / (1 + tf.exp(-net_sum))
error = - y * (tf.log(y_pred)) - (1 - y) * tf.log(1 - y_pred)
mll = tf.reduce_mean(tf.square(error), name="mll")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mll)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 1000 == 0:
            print("Epoch", epoch, "MLL =", mll.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Epoch 0 MLL = 0.7118552
Epoch 1000 MLL = 0.0154691385
Epoch 2000 MLL = 0.007982738
Epoch 3000 MLL = 0.005474584
Epoch 4000 MLL = 0.004205438
Epoch 5000 MLL = 0.003433798
Epoch 6000 MLL = 0.0029125884
Epoch 7000 MLL = 0.0025356256
Epoch 8000 MLL = 0.0022495687
Epoch 9000 MLL = 0.002024623
Epoch 10000 MLL = 0.0018428101
CPU times: user 2.67 s, sys: 199 ms, total: 2.87 s
Wall time: 1.79 s
time: 1.79 s


In [133]:
best_theta

array([[ 0.16987953],
       [ 1.1426195 ],
       [-1.1748476 ],
       [ 0.5602829 ],
       [ 1.747018  ]], dtype=float32)

time: 13.3 ms
