# Implementing logistic regression
An implementation of logistic regression using TensorFlow, tested on the Iris dataset.

In [1]:
%load_ext autotime

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score

time: 1.55 s


In [3]:
# Load the Iris bunch once and reuse it for the features, the column names
# and the labels, instead of re-loading the dataset for every attribute.
iris = load_iris()
data = pd.DataFrame(iris.data, columns=iris.feature_names)
target = pd.Series(iris.target)

time: 106 ms


In [4]:
# Preview the first rows of the feature table.
data.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


time: 253 ms


In [5]:
# Count how many samples carry each class label.
target.value_counts()

2    50
1    50
0    50
dtype: int64

time: 49.8 ms


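### Reducing the data to a binary problem

Only the samples labelled 0 or 1 are kept; the samples labelled 2 are dropped so that the target is binary.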

In [6]:
# Boolean mask selecting every sample whose label is not 2; the same mask
# filters both the labels and the feature rows so they stay aligned.
is_binary_class = target != 2
target_bin = target[is_binary_class]
data_bin = data.loc[is_binary_class, :]

time: 54.8 ms


### Fitting logistic regression 

In [7]:
# Unfitted scaler; it is fitted on the feature columns in the training cell.
scaler = StandardScaler()

time: 58.8 ms


In [8]:
# Sanity check: (rows, columns) remaining after the binary filter.
data_bin.shape

(100, 4)

time: 120 ms


In [9]:
%%time
n_epochs = 10001
learning_rate = 0.01

scaled_X = np.c_[np.ones((data_bin.shape[0],1)), np.array(data_bin)]
scaled_X[:, 1:] = scaler.fit_transform(scaled_X[:, 1:])
y = np.array(target_bin).reshape(-1, 1)

X = tf.constant(scaled_X, dtype=tf.float32, name="X")
y = tf.constant(y, dtype=tf.float32, name="y")
theta = tf.Variable(tf.random_uniform([data_bin.shape[1] + 1, 1], -1.0, 1.0), name="theta")
net_sum = tf.matmul(X, theta, name="net_sum")
y_pred = 1 / (1 + tf.exp(-net_sum))
error = - y * (tf.log(y_pred)) - (1 - y) * tf.log(1 - y_pred)
mll = tf.reduce_mean(tf.square(error), name="mll")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mll)

init = tf.global_variables_initializer() 
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 1000 == 0:
            print("Epoch", epoch, "MLL =", mll.eval())
        sess.run(training_op)

    best_theta = theta.eval()

Instructions for updating:
Colocations handled automatically by placer.
Epoch 0 MLL = 0.054108534
Epoch 1000 MLL = 0.009555064
Epoch 2000 MLL = 0.0055648447
Epoch 3000 MLL = 0.004018074
Epoch 4000 MLL = 0.0031803516
Epoch 5000 MLL = 0.002649308
Epoch 6000 MLL = 0.002280125
Epoch 7000 MLL = 0.002007362
Epoch 8000 MLL = 0.0017969429
Epoch 9000 MLL = 0.0016292838
Epoch 10000 MLL = 0.0014923009
CPU times: user 2.41 s, sys: 163 ms, total: 2.58 s
Wall time: 1.7 s
time: 1.8 s


In [10]:
# Fitted parameters: row 0 is the bias (it multiplies the column of ones),
# the remaining rows weight the standardized features in data_bin's column order.
best_theta

array([[ 0.4439146 ],
       [ 0.81248903],
       [-0.8550664 ],
       [ 1.4218413 ],
       [ 1.4318407 ]], dtype=float32)

time: 2.64 ms
