In [1]:
import numpy as np
import tensorflow as tf
# Seed NumPy's global RNG so the np.random.permutation call used for the
# train/validation/test split further down is repeatable on a fresh run.
np.random.seed(2042)

In [2]:
# Show the TensorFlow version this notebook was executed with.
print(tf.__version__)

2.1.0


In [3]:
from sklearn import datasets
# Load the iris dataset bundled with scikit-learn; the result is accessed by key below.
iris=datasets.load_iris()
# List the fields that are available on the loaded dataset object.
list(iris.keys())

['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename']

In [4]:
iris.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [5]:
# Use only feature columns 2 and 3 (petal length, petal width) as model inputs.
petal_columns = [2, 3]
X = iris["data"][:, petal_columns]
# Integer class labels; indexed with the same row indices as X later on.
y = iris["target"]
print(X.shape[0])

150


In [6]:
X[:5]

array([[1.4, 0.2],
       [1.4, 0.2],
       [1.3, 0.2],
       [1.5, 0.2],
       [1.4, 0.2]])

### total 150 samples

In [7]:
# Cast the features to 32-bit floats, matching the float32 weights created later.
X = X.astype(np.float32)
print(X.dtype, X.shape, sep="\n")

float32
(150, 2)


In [8]:
# Sample count = number of rows in the feature matrix.
num_examples = len(X)
print(num_examples)

150


In [9]:
# Prepend a column of ones (the bias-term column) to the feature matrix.
# NOTE(review): np.ones defaults to float64, so the concatenated result is float64
# even though X is float32 (see the 1.39999998 values in the output below).
# Confirm this upcast is intended before mixing X_with_bias with float32 tensors.
X_with_bias = np.c_[np.ones([len(X), 1]), X]

In [10]:
X_with_bias[:5]

array([[1.        , 1.39999998, 0.2       ],
       [1.        , 1.39999998, 0.2       ],
       [1.        , 1.29999995, 0.2       ],
       [1.        , 1.5       , 0.2       ],
       [1.        , 1.39999998, 0.2       ]])

In [11]:
# Fractions of the dataset reserved for the test and validation splits.
test_ratio = 0.2
validation_ratio = 0.2
total_size = X_with_bias.shape[0]

# Hold-out sizes are truncated to whole samples; training takes whatever remains,
# so the three sizes always add up to total_size.
test_size, validation_size = (
    int(total_size * ratio) for ratio in (test_ratio, validation_ratio)
)
train_size = total_size - (test_size + validation_size)

# Shuffled row indices, drawn from NumPy's global RNG (seeded in the first cell).
rnd_indices = np.random.permutation(total_size)

### total=150, train=90, validation=30, test=30

In [12]:
# Show the training and test split sizes, one per line.
print(train_size, test_size, sep="\n")

90
30


In [13]:
# Partition the shuffled indices into three disjoint, contiguous ranges:
#   [0, train_size)          -> training
#   [train_size, valid_end)  -> validation
#   [valid_end, total_size)  -> test
# Explicit positive bounds are used instead of negative offsets because
# rnd_indices[-test_size:] selects *every* index when test_size == 0 (and
# rnd_indices[train_size:-test_size] becomes empty), which would silently
# put the whole dataset in the test split.
valid_end = train_size + validation_size
train_idx = rnd_indices[:train_size]
valid_idx = rnd_indices[train_size:valid_end]
test_idx = rnd_indices[valid_end:]

X_train, y_train = X_with_bias[train_idx], y[train_idx]
X_valid, y_valid = X_with_bias[valid_idx], y[valid_idx]
X_test, y_test = X_with_bias[test_idx], y[test_idx]

In [14]:
# Same training rows as X_train, but taken from the float32 X, i.e. without the ones column.
X_train_wo_bias = X[rnd_indices[:train_size]]

In [15]:
X_train_wo_bias[:5]

array([[1.4, 0.2],
       [4.1, 1.3],
       [5.2, 2. ],
       [4. , 1.3],
       [4.1, 1.3]], dtype=float32)

In [16]:
X_train[:5]

array([[1.        , 1.39999998, 0.2       ],
       [1.        , 4.0999999 , 1.29999995],
       [1.        , 5.19999981, 2.        ],
       [1.        , 4.        , 1.29999995],
       [1.        , 4.0999999 , 1.29999995]])

### Now let's use TensorFlow to check the gradient

In [17]:
# Fixed initial weights (one per input feature) stored as a (2, 1) float32 column,
# so the gradient check is reproducible.
initial_weights = [[0.43433246], [1.0188498]]
W_matrix = np.array(initial_weights, dtype=np.float32)
print(W_matrix, W_matrix.shape, sep="\n")

[[0.43433246]
 [1.0188498 ]]
(2, 1)


In [18]:
# Fixed initial bias stored as a (1, 1) float32 array so it broadcasts onto x·w.
initial_bias = 0.11330361
bias = np.array(initial_bias, dtype=np.float32, ndmin=2)
print(bias, bias.shape, sep="\n")

[[0.11330361]]
(1, 1)


In [19]:
# Wrap the fixed initial values in tf.Variable so that tf.GradientTape can
# differentiate with respect to them in the gradient check below.
w=tf.Variable(W_matrix)
b=tf.Variable(bias)

In [80]:
w

<tf.Variable 'Variable:0' shape=(2, 1) dtype=float32, numpy=
array([[0.43433246],
       [1.0188498 ]], dtype=float32)>

In [81]:
b

<tf.Variable 'Variable:0' shape=(1, 1) dtype=float32, numpy=array([[0.11330361]], dtype=float32)>

In [23]:
# Take the first training row as a (1, 2) batch so it can be matrix-multiplied with w.
sample1 = X_train_wo_bias[:1].copy()
print(sample1, type(sample1), sample1.shape, sep="\n")

[[1.4 0.2]]
<class 'numpy.ndarray'>
(1, 2)


In [49]:
# First feature of the single sample, as a scalar.
x1 = sample1[0, 0]
print(x1)

1.4


In [50]:
# Second feature of the single sample, as a scalar.
x2 = sample1[0, 1]
print(x2)

0.2


In [82]:
# Target label used for sample1 in the gradient check
# (0.0 — presumably the sample's class; confirm against y_train[0]).
# NOTE(review): this rebinds `y`, which earlier held iris["target"]. Re-running the
# train/valid/test split cell after this one would index this scalar tensor instead
# of the label array; consider a distinct name if the cells are ever re-executed.
y=tf.constant(0.0)

In [83]:
# Forward pass for the single sample: linear score z = x·w + b, then sigmoid activation a.
z=tf.matmul(sample1,w)+b
a=tf.math.sigmoid(z)

In [84]:
z

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.925139]], dtype=float32)>

In [85]:
a

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.71608806]], dtype=float32)>

In [87]:
# Hand-derived gradient of the cross-entropy loss w.r.t. z for a sigmoid output: dL/dz = a - y.
dz=a-y
print(dz)

tf.Tensor([[0.71608806]], shape=(1, 1), dtype=float32)


In [88]:
# Chain rule through z = x1*w1 + x2*w2 + b: dL/dw_i = x_i * dL/dz.
dw1 = x1*dz
dw2 = x2*dz
print(dw1, dw2, sep="\n")

tf.Tensor([[1.0025233]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.14321761]], shape=(1, 1), dtype=float32)


In [89]:
# Recompute the forward pass inside a GradientTape so TensorFlow records the
# operations and can differentiate the loss automatically.
with tf.GradientTape() as tape:
    z=tf.matmul(sample1,w)+b
    a=tf.math.sigmoid(z)
    # Binary cross-entropy for the single sample with label y.
    loss = - ( y*tf.math.log(a) + (1-y)*tf.math.log(1-a)  )
        
# Gradients w.r.t. the intermediate score z and the weights w; these are the
# values to compare against the hand-derived dz and (dw1, dw2) printed above.
gradient1=tape.gradient(loss,[z,w])
print('grad=',gradient1)

grad= [<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.71608806]], dtype=float32)>, <tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[1.0025233 ],
       [0.14321761]], dtype=float32)>]
