# Tensorflow making softmax classifier

## imports

In [1]:
import tensorflow as tf
import numpy as np

print(tf.__version__)

tf.random.set_seed(777)  # for reproducibility

2.3.0


## datas

In [13]:
x_data = [[1, 2, 1, 1],
          [2, 1, 3, 2],
          [3, 1, 3, 4],
          [4, 1, 5, 5],
          [1, 7, 5, 5],
          [1, 2, 5, 6],
          [1, 6, 6, 6],
          [1, 7, 7, 7]]
y_data = [[0, 0, 1],
          [0, 0, 1],
          [0, 0, 1],
          [0, 1, 0],
          [0, 1, 0],
          [0, 1, 0],
          [1, 0, 0],
          [1, 0, 0]]

#convert into numpy and float format
x_data = np.asarray(x_data, dtype=np.float32)
y_data = np.asarray(y_data, dtype=np.float32)

print(f'X shape - {x_data.shape}')
print(f'Y shape - {y_data.shape}')

X shape - (8, 4)
Y shape - (8, 3)


W should be shaped as (3, 4)
b should be shaped as (3, 1)

## Model - hypothesis

In [14]:
W = tf.Variable(tf.random.uniform((4, 3), -10, 10))
b = tf.Variable(tf.random.uniform((1, 3), -10, 10))
variables = [W, b]

XW + b -> sigmoid > softmax

In [15]:
def hypothesis(X):
    theta = tf.add(tf.matmul(X, W), b)        
    normalized = theta # tf.divide(1., 1. + tf.exp(theta))
    return tf.nn.softmax(normalized)

In [16]:
hypothesis(x_data)

<tf.Tensor: shape=(8, 3), dtype=float32, numpy=
array([[9.8252660e-01, 1.2772529e-02, 4.7009205e-03],
       [9.8319489e-01, 1.6805071e-02, 2.0518032e-10],
       [9.2877811e-01, 7.1221910e-02, 2.4506068e-14],
       [9.6506792e-01, 3.4932099e-02, 2.9891201e-21],
       [3.3135342e-11, 1.0000000e+00, 9.2847209e-16],
       [3.4521524e-16, 1.0000000e+00, 1.3064970e-22],
       [2.6780382e-15, 1.0000000e+00, 2.2375779e-21],
       [1.6793844e-18, 1.0000000e+00, 3.0305968e-25]], dtype=float32)>

## Loss

In [17]:
def loss(predicted, Y):
    return tf.reduce_mean(-tf.reduce_sum(Y * tf.math.log(predicted), axis=1))

In [18]:
loss(hypothesis(x_data), y_data)

<tf.Tensor: shape=(), dtype=float32, numpy=17.10539>

## gradient

In [19]:
def GD(X, Y):
    with tf.GradientTape() as tape:
        cost = loss(hypothesis(X), Y)
        grads = tape.gradient(cost, variables)
    return grads

In [20]:
GD(x_data, y_data)

[<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
 array([[ 0.94944024, -0.20002788, -0.7494124 ],
        [-1.0197382 ,  1.518563  , -0.49882478],
        [-0.18202686,  1.0564392 , -0.8744124 ],
        [-0.188829  ,  1.0632415 , -0.87441236]], dtype=float32)>,
 <tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[ 0.23244593,  0.14196646, -0.3744124 ]], dtype=float32)>]

## training

In [10]:
!pip install tqdm



In [23]:
from tqdm.notebook import tqdm
epoch = 10000
learning_rate = 0.1

def training(X, Y, epoch, learning_rate):
    optimizer = tf.optimizers.SGD() # tutorial used keras.optimizer.SGD 
    for i in tqdm(range(epoch)):
        grads = GD(X, Y)
        optimizer.apply_gradients(zip(grads, variables))
        if i % 100 == 0  :
            print(f'{i}\t{loss(hypothesis(X), Y)}')
        
training(x_data, y_data, epoch, learning_rate)

  0%|          | 0/10000 [00:00<?, ?it/s]

0	0.9372616410255432
100	0.8866345882415771
200	0.8366848230361938
300	0.787514865398407
400	0.7392809987068176
500	0.692199170589447
600	0.6465537548065186
700	0.6026880145072937
800	0.5609884858131409
900	0.5218412280082703
1000	0.48559635877609253
1100	0.4525284767150879
1200	0.422810435295105
1300	0.3965117633342743
1400	0.37360143661499023
1500	0.3539518117904663
1600	0.3373486399650574
1700	0.3234988749027252
1800	0.3120531141757965
1900	0.30263543128967285
2000	0.29487529397010803
2100	0.2884381115436554
2200	0.28303763270378113
2300	0.2784404754638672
2400	0.2744651734828949
2500	0.27097001671791077
2600	0.267850399017334
2700	0.26502561569213867
2800	0.26243528723716736
2900	0.2600344121456146
3000	0.25778836011886597
3100	0.25567036867141724
3200	0.25365978479385376
3300	0.25174060463905334
3400	0.24990081787109375
3500	0.24812906980514526
3600	0.24641790986061096
3700	0.24476003646850586
3800	0.24315021932125092
3900	0.2415831983089447
4000	0.24005620181560516
4100	0.2385651

In [32]:
sample_data = [[2,1,3,2]] # answer_label [[0,0,1]]
sample_data = np.asarray(sample_data, dtype=np.float32)

b = hypothesis(x_data)
print(b)
print(tf.argmax(b, 1))

tf.Tensor(
[[7.8551257e-01 2.1006010e-01 4.4272733e-03]
 [7.7455759e-01 2.2140647e-01 4.0359795e-03]
 [7.5654399e-01 2.3985958e-01 3.5964840e-03]
 [6.1532533e-01 3.8318926e-01 1.4853713e-03]
 [7.1815139e-01 2.8073332e-01 1.1152421e-03]
 [6.5409976e-01 3.4473494e-01 1.1653423e-03]
 [8.5112667e-01 1.4800638e-01 8.6704158e-04]
 [8.9212579e-01 1.0711933e-01 7.5482985e-04]], shape=(8, 3), dtype=float32)
tf.Tensor([0 0 0 0 0 0 0 0], shape=(8,), dtype=int64)
tf.Tensor(
[[1.2771609e-01 5.3731424e-01 3.3496961e-01]
 [7.7455735e-01 2.2140670e-01 4.0359804e-03]
 [4.7919638e-03 9.8957258e-01 5.6354576e-03]
 [2.9917302e-02 9.7002989e-01 5.2849868e-05]
 [9.5445496e-01 4.5544878e-02 2.3866650e-07]
 [8.6858869e-01 1.3141119e-01 1.4152326e-07]
 [9.9625236e-01 3.7476579e-03 1.9310022e-09]
 [9.9927253e-01 7.2744628e-04 3.6020558e-11]], shape=(8, 3), dtype=float32)
tf.Tensor([1 0 1 1 0 0 0 0], shape=(8,), dtype=int64)
