<a href="https://colab.research.google.com/github/crshao/tensorflow-python-neural-networks/blob/main/TensorFlowCoreLearningAlgorithms_Clustering_and_HiddenMarkovModels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Clustering
# is an unsupervised learning algorithm
# Usually used when datasets do not have labels

# Basic algorithm for k-means:
# Step 1: Randomly pick K points to place K centroids
# Step 2: Assign all of the data points to the centroids by distance. The closest centroid to a point is the one it is assigned to
# Step 3: Average all of the points belonging to each centroid to find the middle of those clusters (center of mass). Place the corresponding
# centroids into that position
# Step 4: Reassign every point once again to the closest centroid.
# Step 5: Repeat steps 3-4 until no point changes which centroid it belongs to

In [None]:
# Hidden Markov Models
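# - Work with probability distributions (initial state, state transitions, observations) rather than individual labelled data points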

In [1]:
%tensorflow_version 2.x
import tensorflow_probability as tfp
import tensorflow as tf

In [2]:
# Weather model assumptions:
#   1. Cold days are encoded by 0 and hot days are encoded by 1.
#   2. The first day in our sequence has an 80% chance of being cold.
#   3. A cold day has a 30% chance of being followed by a hot day.
#   4. A hot day has a 20% chance of being followed by a cold day.
#   5. Each day's temperature is normally distributed: mean 0 and standard
#      deviation 5 on a cold day, mean 15 and standard deviation 10 on a hot day.

tfd = tfp.distributions  # shortcut for later on

# Assumption 2: probabilities of the first day being cold (0) or hot (1).
initial_distribution = tfd.Categorical(probs=[0.8, 0.2])

# Assumptions 3 and 4: one row per current state, one column per next state.
transition_distribution = tfd.Categorical(
    probs=[
        [0.7, 0.3],  # today cold -> tomorrow cold / hot
        [0.2, 0.8],  # today hot  -> tomorrow cold / hot
    ]
)

# Assumption 5: loc is the mean and scale is the standard deviation,
# listed in state order (cold, hot).
observation_distribution = tfd.Normal(
    loc=[0., 15.],
    scale=[5., 10.],
)

In [3]:
# Combine the three distributions defined above into one hidden Markov model.
# num_steps=7: presumably one step per day of a week -- the notebook treats
# each step as a day; confirm if a different horizon is intended.
model = tfd.HiddenMarkovModel(
    num_steps=7,
    observation_distribution=observation_distribution,
    transition_distribution=transition_distribution,
    initial_distribution=initial_distribution,
)

In [4]:
# Expected temperature for each day (one value per step of the model).
mean = model.mean()

# TensorFlow 2.x runs eagerly, so the tensor's value can be read directly with
# .numpy(). The tf.compat.v1.Session previously opened here was never used
# (its handle was ignored), so it has been removed.
print(mean.numpy())

[2.9999998 5.9999995 7.4999995 8.25      8.625001  8.812501  8.90625  ]
