In [2]:
import numpy as np

def create_train_data():
  """Build the hard-coded play-tennis training set.

  Returns:
    A 2-D NumPy string array with one row per observation. Each row holds
    four categorical feature values followed by the class label
    ('yes' or 'no') in the last column.
  """
  observations = (
      ('Sunny', 'Hot', 'High', 'Weak', 'no'),
      ('Sunny', 'Hot', 'High', 'Strong', 'no'),
      ('Overcast', 'Hot', 'High', 'Weak', 'yes'),
      ('Rain', 'Mild', 'High', 'Weak', 'yes'),
      ('Rain', 'Cool', 'Normal', 'Weak', 'yes'),
      ('Rain', 'Cool', 'Normal', 'Strong', 'no'),
      ('Overcast', 'Cool', 'Normal', 'Strong', 'yes'),
      ('Overcast', 'Mild', 'High', 'Weak', 'no'),
      ('Sunny', 'Cool', 'Normal', 'Weak', 'yes'),
      ('Rain', 'Mild', 'Normal', 'Weak', 'yes'),
  )
  return np.array(observations)

# Build the training set and print the raw table. `train_data` is a
# notebook-level variable that later cells read.
train_data = create_train_data()
print(train_data)

[['Sunny' 'Hot' 'High' 'Weak' 'no']
 ['Sunny' 'Hot' 'High' 'Strong' 'no']
 ['Overcast' 'Hot' 'High' 'Weak' 'yes']
 ['Rain' 'Mild' 'High' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Strong' 'no']
 ['Overcast' 'Cool' 'Normal' 'Strong' 'yes']
 ['Overcast' 'Mild' 'High' 'Weak' 'no']
 ['Sunny' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Mild' 'Normal' 'Weak' 'yes']]


In [3]:
def compute_prior_probablity(train_data):
  """Estimate the prior probability of each class label.

  Args:
    train_data: 2-D NumPy array whose last column holds the class label.

  Returns:
    NumPy float array of length 2: element 0 is the fraction of rows
    labelled 'no', element 1 the fraction labelled 'yes'.
  """
  labels = train_data[:, -1]
  total_rows = len(labels)

  # Class order is fixed: 'no' first, then 'yes'.
  fractions = [np.sum(labels == class_name) / total_rows
               for class_name in ('no', 'yes')]
  return np.array(fractions)

In [4]:
# Class priors estimated from the label column of the training data:
# index 0 is the 'no' class, index 1 is the 'yes' class.
prior_probablity = compute_prior_probablity(train_data)
print("P(play tennis = No)" , prior_probablity[0])
print ("P(play tennis = Yes)" , prior_probablity[1])

P(play tennis = No) 0.4
P(play tennis = Yes) 0.6


In [5]:
def compute_conditional_probability(train_data):
  """Estimate P(feature value | class) for every feature column.

  Args:
    train_data: 2-D NumPy array; every column except the last is a
      categorical feature and the last column is the class label.

  Returns:
    A tuple (conditional_probability, list_x_name):
      conditional_probability: list with one dict per feature column,
        mapping feature value -> {'no': p, 'yes': p}. A class that never
        occurs in the data gets probability 0.
      list_x_name: list with one array per feature column holding that
        column's distinct values as returned by np.unique.
  """
  labels = train_data[:, -1]
  feature_count = train_data.shape[1] - 1

  # Distinct values of each feature column, in column order.
  list_x_name = [np.unique(train_data[:, col]) for col in range(feature_count)]

  conditional_probability = []
  for col, distinct_values in enumerate(list_x_name):
    column = train_data[:, col]
    table = {}
    for x_val in distinct_values:
      per_class = {}
      for y_val in ('no', 'yes'):
        in_class = labels == y_val
        class_total = np.sum(in_class)
        joint_total = np.sum((column == x_val) & in_class)
        # Guard against a class that is absent from the data.
        if class_total != 0:
          per_class[y_val] = joint_total / class_total
        else:
          per_class[y_val] = 0
      table[x_val] = per_class
    conditional_probability.append(table)

  return conditional_probability, list_x_name

In [6]:
# Show the distinct values found in each of the four feature columns.
# The conditional-probability tables are not needed here, hence `_`.
train_data = create_train_data ()
_ , list_x_name = compute_conditional_probability( train_data )
print ("x1 = ", list_x_name [0])
print ("x2 = ", list_x_name [1])
print ("x3 = ", list_x_name [2])
print ("x4 = ", list_x_name [3])

x1 =  ['Overcast' 'Rain' 'Sunny']
x2 =  ['Cool' 'Hot' 'Mild']
x3 =  ['High' 'Normal']
x4 =  ['Strong' 'Weak']


In [7]:
# Look up where a feature value sits inside an array of feature values.
def get_index_from_value(feature_name, list_features):
  """Return the index of the first entry of list_features equal to feature_name.

  Args:
    feature_name: the value to search for.
    list_features: NumPy array of candidate values.

  Returns:
    The index of the first matching entry.

  Raises:
    IndexError: if no entry matches.
  """
  is_match = list_features == feature_name
  match_positions = np.nonzero(is_match)[0]
  return match_positions[0]

In [8]:
# Demonstrate get_index_from_value on the first feature column: each value
# is mapped to its position inside that column's array of distinct values.
train_data = create_train_data ()
_ , list_x_name = compute_conditional_probability ( train_data )
outlook = list_x_name [0]

i1 = get_index_from_value ("Overcast", outlook )
i2 = get_index_from_value ("Rain", outlook )
i3 = get_index_from_value ("Sunny", outlook )

print ( i1 , i2 , i3 )

0 1 2


In [9]:
train_data = create_train_data()
conditional_probability, list_x_name = compute_conditional_probability(train_data)

# Compute P("Outlook"="Sunny"|"Play Tennis"="Yes").
# Index 0 selects the table of the first feature column (Outlook).
play_tennis_yes_prob = conditional_probability[0]['Sunny']['yes']

print("P('Outlook'='Sunny'|'Play Tennis'='Yes') = ", np.round(play_tennis_yes_prob, 2))


P('Outlook'='Sunny'|'Play Tennis'='Yes') =  0.17


In [10]:
train_data = create_train_data()
conditional_probability, list_x_name = compute_conditional_probability(train_data)

# Compute P("Outlook"="Sunny"|"Play Tennis"="No").
# Index 0 selects the table of the first feature column (Outlook).
outlook_sunny_given_no = conditional_probability[0]['Sunny']['no']

print("P('Outlook'='Sunny'|'Play Tennis'='No') = ", np.round(outlook_sunny_given_no, 2))

P('Outlook'='Sunny'|'Play Tennis'='No') =  0.5


In [11]:
# ##########################
# Train Naive Bayes Model
# ##########################
def train_naive_bayes ( train_data ) :
  """Fit the Naive Bayes model by estimating its probability tables.

  Args:
    train_data: 2-D NumPy array; feature columns followed by the class
      label in the last column.

  Returns:
    A tuple (prior_probability, conditional_probability, list_x_name):
      prior_probability: result of compute_prior_probablity(train_data).
      conditional_probability, list_x_name: the two results of
        compute_conditional_probability(train_data).
  """
  # Step 1: Calculate Prior Probability
  prior_probability = compute_prior_probablity ( train_data )

  # Step 2: Calculate Conditional Probability
  conditional_probability , list_x_name = compute_conditional_probability ( train_data)

  return prior_probability , conditional_probability , list_x_name

In [13]:
def prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability):
    """Classify one sample with the trained Naive Bayes tables.

    The score of each class is its prior multiplied by the conditional
    probability of every feature value in X given that class. The number of
    features is taken from list_x_name instead of being fixed at four.

    Args:
        X: sequence of feature values, one per feature, in the same column
            order as the training data.
        list_x_name: one array of known values per feature, as returned by
            compute_conditional_probability.
        prior_probability: sequence where index 0 is P(no) and index 1 is
            P(yes).
        conditional_probability: one dict per feature mapping
            feature value -> {'no': p, 'yes': p}.

    Returns:
        0 when the 'no' score is strictly greater than the 'yes' score,
        otherwise 1 (a tie therefore yields 1).

    Raises:
        IndexError: if X has fewer entries than list_x_name, or if a value
            in X is not among the known values of its feature (raised by
            get_index_from_value).
    """
    p0 = prior_probability[0]
    p1 = prior_probability[1]

    for feature_idx, known_values in enumerate(list_x_name):
        # Validates that the value was seen during training.
        value_idx = get_index_from_value(X[feature_idx], known_values)
        likelihood = conditional_probability[feature_idx][known_values[value_idx]]
        p0 = p0 * likelihood['no']
        p1 = p1 * likelihood['yes']

    return 0 if p0 > p1 else 1


In [14]:
# End-to-end check: train on the toy data set and classify one sample.
X = ['Sunny', 'Cool', 'High', 'Strong']
data = create_train_data()
prior_probability , conditional_probability , list_x_name = train_naive_bayes ( data )
pred = prediction_play_tennis (X , list_x_name , prior_probability ,
conditional_probability )

# pred is 1 for the 'yes' class and 0 for the 'no' class.
# NOTE(review): the messages mention "Ad" although the model predicts
# play-tennis yes/no — confirm the intended wording.
if( pred ) :
  print ("Ad should go!")
else :
  print ("Ad should not go!")

Ad should not go!
