In [77]:
import numpy as np

def create_train_data():
    data = [
        ['Sunny', 'Hot', 'High', 'Weak', 'no'],
        ['Sunny', 'Hot', 'High', 'Strong', 'no'],
        ['Overcast', 'Hot', 'High', 'Weak', 'yes'],
        ['Rain', 'Mild', 'High', 'Weak', 'yes'],
        ['Rain', 'Cool', 'Normal', 'Weak', 'yes'],
        ['Rain', 'Cool', 'Normal', 'Strong', 'no'],
        ['Overcast', 'Cool', 'Normal', 'Strong', 'yes'],
        ['Overcast', 'Mild', 'High', 'Weak', 'no'],
        ['Sunny', 'Cool', 'Normal', 'Weak', 'yes'],
        ['Rain', 'Mild', 'Normal', 'Weak', 'yes']
    ]
    return np.array(data)

def compute_prior_probability(train_data):
    y_unique = ['no', 'yes']
    prior_probability = np.zeros(len(y_unique))
    total_samples = len(train_data)

    # Counting the occurrences of each class label
    for row in train_data:
        if row[-1] == y_unique[0]:
            prior_probability[0] += 1
        elif row[-1] == y_unique[1]:
            prior_probability[1] += 1

    # Calculating prior probabilities
    prior_probability = prior_probability / total_samples
    return prior_probability

train_data = create_train_data()
prior_probability = compute_prior_probability(train_data)
print("P(play tennis = No)", prior_probability[0])
print("P(play tennis = Yes)", prior_probability[1])


P(play tennis = No) 0.4
P(play tennis = Yes) 0.6


In [78]:
def compute_conditional_probability(train_data):
    y_unique = ['no', 'yes']
    conditional_probability = []
    list_x_name = []

    for i in range(train_data.shape[1] - 1):
        x_unique = np.unique(train_data[:, i])
        list_x_name.append(x_unique)
        p_0 = []
        p_1 = []
        for idx,x  in enumerate(x_unique):
           # print(x,'---',i, len(np.where((train_data[:,i]==x) & (train_data[:,-1]=='yes'))[0]))
            p_1_x = len(np.where((train_data[:,i]==x) & (train_data[:,-1]=='yes'))[0])/len(np.where(train_data[:,-1]=='yes')[0])
            p_0_x = len(np.where((train_data[:,i]==x) & (train_data[:,-1]=='no'))[0])/len(np.where(train_data[:,-1]=='no')[0])
            p_0.append(p_0_x)
            p_1.append(p_1_x)
        conditional_probability.append([p_0, p_1])
    #print(conditional_probability)
    return conditional_probability, list_x_name
#compute_conditional_probability(train_data)

In [79]:

# train_data = create_train_data()
# conditional_probability, list_x_name = compute_conditional_probability(train_data)

# # Compute P("Outlook" = "Sunny" | Play Tennis = "Yes")
# x1 = get_index_from_value("Sunny", list_x_name[0])
# print(list_x_name[0],x1)
# print("P('Outlook'='Sunny'|Play Tennis='Yes') = ", conditional_probability[0][1][x1])

In [80]:
train_data = create_train_data()
_, list_x_name = compute_conditional_probability(train_data)

print("x1 =", list_x_name[0])
print("x2 =", list_x_name[1])
print("x3 =", list_x_name[2])
print("x4 =", list_x_name[3])

x1 = ['Overcast' 'Rain' 'Sunny']
x2 = ['Cool' 'Hot' 'Mild']
x3 = ['High' 'Normal']
x4 = ['Strong' 'Weak']


In [81]:
import numpy as np

def get_index_from_value(feature_name, list_features):
    return np.where(list_features == feature_name)[0][0]

train_data = create_train_data()
_, list_x_name = compute_conditional_probability(train_data)
outlook = list_x_name[0]

i1 = get_index_from_value("Overcast", outlook)
i2 = get_index_from_value("Rain", outlook)
i3 = get_index_from_value("Sunny", outlook)

print(i1, i2, i3)


0 1 2


In [82]:

train_data = create_train_data()
conditional_probability, list_x_name = compute_conditional_probability(train_data)

# Compute P("Outlook" = "Sunny" | Play Tennis = "Yes")
x1 = get_index_from_value("Sunny", list_x_name[0])
print("P('Outlook'='Sunny'|Play Tennis='Yes') = ", np.round(conditional_probability[0][1][x1], 2))


P('Outlook'='Sunny'|Play Tennis='Yes') =  0.17


In [83]:

train_data = create_train_data()
conditional_probability, list_x_name = compute_conditional_probability(train_data)

# Compute P("Outlook" = "Sunny" | Play Tennis = "Yes")
x1 = get_index_from_value("Sunny", list_x_name[0])
print("P('Outlook'='Sunny'|Play Tennis='Yes') = ", np.round(conditional_probability[0][0][x1], 2))


P('Outlook'='Sunny'|Play Tennis='Yes') =  0.5


In [84]:
def train_naive_bayes(train_data):
    # Step 1: Calculate Prior Probability
    y_unique = ['no', 'yes']
    prior_probability = compute_prior_probability(train_data)

    # Step 2: Calculate Conditional Probability
    conditional_probability, list_x_name = compute_conditional_probability(train_data)

    return prior_probability, conditional_probability, list_x_name

train_naive_bayes(train_data)

(array([0.4, 0.6]),
 [[[0.25, 0.25, 0.5], [0.3333333333333333, 0.5, 0.16666666666666666]],
  [[0.25, 0.5, 0.25], [0.5, 0.16666666666666666, 0.3333333333333333]],
  [[0.75, 0.25], [0.3333333333333333, 0.6666666666666666]],
  [[0.5, 0.5], [0.16666666666666666, 0.8333333333333334]]],
 [array(['Overcast', 'Rain', 'Sunny'], dtype='<U8'),
  array(['Cool', 'Hot', 'Mild'], dtype='<U8'),
  array(['High', 'Normal'], dtype='<U8'),
  array(['Strong', 'Weak'], dtype='<U8')])

In [85]:
def prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability):
    # Get the index of each feature value
    x1 = get_index_from_value(X[0], list_x_name[0])
    x2 = get_index_from_value(X[1], list_x_name[1])
    x3 = get_index_from_value(X[2], list_x_name[2])
    x4 = get_index_from_value(X[3], list_x_name[3])
    day_feature = [x1,x2,x3,x4]
    # Initialize probabilities for each class
    p0 = prior_probability[0]
    p1 = prior_probability[1]
    for col, feature in enumerate(day_feature):
        p0 *= conditional_probability[col][0][feature]
        p1 *= conditional_probability[col][1][feature]
    if p0>=p1:
        return 'yes'
    else:
        return 'no'
    
# Define the input features
X = ['Sunny', 'Cool', 'High', 'Strong']

# Create the training data
data = create_train_data()

# Train the Naive Bayes model
prior_probability, conditional_probability, list_x_name = train_naive_bayes(data)

# Predict the class label for the input features
pred = prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability)

# Print the prediction result
if pred == 'yes':
    print("Ad should go!")
else:
    print("Ad should not go!")


Ad should go!
