### Import Libraries

In [1]:
import pandas as pd
import numpy as np

### Load dataset

In [2]:
df = pd.read_csv('dataset/dataset.csv')

In [3]:
df.head()

Unnamed: 0,Outlook,Temp,Hum,Wind,Play Tennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes


In [4]:
df.shape

(31, 5)

### Split dataset into X and Y

In [5]:
X = df.drop(columns='Play Tennis')
y = df['Play Tennis']

In [6]:
X.head()

Unnamed: 0,Outlook,Temp,Hum,Wind
0,Sunny,Hot,High,Weak
1,Sunny,Hot,High,Strong
2,Overcast,Hot,High,Weak
3,Rain,Mild,High,Weak
4,Rain,Cool,Normal,Weak


In [7]:
y.head()

0     No
1     No
2    Yes
3    Yes
4    Yes
Name: Play Tennis, dtype: object

### Get size of training datasize and its features to train

In [8]:
X.shape

(31, 4)

In [9]:
train_size = X.shape[0]
total_features = X.shape[1]

### Define dictionaries for likelihoods, class priors, and prior probabilities of features

In [10]:
likelihoods = {}
class_prior = {}
pred_priors = {}

### Get a list of columns in the feature dataset

In [11]:
columns = list(X.columns)
columns

['Outlook', 'Temp', 'Hum', 'Wind']

### Initialize the likelihoods and prior probability dictionaries

In [12]:
for column in columns:
    likelihoods[column] = {} # Dictionary to hold likelihoods for each feature
    pred_priors[column] = {} # Dictionary to hold prior probabilities for each feature value

    for col_value in np.unique(X[column]):  # For each unique value in the feature
        # Initialize prior count for each feature value
        pred_priors[column].update({col_value:0})

        for outcome in np.unique(y):  # For each unique outcome in y
            # Initialize likelihood count for each value/outcome combination
            likelihoods[column].update({col_value+'_'+outcome:0})
            # Initialize class prior count for each outcome
            class_prior.update({outcome:0})

### Display dictionaries

In [13]:
likelihoods

{'Outlook': {'Overcast_No': 0,
  'Overcast_Yes': 0,
  'Rain_No': 0,
  'Rain_Yes': 0,
  'Sunny_No': 0,
  'Sunny_Yes': 0},
 'Temp': {' Mild_No': 0,
  ' Mild_Yes': 0,
  'Cool_No': 0,
  'Cool_Yes': 0,
  'Hot_No': 0,
  'Hot_Yes': 0,
  'Mild_No': 0,
  'Mild_Yes': 0},
 'Hum': {'High_No': 0, 'High_Yes': 0, 'Normal_No': 0, 'Normal_Yes': 0},
 'Wind': {'Strong_No': 0, 'Strong_Yes': 0, 'Weak_No': 0, 'Weak_Yes': 0}}

In [14]:
pred_priors

{'Outlook': {'Overcast': 0, 'Rain': 0, 'Sunny': 0},
 'Temp': {' Mild': 0, 'Cool': 0, 'Hot': 0, 'Mild': 0},
 'Hum': {'High': 0, 'Normal': 0},
 'Wind': {'Strong': 0, 'Weak': 0}}

In [15]:
class_prior

{'No': 0, 'Yes': 0}

### Calculate prior class probabilities

In [16]:
for val in np.unique(y):
    unique_count = sum(val==y) # Count occurrences of the current outcome
    class_prior[val] = unique_count # Update the class prior count

### Display class prior probabilities

In [17]:
class_prior

{'No': 12, 'Yes': 19}

### Calculate prior probabilities for each feature value

In [18]:
for col in columns:
    for val in np.unique(X[col]):  # For each unique value in the feature
        unique_count = sum(val == X[col])  # Count occurrences of the feature value
        pred_priors[col][val] = unique_count / train_size  # Calculate prior probability

In [19]:
pred_priors

{'Outlook': {'Overcast': 0.3225806451612903,
  'Rain': 0.3225806451612903,
  'Sunny': 0.3548387096774194},
 'Temp': {' Mild': 0.03225806451612903,
  'Cool': 0.3225806451612903,
  'Hot': 0.3225806451612903,
  'Mild': 0.3225806451612903},
 'Hum': {'High': 0.5483870967741935, 'Normal': 0.45161290322580644},
 'Wind': {'Strong': 0.45161290322580644, 'Weak': 0.5483870967741935}}

In [20]:
likelihoods

{'Outlook': {'Overcast_No': 0,
  'Overcast_Yes': 0,
  'Rain_No': 0,
  'Rain_Yes': 0,
  'Sunny_No': 0,
  'Sunny_Yes': 0},
 'Temp': {' Mild_No': 0,
  ' Mild_Yes': 0,
  'Cool_No': 0,
  'Cool_Yes': 0,
  'Hot_No': 0,
  'Hot_Yes': 0,
  'Mild_No': 0,
  'Mild_Yes': 0},
 'Hum': {'High_No': 0, 'High_Yes': 0, 'Normal_No': 0, 'Normal_Yes': 0},
 'Wind': {'Strong_No': 0, 'Strong_Yes': 0, 'Weak_No': 0, 'Weak_Yes': 0}}

### Calculate likelihood probabilities for each feature given each outcome

In [21]:
for column in columns:
    for outcome in np.unique(y):  # For each outcome
        outcome_count = sum(outcome == y)  # Count occurrences of the outcome
        # Get the feature values where y matches the outcome
        feature_likelihood = X[column][y[y == outcome].index.values.tolist()].value_counts().to_dict()
        
        # Update likelihoods based on feature values
        for col_val, count in feature_likelihood.items():
            likelihoods[column][col_val + '_' + outcome] = count / outcome_count  # Calculate probability

In [22]:
likelihoods

{'Outlook': {'Overcast_No': 0,
  'Overcast_Yes': 0.5263157894736842,
  'Rain_No': 0.5,
  'Rain_Yes': 0.21052631578947367,
  'Sunny_No': 0.5,
  'Sunny_Yes': 0.2631578947368421},
 'Temp': {' Mild_No': 0,
  ' Mild_Yes': 0.05263157894736842,
  'Cool_No': 0.3333333333333333,
  'Cool_Yes': 0.3157894736842105,
  'Hot_No': 0.3333333333333333,
  'Hot_Yes': 0.3157894736842105,
  'Mild_No': 0.3333333333333333,
  'Mild_Yes': 0.3157894736842105},
 'Hum': {'High_No': 0.8333333333333334,
  'High_Yes': 0.3684210526315789,
  'Normal_No': 0.16666666666666666,
  'Normal_Yes': 0.631578947368421},
 'Wind': {'Strong_No': 0.5,
  'Strong_Yes': 0.42105263157894735,
  'Weak_No': 0.5,
  'Weak_Yes': 0.5789473684210527}}

### Calculate Posterior Probability for a test case

In [23]:
test = ['Sunny', 'Mild', 'Normal', 'Strong']  # New instance for which we want to predict the outcome

In [24]:
results = {} # Dictionary to hold the posterior probabilities for each outcome

In [25]:
for outcome in np.unique(y):  # For each outcome
    prior = class_prior[outcome]  # Prior probability of the outcome
    evidence = 1  # Initialize evidence probability
    likelihood = 1  # Initialize likelihood probability
    
    for col, col_val in zip(columns, test):  # Iterate through the features and their values
        likelihood *= likelihoods[col][col_val + '_' + outcome]  # Multiply the likelihood
        evidence *= pred_priors[col][col_val]  # Multiply the prior probability for evidence
    
    # Calculate the posterior probability using Bayes' theorem
    posterior = likelihood * prior / evidence
    results[outcome] = posterior  # Store the posterior probability

### Display results of posterior probabilities

In [26]:
results

{'No': 7.13915429808287, 'Yes': 17.98579767763114}

In [27]:
### Determine the outcome with the highest posterior probability

In [28]:
result = max(results, key=lambda x : results[x])

In [29]:
result

'Yes'