In [1]:
from IPython.display import display, HTML
# Inject a CSS rule that forces the notebook page's `.container` element to 100% width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Q1: Naive Bayes classifier for categorical features

In [2]:
import numpy as np

class NaiveBayesClassifier:
    """Naive Bayes classifier for categorical (discrete-valued) features.

    Probabilities are plain relative frequencies (no smoothing): a feature
    value that never occurs together with a class in the training data gets
    probability 0 for that class.

    Attributes:
        class_probabilities: dict mapping class label -> P(class).
        feature_probabilities: nested dict
            feature index -> class label -> feature value -> P(value | class).
    """

    def __init__(self):
        self.class_probabilities = {}
        self.feature_probabilities = {}

    def fit(self, X_train, y_train):
        """Estimate class priors and per-class feature value frequencies.

        Any state from a previous call to ``fit`` is discarded first.

        Args:
            X_train: 2-D array-like of shape (n_samples, n_features).
            y_train: 1-D array-like of n_samples class labels.
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)

        # Start from empty tables so refitting never leaves stale classes or
        # feature indices behind for predict() to iterate over.
        self.class_probabilities = {}
        self.feature_probabilities = {}

        # Calculate class probabilities
        total_samples = len(y_train)
        classes, class_counts = np.unique(y_train, return_counts=True)
        for class_, class_count in zip(classes, class_counts):
            self.class_probabilities[class_] = class_count / total_samples

        # The rows belonging to each class do not depend on the feature or the
        # value being counted, so select them once up front.
        rows_per_class = [X_train[y_train == class_] for class_ in classes]

        # Calculate feature probabilities for each class
        for feature_index in range(X_train.shape[1]):
            per_class = {}
            unique_values = np.unique(X_train[:, feature_index])
            for class_, class_count, class_rows in zip(classes, class_counts, rows_per_class):
                column = class_rows[:, feature_index]
                per_class[class_] = {
                    value: np.sum(column == value) / class_count
                    for value in unique_values
                }
            self.feature_probabilities[feature_index] = per_class

    def predict(self, X_test):
        """Return the most probable class label for every sample in ``X_test``.

        Args:
            X_test: iterable of samples, each an iterable of feature values in
                the same column order used for ``fit``.

        Returns:
            list with one predicted label per sample. Ties are resolved in
            favour of the class encountered first; the entry is ``None`` when
            the classifier has not been fitted.
        """
        predictions = []
        for sample in X_test:
            max_probability = -1
            predicted_class = None
            for class_, class_prob in self.class_probabilities.items():
                probability = class_prob
                for feature_index, feature_value in enumerate(sample):
                    per_class = self.feature_probabilities.get(feature_index)
                    if per_class is None:
                        # Column was not part of the training data: ignore it.
                        continue
                    value_probs = per_class.get(class_)
                    if value_probs is None or feature_value not in value_probs:
                        # If feature value not seen in training, set probability to zero
                        probability = 0
                        break
                    probability *= value_probs[feature_value]
                if probability > max_probability:
                    max_probability = probability
                    predicted_class = class_
            predictions.append(predicted_class)
        return predictions

def preprocess_data(data):
    """Return a list-of-lists copy of ``data`` with each '?' entry replaced by 'unknown'.

    Only values equal to the single character '?' are replaced; every other
    value (including longer strings such as '??') is passed through unchanged.

    Args:
        data: iterable of rows, each row an iterable of values.

    Returns:
        New list of lists; the input is not modified.
    """
    return [
        ['unknown' if cell == '?' else cell for cell in record]
        for record in data
    ]

# Records: age, income, student, credit_rating, class label.
# The label column holds the placeholder '??' and is not used for encoding.
data = [
    ['<=30', 'low', 'no', 'fair', '??'],
    ['31...40', 'high', 'yes', 'excellent', '??'],
    ['<=30', 'medium', 'yes', 'fair', '??'],
    ['>40', 'high', 'no', 'excellent', '??']
]

# preprocess_data only rewrites values equal to '?', so '??' is left as it is.
processed_data = preprocess_data(data)

# Mapping for categorical values
age_mapping = {'<=30': 0, '31...40': 1, '>40': 2}
income_mapping = {'low': 0, 'medium': 1, 'high': 2}
student_mapping = {'no': 0, 'yes': 1}
credit_rating_mapping = {'fair': 0, 'excellent': 1}
class_mapping = {'no': 0, 'yes': 1}  # Class mapping

# One mapping per feature column, in column order.
feature_mappings = (age_mapping, income_mapping, student_mapping, credit_rating_mapping)


def encode_features(rows):
    """Encode the first four (categorical) columns of each row as integers.

    Shared by the training and prediction data so both are always encoded the
    same way. ``zip`` stops after the last mapping, so the trailing label
    column of each row is ignored.

    Raises:
        KeyError: if a row contains a category missing from its mapping.
    """
    return np.array([
        [mapping[value] for mapping, value in zip(feature_mappings, row)]
        for row in rows
    ])


X_train = encode_features(processed_data)

# Training data
y_train = np.array([0, 1, 1, 0])  # Assuming the classes are encoded as integers (0 for 'no' and 1 for 'yes')

# Create and train the Naive Bayes classifier
classifier = NaiveBayesClassifier()
classifier.fit(X_train, y_train)

# Test data
data_to_predict = [
    ['<=30', 'low', 'no', 'fair', '??'],
    ['31...40', 'high', 'yes', 'excellent', '??'],
    ['<=30', 'medium', 'yes', 'fair', '??'],
    ['>40', 'high', 'no', 'excellent', '??']
]

processed_data_to_predict = preprocess_data(data_to_predict)
X_to_predict = encode_features(processed_data_to_predict)

# Make predictions
predicted_labels = classifier.predict(X_to_predict)

# Convert numerical class labels back to original labels using the inverse of class_mapping
label_names = {code: name for name, code in class_mapping.items()}
predicted_classes = [label_names[label] for label in predicted_labels]

# Print the predicted classes
for i, data_point in enumerate(data_to_predict):
    print(f"Data Point {chr(65 + i)}: {data_point} => Predicted Class: {predicted_classes[i]}")

Data Point A: ['<=30', 'low', 'no', 'fair', '??'] => Predicted Class: no
Data Point B: ['31...40', 'high', 'yes', 'excellent', '??'] => Predicted Class: yes
Data Point C: ['<=30', 'medium', 'yes', 'fair', '??'] => Predicted Class: yes
Data Point D: ['>40', 'high', 'no', 'excellent', '??'] => Predicted Class: no


# Q4: Naive Bayes vs. Bayes optimal classifier on symptom data

In [4]:
class NaiveBayesClassifier:
    """Two-class ('+' ill / '-' healthy) classifier over four symptoms.

    The symptoms are running nose, coughing, reddened skin and fever. ``fit``
    fills one "positive" and one "negative" probability per symptom plus the
    two class priors; ``predict`` multiplies one factor per symptom into a
    score for each class and returns the class with the larger score.

    NOTE: this class reuses the name of the categorical classifier defined for
    Q1 earlier in the notebook, so running this cell rebinds
    ``NaiveBayesClassifier`` to this implementation.
    """

    def __init__(self):
        # Class priors.
        self.prob_positive = 0
        self.prob_negative = 0
        # Per-symptom probabilities, all populated by fit().
        self.prob_running_nose_positive = 0
        self.prob_running_nose_negative = 0
        self.prob_coughing_positive = 0
        self.prob_coughing_negative = 0
        self.prob_reddened_skin_positive = 0
        self.prob_reddened_skin_negative = 0
        self.prob_fever_positive = 0
        self.prob_fever_negative = 0

    def fit(self, data):
        """Estimate the priors and add-one smoothed symptom probabilities.

        Every statistic is obtained with ``data.count(pattern)``: for a list
        that is the number of elements equal to the pattern, for a string the
        number of non-overlapping occurrences of the pattern.

        NOTE(review): the count patterns ('+,', '+,-', '+-', '--+', ...) imply
        a specific record encoding that is not documented in this notebook --
        confirm them against the intended data format.

        Args:
            data: object supporting ``len()`` and ``.count()`` (list or str).

        Raises:
            ZeroDivisionError: if ``data`` is empty.
        """
        total_instances = len(data)
        positive_instances = data.count('+')
        negative_instances = total_instances - positive_instances

        self.prob_positive = positive_instances / total_instances
        self.prob_negative = negative_instances / total_instances

        # Each estimate below is (count + 1) / (class size + 2), i.e. add-one
        # smoothing over two outcomes, so no probability is exactly 0.
        running_nose_positive = data.count('+,')
        running_nose_negative = data.count('-,')

        self.prob_running_nose_positive = (running_nose_positive + 1) / (positive_instances + 2)
        self.prob_running_nose_negative = (running_nose_negative + 1) / (negative_instances + 2)

        coughing_positive = data.count('+,-')
        coughing_negative = data.count('-,-')

        self.prob_coughing_positive = (coughing_positive + 1) / (positive_instances + 2)
        self.prob_coughing_negative = (coughing_negative + 1) / (negative_instances + 2)

        reddened_skin_positive = data.count('+-')
        reddened_skin_negative = data.count('--')

        self.prob_reddened_skin_positive = (reddened_skin_positive + 1) / (positive_instances + 2)
        self.prob_reddened_skin_negative = (reddened_skin_negative + 1) / (negative_instances + 2)

        fever_positive = data.count('--+')
        fever_negative = data.count('---')

        self.prob_fever_positive = (fever_positive + 1) / (positive_instances + 2)
        self.prob_fever_negative = (fever_negative + 1) / (negative_instances + 2)

    def predict(self, input_data):
        """Classify one observation of the four symptoms.

        For each symptom observed as '+', the positive score is multiplied by
        that symptom's ``*_positive`` probability and the negative score by its
        ``*_negative`` probability; for any other observed value the two are
        swapped.

        Args:
            input_data: iterable of exactly four values
                (running_nose, coughing, reddened_skin, fever), each '+' or '-'.

        Returns:
            'positive (ill)' if the positive score is strictly larger,
            otherwise 'negative (healthy)'.

        Raises:
            ValueError: if ``input_data`` does not contain exactly four items.
        """
        running_nose, coughing, reddened_skin, fever = input_data

        symptoms = (
            (running_nose, self.prob_running_nose_positive, self.prob_running_nose_negative),
            (coughing, self.prob_coughing_positive, self.prob_coughing_negative),
            (reddened_skin, self.prob_reddened_skin_positive, self.prob_reddened_skin_negative),
            (fever, self.prob_fever_positive, self.prob_fever_negative),
        )

        prob_positive_given_input = self.prob_positive
        prob_negative_given_input = self.prob_negative

        # One factor per symptom. This is a loop on purpose: a chain such as
        # `p * a if x == '+' else b * c if y == '+' else d` parses as a single
        # nested conditional expression (the conditional binds more loosely
        # than `*`), which silently drops most of the factors.
        for observed, p_positive, p_negative in symptoms:
            if observed == '+':
                prob_positive_given_input *= p_positive
                prob_negative_given_input *= p_negative
            else:
                prob_positive_given_input *= p_negative
                prob_negative_given_input *= p_positive

        if prob_positive_given_input > prob_negative_given_input:
            return 'positive (ill)'
        else:
            return 'negative (healthy)'


class BayesOptimalClassifier:
    """Two-class ('+' / '-') classifier that compares two fixed products.

    ``fit`` estimates the class priors and one "positive" and one "negative"
    smoothed probability per symptom. ``predict`` multiplies the prior with all
    four ``*_positive`` values (and likewise for ``*_negative``) and returns the
    label of the larger product.

    NOTE(review): ``predict`` never looks at the observed symptom values, so
    after fitting it returns the same label for every input -- confirm whether
    that is intended.
    """

    def __init__(self):
        # Class priors.
        self.prob_positive = 0
        self.prob_negative = 0
        # Per-symptom probabilities, positive/negative variant for each.
        self.prob_running_nose_positive = self.prob_running_nose_negative = 0
        self.prob_coughing_positive = self.prob_coughing_negative = 0
        self.prob_reddened_skin_positive = self.prob_reddened_skin_negative = 0
        self.prob_fever_positive = self.prob_fever_negative = 0

    def fit(self, data):
        """Populate the priors and the add-one smoothed symptom probabilities.

        All counts come from ``data.count(pattern)``.

        Args:
            data: object supporting ``len()`` and ``.count()`` (list or str).

        Raises:
            ZeroDivisionError: if ``data`` is empty.
        """
        n_total = len(data)
        n_positive = data.count('+')
        n_negative = n_total - n_positive

        self.prob_positive = n_positive / n_total
        self.prob_negative = n_negative / n_total

        def smoothed(pattern, class_size):
            # (occurrences + 1) / (class size + 2)
            return (data.count(pattern) + 1) / (class_size + 2)

        self.prob_running_nose_positive = smoothed('+,', n_positive)
        self.prob_running_nose_negative = smoothed('-,', n_negative)

        self.prob_coughing_positive = smoothed('+,-', n_positive)
        self.prob_coughing_negative = smoothed('-,-', n_negative)

        self.prob_reddened_skin_positive = smoothed('+-', n_positive)
        self.prob_reddened_skin_negative = smoothed('--', n_negative)

        self.prob_fever_positive = smoothed('--+', n_positive)
        self.prob_fever_negative = smoothed('---', n_negative)

    def predict(self, input_data):
        """Return the label whose fitted probability product is larger.

        Args:
            input_data: iterable of exactly four values. It is unpacked (so a
                wrong length raises ``ValueError``) but the values themselves
                do not influence the result.

        Returns:
            'positive (ill)' if the positive product is strictly larger,
            otherwise 'negative (healthy)'.
        """
        # Unpacking only checks the arity of the input.
        _running_nose, _coughing, _reddened_skin, _fever = input_data

        positive_score = self.prob_positive
        for factor in (
            self.prob_running_nose_positive,
            self.prob_coughing_positive,
            self.prob_reddened_skin_positive,
            self.prob_fever_positive,
        ):
            positive_score = positive_score * factor

        negative_score = self.prob_negative
        for factor in (
            self.prob_running_nose_negative,
            self.prob_coughing_negative,
            self.prob_reddened_skin_negative,
            self.prob_fever_negative,
        ):
            negative_score = negative_score * factor

        return 'positive (ill)' if positive_score > negative_score else 'negative (healthy)'


def main():
    """Fit both Q4 classifiers on the sample data and print their predictions.

    For each of the three test inputs, prints the input followed by the label
    returned by the Naive Bayes classifier and by the Bayes optimal classifier.
    """
    # Data: Running nose (+), Coughing (+), Reddened skin (+), Fever (-)
    training_data = ['+,', '+,-', '+-', '---']

    # Symptom tuples to classify: (running_nose, coughing, reddened_skin, fever)
    test_inputs = [
        ('-', '+', '-', '+'),
        ('+', '-', '-', '+'),
        ('+', '-', '+', '-'),
    ]

    # Build both models first, then train each on the same data
    naive_bayes = NaiveBayesClassifier()
    bayes_optimal = BayesOptimalClassifier()
    naive_bayes.fit(training_data)
    bayes_optimal.fit(training_data)

    # Report both predictions for every test input
    for inp in test_inputs:
        naive_bayes_label = naive_bayes.predict(inp)
        bayes_optimal_label = bayes_optimal.predict(inp)
        print(f"For input {inp}:")
        print("Naive Bayes Classifier prediction:", naive_bayes_label)
        print("Bayes Optimal Classifier prediction:", bayes_optimal_label)

# Run the Q4 demo when this code executes as the main module; the captured
# output below this cell shows that it does when the cell is run.
if __name__ == "__main__":
    main()


For input ('-', '+', '-', '+'):
Naive Bayes Classifier prediction: negative (healthy)
Bayes Optimal Classifier prediction: negative (healthy)
For input ('+', '-', '-', '+'):
Naive Bayes Classifier prediction: negative (healthy)
Bayes Optimal Classifier prediction: negative (healthy)
For input ('+', '-', '+', '-'):
Naive Bayes Classifier prediction: negative (healthy)
Bayes Optimal Classifier prediction: negative (healthy)


# Q2: Gaussian Naive Bayes on the weather / play data

In [14]:
import numpy as np
import pandas as pd

# Given training data: one list per column, rows aligned by position.
# 'Outlook' is categorical text and 'Windy' is boolean here; both are converted
# to integers further down before training. 'Play' is the class label.
data = {
    'Outlook': ['sunny', 'sunny', 'overcast', 'rainy', 'overcast', 'sunny', 'sunny', 'overcast', 'rainy', 'overcast', 'sunny', 'rainy', 'sunny', 'overcast', 'overcast', 'sunny', 'overcast'],
    'Temperature': [85, 80, 83, 68, 70, 64, 72, 80, 65, 70, 64, 65, 72, 90, 81, 75, 71],
    'Humidity': [85, 90, 86, 80, 96, 65, 95, 67, 70, 96, 65, 70, 95, 90, 75, 91, 70],
    'Windy': [False, True, False, False, False, True, False, False, False, True, True, False, False, True, False, False, True],
    'Play': ['no', 'no', 'yes', 'yes', 'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']
}

# Given data to predict: four rows, already numeric.
# NOTE(review): 'Outlook' and 'Windy' are given as integers here, presumably in
# the same encoding the training columns are converted to below -- confirm.
data_to_predict = {
    'Outlook': [0, 1, 1, 1],
    'Temperature': [87, 67, 72, 80],
    'Humidity': [60, 64, 71, 67],
    'Windy': [0, 1, 0, 0]
}

# Helper function to calculate Gaussian probability
def gaussian_prob(x, mean, std):
    """Evaluate the normal probability density N(mean, std**2) at ``x``.

    Works on scalars and, through NumPy broadcasting, on arrays.

    Args:
        x: point(s) at which to evaluate the density.
        mean: mean of the distribution.
        std: standard deviation of the distribution.

    Returns:
        Density value(s) with the broadcast shape of the inputs.
    """
    normaliser = np.sqrt(2 * np.pi) * std
    z_score = (x - mean) / std
    return (1 / normaliser) * np.exp(-0.5 * z_score ** 2)

# Helper function to train Gaussian Naive Bayes classifier
def train_gaussian_naive_bayes(X_train, y_train, min_std=1e-9):
    """Estimate per-class priors, feature means and feature standard deviations.

    Args:
        X_train: 2-D array-like of shape (n_samples, n_features), numeric.
        y_train: 1-D array-like of n_samples class labels.
        min_std: lower bound applied to every standard deviation. A class with
            a single sample, or a feature that is constant within a class, has
            a standard deviation of 0, which would make the Gaussian density
            divide by zero; the floor keeps the density finite.

    Returns:
        Tuple ``(class_probabilities, class_means, class_stds)`` of dicts keyed
        by class label. The priors are floats; means and standard deviations
        are 1-D arrays with one entry per feature. Standard deviations are the
        population form (NumPy's default ``ddof=0``).
    """
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)

    classes = np.unique(y_train)
    class_probabilities = {}
    class_means = {}
    class_stds = {}

    for class_label in classes:
        X_class = X_train[y_train == class_label]
        class_probabilities[class_label] = len(X_class) / len(X_train)
        class_means[class_label] = X_class.mean(axis=0)
        # Floor the spread so a degenerate (zero-variance) feature stays usable.
        class_stds[class_label] = np.maximum(X_class.std(axis=0), min_std)

    return class_probabilities, class_means, class_stds

# Helper function to make predictions using Gaussian Naive Bayes classifier
def predict_gaussian_naive_bayes(X_test, class_probabilities, class_means, class_stds):
    """Predict the most likely class for each row of ``X_test``.

    Each class is scored by log prior + sum of per-feature Gaussian log
    densities. Working in log space gives the same ranking as multiplying the
    densities, but avoids floating-point underflow: with a plain product, a
    sample far from every class mean scores exactly 0.0 for all classes and the
    first class would win regardless of the data.

    Args:
        X_test: 2-D array-like of shape (n_samples, n_features), numeric.
        class_probabilities: dict class label -> prior probability.
        class_means: dict class label -> per-feature means.
        class_stds: dict class label -> per-feature standard deviations.

    Returns:
        list with one predicted class label per row of ``X_test``. Ties go to
        the class that appears first in ``class_probabilities``.
    """
    X_test = np.asarray(X_test, dtype=float)
    n_features = X_test.shape[1]
    log_sqrt_two_pi = 0.5 * np.log(2 * np.pi)

    predictions = []
    for sample in X_test:
        log_scores = {}
        for class_label, prior in class_probabilities.items():
            # Use only as many parameters as there are feature columns.
            mean = np.asarray(class_means[class_label], dtype=float)[:n_features]
            std = np.asarray(class_stds[class_label], dtype=float)[:n_features]
            z_score = (sample - mean) / std
            log_likelihood = np.sum(-np.log(std) - log_sqrt_two_pi - 0.5 * z_score ** 2)
            log_scores[class_label] = np.log(prior) + log_likelihood
        predictions.append(max(log_scores, key=log_scores.get))
    return predictions

# Build the training frame with 'Outlook' mapped to integer codes and 'Windy'
# cast from bool to int, so every feature column is numeric.
outlook_mapping = {'sunny': 0, 'overcast': 1, 'rainy': 2}
df_train = pd.DataFrame(data).assign(
    Outlook=lambda frame: frame['Outlook'].map(outlook_mapping),
    Windy=lambda frame: frame['Windy'].astype(int),
)

# Feature columns, in the order used for both the training and the test matrix
feature_columns = ['Outlook', 'Temperature', 'Humidity', 'Windy']
X_train = df_train[feature_columns].to_numpy()
y_train = df_train['Play'].to_numpy()

# One row per sample to predict, one column per feature
X_test = np.column_stack([data_to_predict[column] for column in feature_columns])

# Fit the Gaussian Naive Bayes parameters on the training data
class_probabilities, class_means, class_stds = train_gaussian_naive_bayes(X_train, y_train)

# Classify the test rows
predictions = predict_gaussian_naive_bayes(X_test, class_probabilities, class_means, class_stds)

# Store the predicted labels next to the inputs they belong to
data_to_predict['Play'] = predictions

# Tabulate the inputs together with their predictions
df_to_predict = pd.DataFrame(data_to_predict)

# Display the final predictions
print(df_to_predict)


   Outlook  Temperature  Humidity  Windy Play
0        0           87        60      0   no
1        1           67        64      1  yes
2        1           72        71      0  yes
3        1           80        67      0  yes
