# In [2]:
import numpy as np

class NaiveBayesClassifier:
    """Naive Bayes classifier for categorical features using raw frequencies.

    Probabilities are plain relative frequencies (no smoothing), so a feature
    value that never co-occurs with a class contributes a factor of zero.

    Attributes are populated by :meth:`fit`:

    * ``class_probabilities`` maps class label -> P(class).
    * ``feature_probabilities`` maps feature index -> class label ->
      feature value -> P(value | class).
    """

    def __init__(self):
        # class label -> prior probability of that class
        self.class_probabilities = {}
        # feature index -> class label -> feature value -> P(value | class)
        self.feature_probabilities = {}

    def fit(self, X_train, y_train):
        """Estimate class priors and per-feature conditional probabilities.

        Any tables from a previous call are discarded, so the fitted state
        always reflects only the data passed to the most recent call.

        Args:
            X_train: 2-D array-like of shape (n_samples, n_features) holding
                categorical feature values.
            y_train: 1-D array-like of length n_samples with class labels.
        """
        # Accept plain lists as well as arrays; masking and column slicing
        # below require ndarray semantics.
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)

        # Start from empty tables so a refit cannot keep stale classes or
        # feature columns from earlier training data.
        self.class_probabilities = {}
        self.feature_probabilities = {}

        # Calculate class probabilities
        total_samples = len(y_train)
        classes, class_counts = np.unique(y_train, return_counts=True)
        for class_, class_count in zip(classes, class_counts):
            self.class_probabilities[class_] = class_count / total_samples

        # The rows belonging to each class do not depend on the feature or
        # value being counted, so select them once up front.
        rows_by_class = [X_train[y_train == class_] for class_ in classes]

        # Calculate feature probabilities for each class
        for feature_index in range(X_train.shape[1]):
            unique_values = np.unique(X_train[:, feature_index])
            per_class = {}
            for class_, class_count, class_rows in zip(classes, class_counts, rows_by_class):
                column = class_rows[:, feature_index]
                # Fraction of this class's samples that take each value
                per_class[class_] = {
                    value: np.sum(column == value) / class_count
                    for value in unique_values
                }
            self.feature_probabilities[feature_index] = per_class

    def predict(self, X_test):
        """Predict the most probable class for each sample.

        Args:
            X_test: iterable of samples, each an iterable of feature values
                in the same column order used for training.

        Returns:
            A list with one predicted class label per sample. When several
            classes share the highest score, the first one in
            ``class_probabilities`` iteration order wins. ``None`` is
            returned for a sample if no classes have been fitted.
        """
        predictions = []
        for sample in X_test:
            # -1 is below any valid probability, so the first class examined
            # is always accepted, even with a score of zero.
            max_probability = -1
            predicted_class = None
            for class_, class_prob in self.class_probabilities.items():
                probability = class_prob
                for feature_index, feature_value in enumerate(sample):
                    per_class = self.feature_probabilities.get(feature_index)
                    if per_class is None:
                        # Column index that was not present during training:
                        # it does not influence the score.
                        continue
                    value_probs = per_class.get(class_)
                    if value_probs is None or feature_value not in value_probs:
                        # If feature value not seen in training, set probability to zero
                        probability = 0
                        break
                    probability *= value_probs[feature_value]
                if probability > max_probability:
                    max_probability = probability
                    predicted_class = class_
            predictions.append(predicted_class)
        return predictions

def preprocess_data(data):
    """Return a new list of rows with every '?' cell replaced by 'unknown'.

    Only cells equal to the exact string '?' are replaced; all other cells
    are carried over unchanged. The input rows are not modified.
    """
    def _fill_missing(cell):
        # '?' is the marker for a missing value
        if cell == '?':
            return 'unknown'
        return cell

    return [[_fill_missing(cell) for cell in record] for record in data]

# Training records: age bracket, income, student flag, credit rating, and a
# trailing '??' placeholder column. Only the first four columns are encoded
# as features below.
data = [
    ['<=30', 'low', 'no', 'fair', '??'],
    ['31...40', 'high', 'yes', 'excellent', '??'],
    ['<=30', 'medium', 'yes', 'fair', '??'],
    ['>40', 'high', 'no', 'excellent', '??']
]

processed_data = preprocess_data(data)

# Mapping for categorical values
age_mapping = {'<=30': 0, '31...40': 1, '>40': 2}
income_mapping = {'low': 0, 'medium': 1, 'high': 2}
student_mapping = {'no': 0, 'yes': 1}
credit_rating_mapping = {'fair': 0, 'excellent': 1}
class_mapping = {'no': 0, 'yes': 1}  # Class mapping

# Inverse of class_mapping, used to turn predicted codes back into labels
class_names = {code: name for name, code in class_mapping.items()}


def _encode_rows(rows):
    """Encode the first four categorical columns of each row as integers."""
    return np.array([
        [age_mapping[row[0]], income_mapping[row[1]], student_mapping[row[2]], credit_rating_mapping[row[3]]]
        for row in rows
    ])


X_train = _encode_rows(processed_data)

# Training labels, one per row of `data`, encoded through class_mapping so
# the integer codes stay in sync with the decoding step below.
train_labels = ['no', 'yes', 'yes', 'no']
y_train = np.array([class_mapping[label] for label in train_labels])

# Create and train the Naive Bayes classifier
classifier = NaiveBayesClassifier()
classifier.fit(X_train, y_train)

# Test data
data_to_predict = [
    ['<=30', 'low', 'no', 'fair', '??'],
    ['31...40', 'high', 'yes', 'excellent', '??'],
    ['<=30', 'medium', 'yes', 'fair', '??'],
    ['>40', 'high', 'no', 'excellent', '??']
]

processed_data_to_predict = preprocess_data(data_to_predict)
X_to_predict = _encode_rows(processed_data_to_predict)

# Make predictions
predicted_labels = classifier.predict(X_to_predict)

# Convert numerical class labels back to original labels
predicted_classes = [class_names[label] for label in predicted_labels]

# Print the predicted classes, lettering the data points A, B, C, ...
for i, data_point in enumerate(data_to_predict):
    print(f"Data Point {chr(65 + i)}: {data_point} => Predicted Class: {predicted_classes[i]}")

# Output:
# Data Point A: ['<=30', 'low', 'no', 'fair', '??'] => Predicted Class: no
# Data Point B: ['31...40', 'high', 'yes', 'excellent', '??'] => Predicted Class: yes
# Data Point C: ['<=30', 'medium', 'yes', 'fair', '??'] => Predicted Class: yes
# Data Point D: ['>40', 'high', 'no', 'excellent', '??'] => Predicted Class: no
