In [15]:
# Import required libraries
from sklearn import preprocessing
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

In [17]:
# Build the dataset from the table, one record per row:
# (age, income, student, credit_rating, buys_computer)
records = [
    ('youth',       'high',   'no',  'fair',      'no'),
    ('youth',       'high',   'no',  'excellent', 'no'),
    ('middle_aged', 'high',   'no',  'fair',      'yes'),
    ('senior',      'medium', 'no',  'fair',      'yes'),
    ('senior',      'low',    'yes', 'fair',      'yes'),
    ('senior',      'low',    'yes', 'excellent', 'no'),
    ('middle_aged', 'low',    'yes', 'excellent', 'yes'),
    ('youth',       'medium', 'no',  'fair',      'no'),
    ('youth',       'low',    'yes', 'fair',      'yes'),
    ('senior',      'medium', 'yes', 'fair',      'yes'),
    ('youth',       'medium', 'yes', 'excellent', 'yes'),
    ('middle_aged', 'medium', 'no',  'excellent', 'yes'),
    ('middle_aged', 'high',   'yes', 'fair',      'yes'),
    ('senior',      'medium', 'no',  'excellent', 'no'),
]

# Transpose the rows into one plain list per column, as the later cells expect
age, income, student, credit_rating, buys_computer = (
    list(column) for column in zip(*records)
)

In [19]:
# Label Encoding: Convert categorical text data into numbers using separate encoders for each feature
age_le = preprocessing.LabelEncoder()
income_le = preprocessing.LabelEncoder()
student_le = preprocessing.LabelEncoder()
credit_rating_le = preprocessing.LabelEncoder()
buys_computer_le = preprocessing.LabelEncoder()

In [37]:
# Fit each encoder on its own column and keep the resulting integer codes
# (four feature columns first, the target column last)
(
    age_encoded,
    income_encoded,
    student_encoded,
    credit_rating_encoded,
    buys_computer_encoded,
) = (
    encoder.fit_transform(column)
    for encoder, column in (
        (age_le, age),
        (income_le, income),
        (student_le, student),
        (credit_rating_le, credit_rating),
        (buys_computer_le, buys_computer),
    )
)

In [23]:
# Assemble one (age, income, student, credit_rating) tuple of codes per row
features = [
    (age_code, income_code, student_code, credit_code)
    for age_code, income_code, student_code, credit_code in zip(
        age_encoded, income_encoded, student_encoded, credit_rating_encoded
    )
]

In [25]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical from run to run
features_train, features_test, label_train, label_test = train_test_split(
    features,
    buys_computer_encoded,
    test_size=0.2,
    random_state=42,
)

In [27]:
# Fit a Gaussian Naive Bayes classifier on the training split.
# NOTE(review): the features are label-encoded categories, so GaussianNB treats
# the arbitrary integer codes as continuous values; CategoricalNB is probably a
# better fit for this data -- confirm before relying on the predictions.
model = GaussianNB()
model.fit(X=features_train, y=label_train)

In [29]:
# Predict the encoded buys_computer label for every held-out row
predicted = model.predict(X=features_test)

In [31]:
# Print predictions for the test set. The values are the integer codes the
# model was trained on (buys_computer_encoded), not the original text labels.
print("Predictions for test set:", predicted)

Predictions for test set: [1 1 1]


In [35]:
# Calculate and print confusion matrix and accuracy.
# The full set of encoded labels is passed explicitly: without `labels`,
# confusion_matrix only uses the classes that occur in label_test/predicted,
# so on a test set this small the matrix could shrink to 1x1 and its
# rows/columns would no longer line up with the classes of buys_computer_le.
all_labels = buys_computer_le.transform(buys_computer_le.classes_)
conf_mat = confusion_matrix(label_test, predicted, labels=all_labels)
print("Confusion Matrix:")
print(conf_mat)

# Fraction of test rows whose predicted label equals the true label
accuracy = accuracy_score(label_test, predicted)
print("Accuracy:", accuracy)


Confusion Matrix:
[[0 1]
 [0 2]]
Accuracy: 0.6666666666666666


In [39]:
# Prediction for specific case: age = 'youth', income = 'medium', student = 'yes', credit_rating = 'fair'
# Each value is encoded with the encoder fitted on its own column so the
# input matches the encoding the model was trained on.
query = (
    (age_le, 'youth'),
    (income_le, 'medium'),
    (student_le, 'yes'),
    (credit_rating_le, 'fair'),
)
# model.predict expects a 2-D input, hence the single row wrapped in a list
test_input = [[encoder.transform([value])[0] for encoder, value in query]]
predicted_buy = model.predict(test_input)

# Look up the code for 'yes' from the label encoder instead of assuming it is 1,
# so the printed answer stays correct whatever mapping the encoder chose.
yes_code = buys_computer_le.transform(['yes'])[0]
print("Prediction for youth/medium/yes/fair:", "Yes" if predicted_buy[0] == yes_code else "No")

Prediction for youth/medium/yes/fair: Yes
