In [2]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder

# Data preparation
# The toy "buys computer" table is written one customer per row and then
# transposed into the column -> values dict that the DataFrame is built from.
_columns = ('Age', 'Income', 'Student', 'Credit_Rating', 'Buys_Computer')
_rows = [
    ('youth',       'high',   'no',  'fair',      'no'),
    ('youth',       'high',   'no',  'excellent', 'no'),
    ('middle_aged', 'high',   'no',  'fair',      'yes'),
    ('senior',      'medium', 'no',  'fair',      'yes'),
    ('senior',      'low',    'yes', 'fair',      'yes'),
    ('senior',      'low',    'yes', 'excellent', 'no'),
    ('middle_aged', 'low',    'yes', 'excellent', 'yes'),
    ('youth',       'medium', 'no',  'fair',      'no'),
    ('youth',       'low',    'yes', 'fair',      'yes'),
    ('senior',      'medium', 'yes', 'fair',      'yes'),
    ('youth',       'medium', 'yes', 'excellent', 'yes'),
    ('middle_aged', 'medium', 'no',  'excellent', 'yes'),
    ('middle_aged', 'high',   'yes', 'fair',      'yes'),
    ('senior',      'medium', 'no',  'excellent', 'no'),
]

# zip(*_rows) yields one tuple per column; each is stored as a plain list.
data = {name: list(values) for name, values in zip(_columns, zip(*_rows))}

# Creating DataFrame (14 rows x 5 string columns)
df = pd.DataFrame(data)

# Encoding categorical data to numeric using LabelEncoder
# LabelEncoder numbers labels in sorted order (e.g. Age: middle_aged=0,
# senior=1, youth=2). A separate fitted encoder is kept per column so every
# label <-> code mapping stays available after df is overwritten with codes;
# re-fitting a single shared encoder would keep only the last column's mapping.
encoders = {}

# Encoding each column
for column in ['Age', 'Income', 'Student', 'Credit_Rating', 'Buys_Computer']:
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Backward-compatible name: as before, it ends up fitted on the target column.
labelEncoder = encoders['Buys_Computer']

# Displaying the encoded DataFrame
print("Encoded DataFrame:")
print(df)

# Feature matrix (the four encoded attributes) and target vector
feature_columns = ['Age', 'Income', 'Student', 'Credit_Rating']
target_column = 'Buys_Computer'
X = df.loc[:, feature_columns]
y = df[target_column]

# Hold out half of the 14 rows for testing; the fixed seed keeps the split reproducible
splits = train_test_split(X, y, test_size=0.5, random_state=42)
X_train, X_test, y_train, y_test = splits

# Build and fit the Naive Bayes classifier in one step (fit returns the estimator)
# NOTE(review): GaussianNB models each feature as a Gaussian over the integer
# codes; for nominal features CategoricalNB may be the better fit - confirm.
model = GaussianNB().fit(X_train, y_train)

# Creating input data for the specific instance "youth/medium/yes/fair"
# LabelEncoder assigns codes in sorted label order, so the codes are derived
# from the raw label lists in `data` instead of being typed in by hand.
# The previously hard-coded [1, 1, 1, 1] actually meant senior/low/yes/fair;
# the correct encoding of this instance is [2, 2, 1, 1].
instance = {'Age': 'youth', 'Income': 'medium', 'Student': 'yes', 'Credit_Rating': 'fair'}
encoded_instance = {
    column: LabelEncoder().fit(data[column]).transform([label])[0]
    for column, label in instance.items()
}
# Column order matches the feature order the model was trained on.
input_data = pd.DataFrame([encoded_instance], columns=list(instance))

# Predicting for the specific input
prediction = model.predict(input_data)

# Decoding the prediction to the original label
le_buys_computer = LabelEncoder()
le_buys_computer.fit(data['Buys_Computer'])
predicted_output = le_buys_computer.inverse_transform(prediction)

# Displaying the prediction for the input data; the description is built from
# `instance` so the message cannot drift from what was actually predicted.
instance_description = ', '.join(f"{column}={label}" for column, label in instance.items())
print(f"Prediction for {instance_description}: {predicted_output[0]}")

# Score the held-out rows with the fitted classifier
y_pred = model.predict(X_test)

# Compute both metrics first, then report them
# (confusion matrix rows are true classes, columns are predicted classes)
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print("Confusion Matrix:")
print(conf_matrix)
print(f"Accuracy: {accuracy:.2f}")


Encoded DataFrame:
    Age  Income  Student  Credit_Rating  Buys_Computer
0     2       0        0              1              0
1     2       0        0              0              0
2     0       0        0              1              1
3     1       2        0              1              1
4     1       1        1              1              1
5     1       1        1              0              0
6     0       1        1              0              1
7     2       2        0              1              0
8     2       1        1              1              1
9     1       2        1              1              1
10    2       2        1              0              1
11    0       2        0              0              1
12    0       0        1              1              1
13    1       2        0              0              0
Prediction for Age=youth, Income=medium, Student=yes, Credit_Rating=fair: yes
Confusion Matrix:
[[1 1]
 [2 3]]
Accuracy: 0.57
