In [2]:
# Import the required libraries
from sklearn import preprocessing
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

# Dataset: 14 observations of (weather, temperature) and whether play happened.
weather = ['Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast', 'Sunny', 'Sunny',
           'Rainy', 'Sunny', 'Overcast', 'Overcast', 'Rainy']
temperature = ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool',
               'Mild', 'Mild', 'Mild', 'Hot', 'Mild']
play = ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes',
        'Yes', 'Yes', 'Yes', 'Yes', 'No']

# zip() below silently truncates to the shortest column, so fail loudly if the
# columns are ever edited out of alignment.
assert len(weather) == len(temperature) == len(play), "columns differ in length"

# Convert categorical values to integers with one LabelEncoder per column.
# A single shared encoder would be refit on every call and forget the earlier
# mappings, which makes it impossible to encode new feature values later.
weather_encoder = preprocessing.LabelEncoder()
temperature_encoder = preprocessing.LabelEncoder()
play_encoder = preprocessing.LabelEncoder()

weather_encoded = weather_encoder.fit_transform(weather)
temperature_encoded = temperature_encoder.fit_transform(temperature)
play_encoded = play_encoder.fit_transform(play)

# Backward compatibility: `le` used to end up fitted on the `play` column.
le = play_encoder

# Combine weather and temperature features into a single list of tuples
features = list(zip(weather_encoded, temperature_encoded))

# Split dataset into training and testing sets (fixed seed for reproducibility)
features_train, features_test, label_train, label_test = train_test_split(
    features, play_encoded, test_size=0.2, random_state=42
)

# Generate a model using the Naive Bayes classifier.
# NOTE(review): GaussianNB treats the integer category codes as continuous
# values; CategoricalNB may be a better fit for this data. Kept as-is so the
# results do not change.
model = GaussianNB()

# Fit the model on the training data
model.fit(features_train, label_train)

# Predict on the test data
predicted = model.predict(features_test)

# Print prediction results for the test data
print("Prediction on test data:", predicted)
print("Actual labels for test data:", label_test)

# Confusion Matrix and Accuracy.
# `labels` pins the matrix to one row/column per known class, so its shape does
# not depend on which classes happen to land in the small test split.
class_codes = list(range(len(play_encoder.classes_)))
conf_mat = confusion_matrix(label_test, predicted, labels=class_codes)
accuracy = accuracy_score(label_test, predicted)

print("Confusion Matrix:")
print(conf_mat)
print("Accuracy:", accuracy)

# Predict on a new instance where weather = 'Overcast' and temperature = 'Mild'.
# Encode through the fitted encoders instead of hand-written integer codes, so
# the query stays correct if the category set (and thus the codes) changes.
new_instance = [[
    weather_encoder.transform(['Overcast'])[0],
    temperature_encoder.transform(['Mild'])[0],
]]
new_prediction = model.predict(new_instance)

# Print the prediction for the new instance (decoded back to 'Yes'/'No')
if play_encoder.inverse_transform(new_prediction)[0] == 'Yes':
    print("Prediction for (Overcast, Mild): Yes, the players can play.")
else:
    print("Prediction for (Overcast, Mild): No, the players should not play.")


Prediction on test data: [0 1 0]
Actual labels for test data: [1 1 0]
Confusion Matrix:
[[1 0]
 [1 1]]
Accuracy: 0.6666666666666666
Prediction for (Overcast, Mild): Yes, the players can play.


In [3]:
# Importing necessary libraries
from sklearn import preprocessing
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score

# Dataset: age, income, student, credit_rating and target variable (buys_computer)
# NOTE(review): `age` previously held 23 entries while every other column holds
# 22; zip() silently dropped the trailing 'senior'. That trailing entry is
# removed here so the model sees exactly the same rows as before, but which
# entry is actually spurious should be verified against the source data.
age = ['youth', 'youth', 'middle_aged', 'senior', 'senior', 'senior', 'middle_aged', 'youth', 'youth', 'youth', 'youth', 'youth', 'middle_aged', 'middle_aged', 'senior', 'senior', 'youth', 'youth', 'middle_aged', 'middle_aged', 'senior', 'middle_aged']
income = ['high', 'high', 'high', 'medium', 'low', 'low', 'low', 'medium', 'medium', 'low', 'low', 'medium', 'medium', 'high', 'medium', 'medium', 'medium', 'high', 'medium', 'medium', 'medium', 'high']
student = ['no', 'no', 'no', 'no', 'yes', 'yes', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes']
credit_rating = ['fair', 'excellent', 'fair', 'fair', 'fair', 'fair', 'excellent', 'fair', 'fair', 'fair', 'excellent', 'fair', 'excellent', 'excellent', 'fair', 'fair', 'fair', 'fair', 'fair', 'excellent', 'excellent', 'fair']
buys_computer = ['no', 'no', 'no', 'no', 'yes', 'yes', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']

# Fail loudly on misaligned columns instead of letting zip() truncate them.
columns = (age, income, student, credit_rating, buys_computer)
assert len({len(column) for column in columns}) == 1, "columns differ in length"

# Step 1: Convert categorical data to numerical data using LabelEncoder.
# One encoder per column: a single shared encoder is refit on every call and
# forgets the earlier mappings, so new feature values could not be encoded.
age_encoder = preprocessing.LabelEncoder()
income_encoder = preprocessing.LabelEncoder()
student_encoder = preprocessing.LabelEncoder()
credit_rating_encoder = preprocessing.LabelEncoder()
buys_computer_encoder = preprocessing.LabelEncoder()

# Encode each feature and the target variable
age_encoded = age_encoder.fit_transform(age)
income_encoded = income_encoder.fit_transform(income)
student_encoded = student_encoder.fit_transform(student)
credit_rating_encoded = credit_rating_encoder.fit_transform(credit_rating)
buys_computer_encoded = buys_computer_encoder.fit_transform(buys_computer)

# Backward compatibility: `le` used to end up fitted on the target column.
le = buys_computer_encoder

# Step 2: Combine features into a list of tuples
features = list(zip(age_encoded, income_encoded, student_encoded, credit_rating_encoded))

# Step 3: Split the dataset into training and testing sets (fixed seed)
features_train, features_test, label_train, label_test = train_test_split(
    features, buys_computer_encoded, test_size=0.2, random_state=42
)

# Step 4: Create and train the Naive Bayes classifier
# NOTE(review): GaussianNB treats the integer category codes as continuous
# values; CategoricalNB may be a better fit. Kept as-is to limit the change.
model = GaussianNB()
model.fit(features_train, label_train)

# Step 5: Predict the output using the test data
predicted = model.predict(features_test)

# Step 6: Evaluate the model using confusion matrix and accuracy score.
# `labels` pins the matrix to one row/column per known class regardless of
# which classes happen to appear in the small test split.
class_codes = list(range(len(buys_computer_encoder.classes_)))
conf_mat = confusion_matrix(label_test, predicted, labels=class_codes)
accuracy = accuracy_score(label_test, predicted)

# Step 7: Print the results
print("Confusion Matrix:")
print(conf_mat)
print("Accuracy:", accuracy)

# Step 8: Example prediction for a new data point
# Predict whether a 'youth' with 'medium' income, 'yes' student status, and
# 'fair' credit_rating will buy a computer.
# LabelEncoder assigns codes in sorted order of the class names, so
# 'youth' = 2, 'medium' = 2, 'yes' = 1, 'fair' = 1. The previously hand-written
# vector [[0, 1, 1, 0]] therefore described (middle_aged, low, yes, excellent).
# Encoding through the fitted encoders avoids that class of mistake.
new_data = [[
    age_encoder.transform(['youth'])[0],
    income_encoder.transform(['medium'])[0],
    student_encoder.transform(['yes'])[0],
    credit_rating_encoder.transform(['fair'])[0],
]]
new_prediction = model.predict(new_data)

# Print the prediction result (decoded back to 'yes'/'no')
if buys_computer_encoder.inverse_transform(new_prediction)[0] == 'yes':
    print("Prediction: Yes, the youth will buy the computer.")
else:
    print("Prediction: No, the youth will not buy the computer.")


Confusion Matrix:
[[3 0]
 [0 2]]
Accuracy: 1.0
Prediction: Yes, the youth will buy the computer.


In [4]:
# Importing required libraries
from sklearn import preprocessing
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

# Sample dataset
# Data columns: Weather, Temperature, Play (Yes/No)
weather = ['Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast', 'Sunny', 'Sunny', 'Rainy', 'Sunny', 'Overcast', 'Overcast', 'Rainy']
temperature = ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild']
play = ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No']

# zip() below silently truncates to the shortest column, so fail loudly if the
# columns are ever edited out of alignment.
assert len(weather) == len(temperature) == len(play), "columns differ in length"

# Step 1: Perform label encoding on the data columns.
# One encoder per column so each fitted mapping stays available afterwards
# (a shared encoder is refit on every call and forgets the earlier mappings).
weather_encoder = preprocessing.LabelEncoder()
temperature_encoder = preprocessing.LabelEncoder()
play_encoder = preprocessing.LabelEncoder()

# Encoding each column
weather_encoded = weather_encoder.fit_transform(weather)
temperature_encoded = temperature_encoder.fit_transform(temperature)
play_encoded = play_encoder.fit_transform(play)

# Backward compatibility: `le` used to end up fitted on the `play` column.
le = play_encoder

# Step 2: Combine features into a single list of tuples
features = list(zip(weather_encoded, temperature_encoded))

# Step 3: Split dataset into training and testing sets (fixed seed)
features_train, features_test, label_train, label_test = train_test_split(
    features, play_encoded, test_size=0.2, random_state=42
)

# Step 4: Initialize and train the Naive Bayes model
model = GaussianNB()
model.fit(features_train, label_train)

# Step 5: Perform prediction on the test data
predicted = model.predict(features_test)

# Step 6: Print prediction results
print("Predictions on test data:", predicted)
print("Actual labels for test data:", label_test)

# Step 7: Generate Confusion Matrix and calculate Accuracy.
# Passing `labels` forces a 2x2 matrix ordered [No, Yes]. Without it, a test
# split containing a single class yields a 1x1 matrix and the unpacking below
# would fail.
class_codes = list(range(len(play_encoder.classes_)))
conf_mat = confusion_matrix(label_test, predicted, labels=class_codes)
accuracy = accuracy_score(label_test, predicted)

print("Confusion Matrix:")
print(conf_mat)
print("Accuracy:", accuracy)

# Interpretation (manual calculation for illustration).
# Rows are actual classes and columns are predicted classes, so flattening the
# 2x2 matrix row by row gives:
#   TN: "No" predicted as "No"    FP: "No" predicted as "Yes"
#   FN: "Yes" predicted as "No"   TP: "Yes" predicted as "Yes"
TN, FP, FN, TP = conf_mat.ravel()

# Accuracy Calculation: correct predictions over all predictions
accuracy_manual = (TP + TN) / (TP + TN + FP + FN)
print("Manual Accuracy Calculation:", accuracy_manual)


Predictions on test data: [0 1 0]
Actual labels for test data: [1 1 0]
Confusion Matrix:
[[1 0]
 [1 1]]
Accuracy: 0.6666666666666666
Manual Accuracy Calculation: 0.6666666666666666
