In [1]:
# Import required libraries
from sklearn import preprocessing
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
import pandas as pd

In [2]:
# Dataset from the given table, written one observation per row:
# (genre, watched, classification).
movie_rows = [
    ("Action", "Yes", "Good"),
    ("Comedy", "No", "Bad"),
    ("Drama", "Yes", "Good"),
    ("Action", "No", "Bad"),
    ("Horror", "Yes", "Good"),
    ("Drama", "No", "Bad"),
    ("Action", "Yes", "Good"),
    ("Comedy", "Yes", "Good"),
    ("Horror", "No", "Bad"),
    ("Action", "Yes", "Good"),
    ("Comedy", "Yes", "Good"),
    ("Drama", "No", "Bad"),
    ("Horror", "Yes", "Good"),
    ("Drama", "No", "Bad"),
]

# Transpose the rows into one list per column, keyed by column name.
genre_values, watched_values, classification_values = (
    list(column) for column in zip(*movie_rows)
)
data = {
    "Genre": genre_values,
    "Watched": watched_values,
    "Classification": classification_values,
}

In [3]:
# Build a DataFrame from the column dictionary: each key becomes a column
# and each list supplies that column's rows.
df = pd.DataFrame(data=data)

In [4]:
# Label Encoding: Convert categorical text data into numbers
genre_le = preprocessing.LabelEncoder()
watched_le = preprocessing.LabelEncoder()
classification_le = preprocessing.LabelEncoder()

In [5]:
# Fit every encoder on its own text column and store the resulting integer
# codes in a new "<name>_encoded" column next to the original.
# Each entry is (source column, encoded column, encoder); the last one is the label.
encoding_plan = [
    ('Genre', 'Genre_encoded', genre_le),
    ('Watched', 'Watched_encoded', watched_le),
    ('Classification', 'Classification_encoded', classification_le),
]
for raw_column, encoded_column, encoder in encoding_plan:
    df[encoded_column] = encoder.fit_transform(df[raw_column])

In [6]:
# Extract the model inputs and target as NumPy arrays:
#   features - 2-D array with one row per observation and one column per
#              encoded feature (genre code, watched code)
#   labels   - 1-D array holding the encoded classification of each row
feature_columns = ['Genre_encoded', 'Watched_encoded']
features = df[feature_columns].to_numpy()
labels = df['Classification_encoded'].to_numpy()

In [7]:
# Hold out 20% of the rows for testing and train on the rest.
# random_state is fixed so the shuffle, and therefore the split, is the same
# on every run.
split_parts = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)
features_train, features_test, label_train, label_test = split_parts

In [8]:
# Fit a Gaussian Naive Bayes classifier on the training portion only.
# NOTE(review): both features are integer codes of nominal categories, which
# GaussianNB treats as continuous values; CategoricalNB may be a better match
# for this data - confirm before reusing this on a larger dataset.
model = GaussianNB()
model.fit(X=features_train, y=label_train)

In [9]:
# Predict the encoded classification for every held-out test row.
predicted = model.predict(X=features_test)

In [10]:
# Show the predicted (encoded) labels for the test rows.
heading = "Predictions for test set:"
print(heading, predicted)

Predictions for test set: [1 0 1]


In [12]:
# Evaluate the classifier on the held-out test set.
#
# The full set of encoded classes is passed explicitly so the confusion matrix
# always has one row/column per known class, in the encoder's class order.
# Without `labels`, a small test split that happens to contain only one class
# would yield a 1x1 matrix whose meaning is ambiguous.
# Rows are the true labels, columns are the predicted labels.
all_class_codes = classification_le.transform(classification_le.classes_)
conf_mat = confusion_matrix(label_test, predicted, labels=all_class_codes)
print("Confusion Matrix:")
print(conf_mat)

# Accuracy: fraction of test rows whose predicted label equals the true label.
accuracy = accuracy_score(label_test, predicted)
print("Accuracy:", accuracy)

Confusion Matrix:
[[1 0]
 [0 2]]
Accuracy: 1.0


In [13]:
# Prediction for a specific case: Genre = "Drama", Watched = "Yes".
# The raw text values are converted with the same fitted encoders that were
# used to build the training features, so the codes match what the model saw.
drama_code = genre_le.transform(['Drama'])[0]
watched_yes_code = watched_le.transform(['Yes'])[0]
test_input = [[drama_code, watched_yes_code]]  # one row: [genre code, watched code]
predicted_classification = model.predict(test_input)

# Decode the predicted integer back to its class name with the label encoder
# instead of hard-coding which integer means "Good"; this stays correct even
# if the set of class names (and therefore their codes) changes.
predicted_name = classification_le.inverse_transform(predicted_classification)[0]
print("Prediction for Drama/Yes:", predicted_name)

Prediction for Drama/Yes: Good
