In [61]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.naive_bayes import CategoricalNB
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

In [62]:
df = pd.read_csv('PlayTennis.csv')
df.head()

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play Tennis
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes


In [63]:
# Step 3: Encode categorical data
label_encoders = {}
for column in df.columns:
    if df[column].dtype == 'object':  # Only encode categorical columns
        le = LabelEncoder()
        df[column] = le.fit_transform(df[column])
        label_encoders[column] = le

In [64]:
# Step 4: Split data into features (X) and target (y)
target_column = input("\nEnter the target column name (e.g., 'Play Tennis'): ")

if target_column not in df.columns:
    print(f"Error: Column '{target_column}' not found in the dataset.")
    exit()

X = df.drop(columns=[target_column])
y = df[target_column]


Enter the target column name (e.g., 'Play Tennis'):  Play Tennis


In [84]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [85]:
# Step 4: Train the Naive Bayes model
model = CategoricalNB(alpha=1.0)  # Experiment with alpha for smoothing
model.fit(X_train, y_train)

In [86]:
# Step 6: Test the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

In [87]:
print("\nPredicted:", y_pred)
print("Actual:", y_test.values)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:\n", classification_report(y_test, y_pred))


Predicted: [0 1 1 0 1]
Actual: [1 1 1 1 1]
Accuracy: 0.60
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.60      0.75         5

    accuracy                           0.60         5
   macro avg       0.50      0.30      0.38         5
weighted avg       1.00      0.60      0.75         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [95]:
# Step 6: Cross-validation
cv_scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
print(f"\nCross-Validation Scores: {cv_scores}")
print(f"Mean CV Accuracy: {cv_scores.mean()}")
print(f"Standard Deviation: {cv_scores.std():.4f}")


Cross-Validation Scores: [0.66666667 1.         0.66666667 0.         0.5       ]
Mean CV Accuracy: 0.5666666666666667
Standard Deviation: 0.3266


In [88]:
# Decode predictions for interpretation
if target_column in label_encoders:
    decoded_y_pred = label_encoders[target_column].inverse_transform(y_pred)
    decoded_y_test = label_encoders[target_column].inverse_transform(y_test)
    print("Decoded Predictions:", decoded_y_pred)
    print("Decoded Actual:", decoded_y_test)


Decoded Predictions: ['No' 'Yes' 'Yes' 'No' 'Yes']
Decoded Actual: ['Yes' 'Yes' 'Yes' 'Yes' 'Yes']
