## NAIVE-BAYES

<H3>Manual</H3>

In [15]:
import pandas as pd

# Read the weather observations that every later step works from
df = pd.read_csv('weather.csv')

# Show the column index so the expected schema can be checked by eye
print("Columns:", df.columns)

# Class priors: relative frequency of each 'Play' label, computed once
play_freq = df['Play'].value_counts(normalize=True)
prior_yes, prior_no = play_freq['Yes'], play_freq['No']

# Likelihood function with Laplace smoothing
def likelihood(feature, value, target, alpha=1, data=None):
    """Return the Laplace-smoothed estimate of P(feature = value | Play = target).

    Parameters
    ----------
    feature : str
        Name of the attribute column.
    value :
        Attribute value whose conditional probability is wanted.
    target :
        Label in the 'Play' column to condition on.
    alpha : int or float, default 1
        Additive (Laplace) smoothing strength.
    data : pandas.DataFrame, optional
        Frame to estimate from. When omitted, the notebook-level ``df`` is
        used, exactly as before. Passing a frame explicitly keeps the result
        independent of whatever ``df`` is currently bound to.

    Returns
    -------
    float
        ``(count + alpha) / (total + alpha * unique_vals)`` where ``count`` is
        the number of ``target`` rows with ``feature == value``, ``total`` is
        the number of ``target`` rows and ``unique_vals`` is the number of
        distinct values of ``feature`` in the whole frame.
    """
    frame = df if data is None else data
    subset = frame[frame['Play'] == target]
    # .get(..., 0) covers values that never occur together with this target
    count = subset[feature].value_counts().get(value, 0)
    total = len(subset)
    # Distinct values are counted over the full frame, not just the subset,
    # so both classes share the same smoothing denominator term.
    unique_vals = frame[feature].nunique()
    return (count + alpha) / (total + alpha * unique_vals)

# Predict function that returns prediction and posteriors
def predict(sample_row):
    """Classify one sample and return ``(label, posterior_yes, posterior_no)``.

    ``sample_row`` maps feature names to values. Each class score starts at
    its prior and is multiplied by ``likelihood(feature, value, class)`` for
    every item in the mapping; the two scores are then normalised so they sum
    to one. The label is 'Yes' only when its posterior is strictly larger,
    otherwise 'No'.
    """
    score_yes, score_no = prior_yes, prior_no

    # Fold each feature's likelihood into both running scores in one pass
    for feature, value in sample_row.items():
        score_yes *= likelihood(feature, value, 'Yes')
        score_no *= likelihood(feature, value, 'No')

    # Normalise the two scores into posterior probabilities
    evidence = score_yes + score_no
    posterior_yes = score_yes / evidence
    posterior_no = score_no / evidence

    if posterior_yes > posterior_no:
        return 'Yes', posterior_yes, posterior_no
    return 'No', posterior_yes, posterior_no

# --- Predict for a sample ---
sample = {'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'High', 'Wind': 'Strong'}
prediction, posterior_yes, posterior_no = predict(sample)

# predict() returns only the label and the two posteriors; its per-feature
# likelihoods and unnormalised scores are local to the function. Rebuild them
# here, keyed by a printable label, so the step-by-step report below works on
# a fresh kernel instead of relying on names left over from an earlier run.
likelihoods_yes = {f"P({k}={v}|Yes)": likelihood(k, v, 'Yes') for k, v in sample.items()}
likelihoods_no = {f"P({k}={v}|No)": likelihood(k, v, 'No') for k, v in sample.items()}

# Unnormalised Naive Bayes scores: prior times the product of the likelihoods
v_nb_yes = prior_yes
for val in likelihoods_yes.values():
    v_nb_yes *= val
v_nb_no = prior_no
for val in likelihoods_no.values():
    v_nb_no *= val

# Output for sample prediction
# Display step-by-step output
print("\nNAIVE BAYES CLASSIFIER – Example")
print(f"Sample: {sample}\n")

print(f"P(Play = Yes) = {prior_yes:.2f}")
print(f"P(Play = No)  = {prior_no:.2f}\n")

print("Likelihoods for 'Yes':")
for k, v in likelihoods_yes.items():
    print(f"{k} = {v:.3f}")

print("\nLikelihoods for 'No':")
for k, v in likelihoods_no.items():
    print(f"{k} = {v:.3f}")

print(f"\nNaive Bayes score for 'Yes': {v_nb_yes:.6f}")
print(f"Naive Bayes score for 'No' : {v_nb_no:.6f}\n")

print(f"Posterior probability for 'Yes' = {posterior_yes:.3f}")
print(f"Posterior probability for 'No'  = {posterior_no:.3f}")
# Use the label predict() already returned rather than re-deriving it
print(f"\nPrediction: Play = {prediction}")



Columns: Index(['Outlook', 'Temperature', 'Humidity', 'Wind', 'Play'], dtype='object')

NAIVE BAYES CLASSIFIER – Example
Sample: {'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'High', 'Wind': 'Strong'}

P(Play = Yes) = 0.64
P(Play = No)  = 0.36

Likelihoods for 'Yes':
P(Outlook=Sunny|Yes) = 0.250
P(Temperature=Cool|Yes) = 0.333
P(Humidity=High|Yes) = 0.364
P(Wind=Strong|Yes) = 0.364

Likelihoods for 'No':
P(Outlook=Sunny|No) = 0.500
P(Temperature=Cool|No) = 0.250
P(Humidity=High|No) = 0.714
P(Wind=Strong|No) = 0.571

Naive Bayes score for 'Yes': 0.007084
Naive Bayes score for 'No' : 0.018222

Posterior probability for 'Yes' = 0.280
Posterior probability for 'No'  = 0.720

Prediction: Play = No


<h3>Using Library</h3>

In [12]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import accuracy_score

# Load dataset
df = pd.read_csv('weather.csv')

# Encode categorical variables into a separate frame. `df` keeps its raw
# string labels, so code that filters it by label (the manual section does
# df['Play'] == target) still behaves correctly after this cell has run.
encoders = {}
df_encoded = df.copy()
for col in df.columns:
    encoders[col] = LabelEncoder()
    df_encoded[col] = encoders[col].fit_transform(df[col])
    # Optional: print mapping. LabelEncoder assigns each entry of classes_ its
    # position as the integer code, so enumerate() reproduces the mapping.
    mapping = {label: code for code, label in enumerate(encoders[col].classes_)}
    print(f"{col} mapping: {mapping}")

# Split features and target
X = df_encoded.drop(columns=["Play"])
y = df_encoded["Play"]

# Fit a categorical Naive Bayes classifier on the encoded features
model = CategoricalNB().fit(X, y)

# Score the fitted model on the same rows it was trained on, so this is a
# training accuracy and not an estimate of performance on unseen data
y_pred = model.predict(X)
accuracy = accuracy_score(y_true=y, y_pred=y_pred)

# Sample to predict (raw, unencoded)
sample_dict = {'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'High', 'Wind': 'Strong'}
sample_raw = pd.DataFrame([sample_dict])

# Pass each column through the encoder that was fitted for it, so the sample
# uses the same integer codes as the training features
sample_df = sample_raw.assign(
    **{col: encoders[col].transform(sample_raw[col]) for col in sample_raw.columns}
)

# Predicted class (decoded back to its label) and the class probabilities
predicted = model.predict(sample_df)
predicted_label = encoders["Play"].inverse_transform(predicted)[0]
probabilities = model.predict_proba(sample_df)[0]
prob_no, prob_yes = probabilities[0], probabilities[1]

# Report the result
report_lines = [
    "\nNAIVE BAYES CLASSIFIER – Library Version",
    f"Sample: {sample_dict}\n",
    f"Posterior probability for 'Yes': {prob_yes:.3f}",
    f"Posterior probability for 'No' : {prob_no:.3f}",
    f"\nPrediction: Play = {predicted_label}",
    f"\nTraining Accuracy: {accuracy * 100:.2f}%",
]
print(*report_lines, sep="\n")


Outlook mapping: {'Overcast': 0, 'Rain': 1, 'Sunny': 2}
Temperature mapping: {'Cool': 0, 'Hot': 1, 'Mild': 2}
Humidity mapping: {'High': 0, 'Normal': 1}
Wind mapping: {'Strong': 0, 'Weak': 1}
Play mapping: {'No': 0, 'Yes': 1}

NAIVE BAYES CLASSIFIER – Library Version
Sample: {'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'High', 'Wind': 'Strong'}

Posterior probability for 'Yes': 0.280
Posterior probability for 'No' : 0.720

Prediction: Play = No

Training Accuracy: 92.86%
