<a href="https://colab.research.google.com/github/aidanbolinger/ArtificialIntelligence/blob/main/HW4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Question 4
import pandas as pd
import numpy as np

# 1. Build the toy spam dataset and print it so the values can be verified by eye.
# One row per email: five binary word-presence flags followed by the class label.
data = pd.DataFrame(
    [
        [1, 1, 0, 1, 1, 'Spam'],
        [0, 1, 0, 1, 1, 'Spam'],
        [1, 0, 1, 0, 0, 'Not Spam'],
        [0, 0, 1, 0, 0, 'Not Spam'],
        [1, 1, 0, 1, 0, 'Spam'],
        [0, 0, 1, 0, 0, 'Not Spam'],
    ],
    columns=['Free', 'Win', 'Hello', 'Click', 'Offer', 'Label'],
)

print("Dataset:", data, sep="\n")

# 2. Compute prior probabilities
def compute_priors(df, label_col='Label'):
    """Estimate the class priors P(label) as relative label frequencies.

    Args:
        df: DataFrame holding one training example per row.
        label_col: Name of the column containing the class label.

    Returns:
        dict mapping each observed label to its prior probability (a plain
        Python float). An empty frame yields an empty dict.
    """
    counts = df[label_col].value_counts().to_dict()
    # value_counts() skips missing labels, so normalise by the number of rows
    # that were actually counted; dividing by len(df) would make the priors
    # sum to less than 1 whenever a label is missing.
    total = sum(counts.values())
    return {label: count / total for label, count in counts.items()}

# Class priors estimated from the dataset, printed under a header line.
priors = compute_priors(data)
print("\nPrior Probabilities:", priors, sep="\n")

# 3. Compute conditional probabilities with Laplace smoothing
def compute_conditional_probs(df, features, label_col='Label', alpha=1):
    """Estimate P(feature = 1 | label) for binary features with additive smoothing.

    For every class the estimate is
    ``(count of rows with feature == 1 + alpha) / (rows in class + 2 * alpha)``;
    the factor 2 is the number of values a binary feature can take. The default
    ``alpha=1`` is Laplace (add-one) smoothing.

    Args:
        df: DataFrame with one training example per row; the feature columns
            are expected to hold 0/1 values.
        features: Iterable of feature column names.
        label_col: Name of the column containing the class label.
        alpha: Pseudo-count added for each of the two feature values.

    Returns:
        dict of the form ``{label: {feature: probability}}`` with labels in
        order of first appearance and probabilities as plain Python floats.
    """
    cond_probs = {}
    # Rows with a missing label belong to no class, so they are left out
    # instead of producing an empty pseudo-class.
    for label in df[label_col].dropna().unique():
        subset = df[df[label_col] == label]
        n_label = len(subset)
        cond_probs[label] = {
            # float() keeps NumPy scalar types out of the returned table.
            feature: float((subset[feature].sum() + alpha) / (n_label + 2 * alpha))
            for feature in features
        }
    return cond_probs

# Binary word-presence columns used as model inputs.
features = ['Free', 'Win', 'Hello', 'Click', 'Offer']
cond_probs = compute_conditional_probs(data, features)

print("\nConditional Probabilities (with Laplace Smoothing):")
for label, label_probs in cond_probs.items():
    print(f"{label}: {label_probs}")

# 4. Naïve Bayes Classifier Function
# 4. Naïve Bayes Classifier Function
def classify(email, priors, cond_probs):
    """Return the most probable label for `email` under a Bernoulli Naive Bayes model.

    Each label is scored in log space as
    ``log P(label) + sum over model features of log P(feature value | label)``.
    The sum runs over the features the model was trained on, so a word that is
    missing from `email` is scored as absent (0) and a key in `email` that the
    model does not know is ignored instead of raising ``KeyError``.

    Args:
        email: dict mapping feature name to 1 (present) or 0 (absent).
        priors: dict mapping label to its prior probability.
        cond_probs: dict of the form ``{label: {feature: P(feature = 1 | label)}}``.

    Returns:
        The label with the highest log score; on a tie, the label that comes
        first in `priors`.
    """
    log_probs = {}
    for label, prior in priors.items():
        log_prob = np.log(prior)  # Start with log prior
        for feature, p_present in cond_probs[label].items():
            # P(feature = 0 | label) is the complement of P(feature = 1 | label).
            present = email.get(feature, 0) == 1
            log_prob += np.log(p_present if present else 1 - p_present)
        log_probs[label] = log_prob
    return max(log_probs, key=log_probs.get)

# Test email: word-presence flags of the message to classify.
new_email = dict(Free=1, Win=1, Hello=0, Click=1, Offer=0)

prediction = classify(new_email, priors, cond_probs)
print(
    "\nNaive Bayes Classifier Function calculation:",
    f"Prediction for new email: {prediction}",
    sep="\n",
)

Dataset:
   Free  Win  Hello  Click  Offer     Label
0     1    1      0      1      1      Spam
1     0    1      0      1      1      Spam
2     1    0      1      0      0  Not Spam
3     0    0      1      0      0  Not Spam
4     1    1      0      1      0      Spam
5     0    0      1      0      0  Not Spam

Prior Probabilities:
{'Spam': 0.5, 'Not Spam': 0.5}

Conditional Probabilities (with Laplace Smoothing):
Spam: {'Free': np.float64(0.6), 'Win': np.float64(0.8), 'Hello': np.float64(0.2), 'Click': np.float64(0.8), 'Offer': np.float64(0.6)}
Not Spam: {'Free': np.float64(0.4), 'Win': np.float64(0.2), 'Hello': np.float64(0.8), 'Click': np.float64(0.2), 'Offer': np.float64(0.2)}

Naive Bayes Classifier Function calculation:
Prediction for new email: Spam


In [7]:
#Question 5
import numpy as np

# Per-class frequency counts, indexed as training_data[feature][value][label].
training_data = {
    'Outlook': {
        'Sunny': dict(Yes=2, No=3),
        'Overcast': dict(Yes=4, No=0),
        'Rain': dict(Yes=3, No=2),
    },
    'Temperature': {
        'Hot': dict(Yes=2, No=2),
        'Mild': dict(Yes=4, No=2),
        'Cool': dict(Yes=3, No=1),
    },
    # 'Low' humidity was never observed during training.
    'Humidity': {
        'High': dict(Yes=3, No=4),
        'Normal': dict(Yes=6, No=1),
    },
    # 'Gusty' wind was never observed during training.
    'Wind': {
        'Weak': dict(Yes=6, No=2),
        'Strong': dict(Yes=3, No=3),
    },
}

# Number of training examples in each class.
class_totals = dict(Yes=9, No=5)

# Full value domain of every feature, including the values that never occur
# in training_data; the domain size is what the smoothing uses.
feature_values = dict(
    Outlook=['Sunny', 'Overcast', 'Rain'],
    Temperature=['Hot', 'Mild', 'Cool'],
    Humidity=['High', 'Normal', 'Low'],
    Wind=['Weak', 'Strong', 'Gusty'],
)

# Instance to classify; its Humidity and Wind values are unseen in training.
X_test = dict(Outlook='Sunny', Temperature='Mild', Humidity='Low', Wind='Gusty')

def build_conditional_prob_tables(data, totals, features):
    """Build Laplace-smoothed tables of P(feature = value | label).

    Each entry is ``(count + 1) / (class total + k)``, where ``k`` is the
    number of possible values of the feature. Values or labels that have no
    recorded count are treated as having a count of 0, so unseen values still
    receive a non-zero probability.

    Args:
        data: Nested counts of the form ``data[feature][value][label]``.
        totals: dict mapping each class label to its number of training
            examples; its keys define the set of labels in the tables.
        features: dict mapping each feature name to the list of all values it
            can take.

    Returns:
        Nested dict ``tables[feature][value][label]`` of smoothed probabilities.
    """
    prob_tables = {}
    for feature, values in features.items():
        k = len(values)  # size of the feature's value domain
        value_counts = data.get(feature, {})
        prob_tables[feature] = {
            value: {
                label: (value_counts.get(value, {}).get(label, 0) + 1) / (class_total + k)
                for label, class_total in totals.items()
            }
            for value in values
        }
    return prob_tables

def compute_posteriors(X_test, cond_probs, class_totals):
    """Score a test instance under every class of a Naive Bayes model.

    The score of a label is
    ``log P(label) + sum over features of log P(feature = value | label)``,
    i.e. the log of the unnormalised posterior. The scores are suitable for
    ranking labels but do not exponentiate to probabilities that sum to 1.

    Args:
        X_test: dict mapping feature name to the observed value.
        cond_probs: Nested dict ``cond_probs[feature][value][label]`` of
            conditional probabilities.
        class_totals: dict mapping each class label to its number of training
            examples; its keys define which labels are scored.

    Returns:
        dict mapping each label in `class_totals` to its log score.

    Raises:
        KeyError: if a feature, value or label is missing from `cond_probs`.
    """
    total = sum(class_totals.values())
    posteriors = {}
    for label, class_count in class_totals.items():
        log_prob = np.log(class_count / total)  # log prior
        for feature, value in X_test.items():
            log_prob += np.log(cond_probs[feature][value][label])
        posteriors[label] = log_prob
    return posteriors

# Step 1: Laplace-smoothed conditional probability tables, one per feature.
conditional_probs = build_conditional_prob_tables(
    training_data, class_totals, feature_values
)

# Step 2 & 3: Score the test instance under each class, then keep the best class.
posterior_probs = compute_posteriors(X_test, conditional_probs, class_totals)
prediction = max(posterior_probs, key=lambda cls: posterior_probs[cls])

# Output
print("Posterior Log Probabilities:")
for label, log_prob in posterior_probs.items():
    print(f"{label}: {log_prob:.4f}")

print(f"\nPrediction: Play Tennis? ➤ {prediction}")


Posterior Log Probabilities:
Yes: -7.6734
No: -6.8625

Prediction: Play Tennis? ➤ No


In [1]:
#Question 6
import numpy as np

# Step 1: Initialize the perceptron parameters (zero weights, zero bias).
weights = np.array([0.0, 0.0])
bias = 0.0
learning_rate = 0.1

# Step 2: Define data -- two features per sample, five samples per class.
X = np.array([
    [1.4, 0.2], [1.5, 0.3], [1.3, 0.2], [1.6, 0.4], [1.4, 0.3],  # Setosa (-1)
    [4.5, 1.5], [4.7, 1.6], [4.6, 1.4], [4.9, 1.5], [5.1, 1.8]   # Versicolor (+1)
])
y = np.array([-1, -1, -1, -1, -1, +1, +1, +1, +1, +1])

# Training loop: perceptron rule -- on every misclassified sample, move the
# weights and bias towards the sample's label.
epochs = 20   # upper bound on the number of passes over the data
updates = 0   # total number of corrections applied

for epoch in range(epochs):
    mistakes = 0
    for xi, yi in zip(X, y):
        # Threshold the activation; an activation of exactly zero counts as +1.
        prediction = 1 if np.dot(weights, xi) + bias >= 0 else -1
        if prediction != yi:
            weights += learning_rate * yi * xi
            bias += learning_rate * yi
            mistakes += 1
    updates += mistakes
    if mistakes == 0:
        # A mistake-free pass leaves the parameters unchanged, so every later
        # pass would be identical; stop instead of running the remaining epochs.
        break

# Step 3: Final weights and bias
print("Final weights:", weights)
print("Final bias:", bias)

# Predictions on the training samples (zero activation again maps to +1)
predictions = np.sign(np.dot(X, weights) + bias)
predictions[predictions == 0] = 1
print("Predictions:", predictions)

# Number of updates
print("Number of updates during training:", updates)

# Step 4: Confusion Matrix, with +1 as the positive class.
# count_nonzero counts the True entries of each boolean mask in one vectorised
# call; int() keeps the counts as plain Python integers.
TP = int(np.count_nonzero((predictions == 1) & (y == 1)))
TN = int(np.count_nonzero((predictions == -1) & (y == -1)))
FP = int(np.count_nonzero((predictions == 1) & (y == -1)))
FN = int(np.count_nonzero((predictions == -1) & (y == 1)))

print("\nConfusion Matrix:")
print("TP:", TP)
print("TN:", TN)
print("FP:", FP)
print("FN:", FN)

# Step 5: Metrics (each ratio falls back to 0 when its denominator is 0)
accuracy = (TP + TN) / len(y)
precision = TP / (TP + FP) if (TP + FP) > 0 else 0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0
f1_score = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

print("\nEvaluation Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1_score)


Final weights: [0.02 0.08]
Final bias: -0.2
Predictions: [-1. -1. -1. -1. -1.  1.  1.  1.  1.  1.]
Number of updates during training: 4

Confusion Matrix:
TP: 5
TN: 5
FP: 0
FN: 0

Evaluation Metrics:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
