In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix

# Load the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
column_names = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 
                'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 
                'hours-per-week', 'native-country', 'income']
data = pd.read_csv(url, names=column_names, na_values=' ?', skipinitialspace=True)

# Drop rows with missing values
data = data.dropna()

# Convert income column to binary labels
data['income'] = data['income'].map({'<=50K': 0, '>50K': 1})

# Select features and target variable
X = data.drop('income', axis=1)
y = data['income']

# Convert categorical variables to dummy variables
X = pd.get_dummies(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Gaussian Naive Bayes classifier
clf = GaussianNB()
clf.fit(X_train, y_train)

# Make predictions
y_pred = clf.predict(X_test)

print("Predictions:", y_pred)

# Compute confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)

print("Confusion Matrix:")
print(conf_matrix)

# Extract TP, TN, FP, FN from the confusion matrix
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

# Compute Sensitivity and Specificity
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

# Compute Posterior Probability
posterior_probability = sum(y_pred) / len(y_pred)

print("Sensitivity (True Positive Rate):", sensitivity)
print("Specificity (True Negative Rate):", specificity)
print("Posterior Probability of making over 50K a year:", posterior_probability)


Predictions: [0 0 0 ... 1 0 0]
Confusion Matrix:
[[4702  240]
 [1068  503]]
Sensitivity (True Positive Rate): 0.32017823042647997
Specificity (True Negative Rate): 0.9514366653176851
Posterior Probability of making over 50K a year: 0.11407953324120988
