In [1]:
import pandas as pd

# Toy "buys_computer" table, written one customer per row so each record
# can be read across instead of being reassembled from five parallel lists.
data = pd.DataFrame(
    [
        ('<=30',    'high',   'no',  'fair',      'no'),
        ('<=30',    'high',   'no',  'excellent', 'no'),
        ('31...40', 'high',   'no',  'fair',      'yes'),
        ('>40',     'medium', 'no',  'fair',      'yes'),
        ('>40',     'low',    'yes', 'fair',      'yes'),
        ('>40',     'low',    'yes', 'excellent', 'no'),
        ('31...40', 'low',    'yes', 'excellent', 'yes'),
        ('<=30',    'medium', 'no',  'fair',      'no'),
        ('<=30',    'low',    'yes', 'fair',      'yes'),
        ('>40',     'medium', 'yes', 'fair',      'yes'),
        ('<=30',    'medium', 'yes', 'excellent', 'yes'),
        ('31...40', 'medium', 'no',  'excellent', 'yes'),
        ('31...40', 'high',   'yes', 'fair',      'yes'),
        ('>40',     'medium', 'no',  'excellent', 'no'),
    ],
    columns=['age', 'income', 'student', 'credit_rating', 'buys_computer'],
)

# Class priors P(buys_computer): relative frequency of each label,
# ordered from most to least frequent by value_counts.
labels = data['buys_computer']
class_prior = labels.value_counts(normalize=True)
print(class_prior)


buys_computer
yes    0.642857
no     0.357143
Name: proportion, dtype: float64


In [2]:
from sklearn.naive_bayes import GaussianNB
import numpy as np

# Design matrix: one-hot encode the four categorical predictors in one step.
features = pd.get_dummies(data[['age', 'income', 'student', 'credit_rating']])

# Binary target: 1 where buys_computer == 'yes', 0 otherwise.
target = data['buys_computer'].eq('yes').astype('int64')

# Fit Gaussian Naive Bayes (fit() returns the estimator itself).
# NOTE(review): GaussianNB models every column as a normal distribution, while
# these columns are 0/1 indicators — consider whether a categorical/Bernoulli
# variant is the intended model.
model = GaussianNB().fit(features, target)

# theta_ holds the per-class mean of each feature column (one row per class,
# class 0 first), i.e. the fraction of rows in that class with the indicator set.
# The label printed below is kept as-is.
print("Class Conditional Densities for Features: \n", model.theta_)


Class Conditional Densities for Features: 
 [[0.         0.6        0.4        0.4        0.2        0.4
  0.8        0.2        0.6        0.4       ]
 [0.44444444 0.22222222 0.33333333 0.22222222 0.33333333 0.44444444
  0.33333333 0.66666667 0.33333333 0.66666667]]


In [3]:
from scipy.stats import chi2_contingency

# Pearson chi-square test of independence between two predictors (age vs income).
contingency_table = pd.crosstab(index=data['age'], columns=data['income'])

# chi2_contingency yields the statistic, p-value, degrees of freedom and the
# table of expected counts under independence.
chi2, p, dof, expected = chi2_contingency(contingency_table)

print(f"Chi2: {chi2}, p-value: {p}")

# Decision at the 5% significance level.
# NOTE(review): p >= 0.05 only means independence was not rejected; with 14 rows
# spread over a 3x3 table the expected counts are small, so treat the verdict
# as illustrative.
print("Features are not independent." if p < 0.05 else "Features are independent.")


Chi2: 3.3249999999999997, p-value: 0.5049810026322079
Features are independent.


In [4]:
# Build the Naive Bayes classifier on the one-hot encoded toy data.
# (Re-fitted here so the prediction below does not depend on an earlier
# cell's `model` still being the one trained on `features`.)
model = GaussianNB()
model.fit(features, target)

# Predict for a new sample (for demonstration).
# The sample is wrapped in a DataFrame that reuses the training column labels:
# the model was fitted on a named-column frame, and passing a bare ndarray makes
# scikit-learn warn about missing feature names and ties the meaning of each
# position to an implicit column order. Reusing `features.columns` also fails
# loudly if the number of encoded columns ever changes.
# With the pd.get_dummies column order
#   age_31...40, age_<=30, age_>40, income_high, income_low, income_medium,
#   student_no, student_yes, credit_rating_excellent, credit_rating_fair
# the row below encodes: age 31...40, income low, student no, credit excellent.
new_sample = pd.DataFrame(
    [[1, 0, 0, 0, 1, 0, 1, 0, 1, 0]],
    columns=features.columns,
)
prediction = model.predict(new_sample)
print(f"Prediction: {'yes' if prediction[0] == 1 else 'no'}")


Prediction: yes




In [5]:
# Read the RFMiD training labels; the path is relative to the working directory.
rfmid_data = pd.read_csv('RFMID_Training_Labels.csv')

# Target is the 'Disease_Risk' column; every remaining column becomes a predictor.
# NOTE(review): if the CSV also carries an identifier column or per-disease
# label columns, they are used as features here — confirm against the file's schema.
y = rfmid_data['Disease_Risk']
X = rfmid_data.drop(columns='Disease_Risk')

# Train the Naive Bayes classifier (fit() returns the estimator itself).
model = GaussianNB().fit(X, y)

# Demonstration only: the row scored below is the first training row,
# so this is not a held-out prediction.
sample_data = X.head(1)
prediction = model.predict(sample_data)
print(f"Prediction: {prediction[0]}")


Prediction: 1
