In [None]:
from os import getcwd

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import CategoricalNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

In [None]:
# Pieces of the path to the raw CSV, resolved against the current working directory.
cwd = getcwd()
data = "data"
raw_data_file = "ObesityDataSet_raw_and_data_sinthetic.csv"

# Read the file with "?" interpreted as a missing value, then discard every
# row that contains at least one missing entry.
df = pd.read_csv("/".join([cwd, data, raw_data_file]), na_values="?")
df = df.dropna()

# Reduce the label to a binary flag: 1 when the original label contains
# "Obesity", 0 for every other label.
df["NObeyesdad"] = df["NObeyesdad"].map(lambda label: int("Obesity" in label))

df.head()

In [None]:
# Split the frame into predictors and the binary obesity label.
attr = df.drop("NObeyesdad", axis=1)
target = df["NObeyesdad"]

categorical_columns = ["Gender", "family_history_with_overweight", "FAVC", "CAEC", "SMOKE", "SCC", "CALC", "MTRANS"]

# Fix the vocabulary of every categorical column up front so each level maps
# to a stable integer code, even when a rare level is missing from the
# training split. Assumes these columns hold mutually comparable values
# (strings in the raw CSV) -- TODO confirm if the schema changes.
category_levels = [sorted(attr[column].unique()) for column in categorical_columns]

# CategoricalNB models each input column as ONE categorical feature whose
# values are integer codes 0..n-1, so the columns are ordinal-encoded.
# One-hot encoding would instead present every level as a separate binary
# feature, which the classifier would wrongly treat as independent evidence.
preprocessor = ColumnTransformer(
    transformers=[
        ('categorical', OrdinalEncoder(categories=category_levels), categorical_columns)
    ],
    remainder="drop",  # only the categorical columns reach the classifier
)

model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    # min_categories reserves a (smoothed) probability slot for every known
    # level, so a level seen only in the test split cannot index out of range.
    ('classifier', CategoricalNB(min_categories=[len(levels) for levels in category_levels]))
])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
attr_train, attr_test, target_train, target_test = train_test_split(attr, target, test_size=0.2, random_state=513)

model.fit(attr_train, target_train)

# Evaluate on the held-out rows.
target_pred = model.predict(attr_test)
acc = accuracy_score(target_test, target_pred)
cm = confusion_matrix(target_test, target_pred)
cr = classification_report(target_test, target_pred)

print(acc)
print(cm)
print(cr)