In [1]:
import utils

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from category_encoders import OrdinalEncoder

In [2]:
dataset = utils.get_data()

# Ordinal-encode every pandas-categorical column with ONE encoder fitted in a
# single call. Re-fitting the same encoder inside a per-column loop overwrote
# its state on each iteration, so afterwards `enc` only described the last
# column. Fitted this way, `enc` keeps the category -> integer mapping of
# every encoded column and can be inspected later (e.g. via `enc.mapping`).
categorical_columns = dataset.select_dtypes(include='category').columns.tolist()

enc = OrdinalEncoder(cols=categorical_columns)
if categorical_columns:
    # Only the listed columns are encoded; all other columns pass through.
    dataset = enc.fit_transform(dataset)

## Split

In [3]:
# Split the data into features (X) and target (y)
TARGET_COLUMN = "target_obs_level"
X = dataset.drop(columns=TARGET_COLUMN)
y = dataset[TARGET_COLUMN]

# Use 20% to test, and the remaining to train.
# The seed is fixed so the same rows land in train/test on every
# "Restart Kernel -> Run All", which keeps the reported accuracy reproducible.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SPLIT_SEED
)

## Fit & evaluate

In [4]:
# Random forests are stochastic (bootstrap samples and per-split feature
# subsets), so the estimator is seeded to make the fitted model and the
# accuracy below reproducible between runs.
MODEL_SEED = 42
classifier = RandomForestClassifier(random_state=MODEL_SEED)
classifier.fit(X_train, y_train)

# Score the model on the held-out test portion only.
y_pred = classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9621749408983451


## Save the model

In [5]:
from joblib import dump
from utils.constants import MODEL_PATH

# Serialize the fitted classifier to MODEL_PATH. The file is opened in binary
# write mode and closed by the context manager; pickle protocol 5 is requested
# explicitly.
with open(MODEL_PATH, mode="wb") as model_file:
    dump(classifier, model_file, protocol=5)

## Load the saved model

In [6]:
from joblib import load
from pandas import DataFrame

# Read the persisted classifier back from MODEL_PATH.
with open(MODEL_PATH, mode="rb") as model_file:
    clf: RandomForestClassifier = load(model_file)

# A single example observation, keyed by feature name so each value sits next
# to the column it belongs to. Key order is the column order handed to the model.
# NOTE(review): the integer values for sex, food_bw_meals, alcohol_freq and
# transportation are presumably the ordinal codes produced by the encoding
# step earlier in the notebook -- confirm against the fitted encoder.
user_input = {
    'sex': 1,
    'age': 21.0,
    'height': 1.62,
    'weight': 64.0,
    'has_family_history': True,
    'freq_high_calorie_intake': False,
    'veg_in_meals': 2.0,
    'num_meals': 3.0,
    'food_bw_meals': 1,
    'is_smoker': False,
    'water_intake': 2.0,
    'monitors_calories': False,
    'physical_act_freq': 0.0,
    'screen_time': 1.0,
    'alcohol_freq': 1,
    'transportation': 1,
}
feature_names = list(user_input)
user_input_values = list(user_input.values())

# Build a one-row frame and ask the reloaded model for its prediction.
model_input = DataFrame([user_input_values], columns=feature_names)
prediction = clf.predict(model_input)

print(f"Prediction: {prediction[0]}")

Prediction: 1
