In [1]:
import pickle
import pandas as pd
import numpy as np
import tensorflow as tf

#### Load Feature Engineering Objects

In [2]:
# Column groups: every input column is classified as ordinal, binary, nominal,
# or numerical, and preprocess_input() encodes each group differently.

# Passed to ordinal_encoder.transform().
ordinal_features = [
    'Alcohol_Consumption',
    'Obesity',
    'Healthcare_Access',
    'Preventive_Care',
    'Seafood_Consumption',
]

# Text labels that preprocess_input() maps to 0 / 1.
binary_features = [
    'Smoking_Status',
    'Hepatitis_B_Status',
    'Hepatitis_C_Status',
    'Diabetes',
    'Screening_Availability',
    'Treatment_Availability',
    'Liver_Transplant_Access',
    'Herbal_Medicine_Use',
]

# Passed to one_hot_encoder.transform(); the raw columns are dropped afterwards.
nominal_features = [
    'Country',
    'Region',
    'Gender',
    'Rural_or_Urban',
    'Ethnicity',
]

# Passed to scaler.transform().
numerical_features = [
    'Population',
    'Incidence_Rate',
    'Mortality_Rate',
    'Age',
    'Cost_of_Treatment',
    'Survival_Rate',
]

# Presumably the label column used at training time; it is not referenced by
# the other cells shown in this notebook.
target_variable = ['Prediction']


In [3]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    SECURITY NOTE: ``pickle.load`` can execute arbitrary code embedded in the
    file. Only load artifacts that were produced by a trusted training run.

    Args:
        path: Location of the pickle file, opened in binary mode.

    Returns:
        Whatever object was pickled into the file.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    with open(path, "rb") as f:
        return pickle.load(f)


# Fitted preprocessing objects; preprocess_input() calls .transform() on each.
ordinal_encoder = _load_pickle("ordinal_encoder.pkl")
one_hot_encoder = _load_pickle("one_hot_encoder.pkl")
scaler = _load_pickle("scaler.pkl")

# Load feature names used during training
feature_names = _load_pickle("feature_names.pkl")

#### Load Trained ML & Deep Learning Models


In [4]:
# Keras model restored from its saved file; used by predict_dl().
deep_learning_model = tf.keras.models.load_model("deep_learning_model.keras")

# Pickled model used by predict_rf(). pickle.load can execute code embedded in
# the file, so this artifact must come from a trusted source.
with open("rf_model.pkl", mode="rb") as model_file:
    rf_model = pickle.load(model_file)

#### Preprocess Data

In [5]:
# Text labels accepted for the binary features and the 0/1 code each maps to.
# Defined once here instead of being rebuilt for every column in the loop.
_BINARY_LABEL_CODES = {
    'No': 0, 'Yes': 1,
    'Negative': 0, 'Positive': 1,
    'Non-Smoker': 0, 'Smoker': 1,
    'Not Available': 0, 'Available': 1,
}


def preprocess_input(data):
    """Turn one raw input record into a single-row feature frame for the models.

    Steps, in order: ordinal-encode ``ordinal_features``, map the text labels
    of ``binary_features`` to 0/1, one-hot encode ``nominal_features``, scale
    ``numerical_features``, then select and order columns by ``feature_names``.

    Args:
        data: One record as a dict of column name -> raw value (see the sample
            input in this notebook). It must contain every column named in the
            four feature lists.

    Returns:
        pandas.DataFrame with a single row whose columns are exactly
        ``feature_names``, in that order.

    Raises:
        KeyError: If a required column is missing from ``data`` or a name in
            ``feature_names`` is not present after encoding.
        ValueError: If a binary feature holds a value that is not a key of
            ``_BINARY_LABEL_CODES``.
    """
    # Wrapping the dict in a list yields a one-row frame.
    df = pd.DataFrame([data])

    # Apply Ordinal Encoding
    df[ordinal_features] = ordinal_encoder.transform(df[ordinal_features])

    # Apply Binary Encoding
    for col in binary_features:
        codes = df[col].map(_BINARY_LABEL_CODES)
        # Series.map leaves NaN for any value missing from the dict; fail
        # loudly here rather than passing NaN on to the models.
        unmapped = codes.isna()
        if unmapped.any():
            raise ValueError(
                f"Unrecognised value(s) {df.loc[unmapped, col].tolist()!r} for binary "
                f"feature {col!r}; expected one of {sorted(_BINARY_LABEL_CODES)}"
            )
        df[col] = codes

    # Apply One-Hot Encoding
    encoded_nominals = one_hot_encoder.transform(df[nominal_features])
    if hasattr(encoded_nominals, "toarray"):
        # Densify if the encoder returned a sparse matrix, so the frame below
        # is built from a plain 2-D array.
        encoded_nominals = encoded_nominals.toarray()
    nominal_feature_names = one_hot_encoder.get_feature_names_out(nominal_features)
    df_nominal = pd.DataFrame(encoded_nominals, columns=nominal_feature_names, index=df.index)

    # Drop original categorical columns and join encoded ones
    df = df.drop(columns=nominal_features).join(df_nominal)

    # Apply Scaling to Numerical Features
    df[numerical_features] = scaler.transform(df[numerical_features])

    # Force the column set and order to match the names saved at training time;
    # any extra keys supplied in `data` are dropped by this selection.
    df = df[feature_names]

    return df

#### Define Prediction Functions


In [6]:
def predict_rf(data):
    """Predict for one raw record with the model bound to ``rf_model``.

    Args:
        data: Raw record in the form accepted by ``preprocess_input``.

    Returns:
        dict with a single key ``"prediction"`` holding the first element of
        the model's ``predict`` output, converted to ``int``.
    """
    features = preprocess_input(data)
    predicted = rf_model.predict(features)
    first_value = predicted[0]
    return {"prediction": int(first_value)}

def predict_dl(data):
    """Predict for one raw record with the model bound to ``deep_learning_model``.

    Args:
        data: Raw record in the form accepted by ``preprocess_input``.

    Returns:
        dict with a single key ``"prediction"`` holding element ``[0, 0]`` of
        the model's ``predict`` output, converted to ``float``. Unlike
        ``predict_rf`` the value is not cast to ``int``.
    """
    features = preprocess_input(data)
    model_output = deep_learning_model.predict(features)
    # The first row, first column of the output is the value reported.
    score = float(model_output[0, 0])
    return {"prediction": score}


#### Test with Sample Input


In [7]:
# One hand-written record covering every column that preprocess_input() reads.
# Binary fields use labels that preprocess_input() knows how to map to 0/1.
sample_input = {
    "Country": "United States",
    "Region": "North America",
    "Population": 331000000,
    "Incidence_Rate": 4.2,
    "Mortality_Rate": 1.5,
    "Gender": "Male",
    "Age": 55,
    "Alcohol_Consumption": "Moderate",
    "Smoking_Status": "Non-Smoker",
    "Hepatitis_B_Status": "Negative",
    "Hepatitis_C_Status": "Negative",
    "Obesity": "Overweight",
    "Diabetes": "No",
    "Rural_or_Urban": "Urban",
    "Seafood_Consumption": "Low",
    "Herbal_Medicine_Use": "Yes",
    "Healthcare_Access": "Good",
    "Screening_Availability": "Available",
    "Treatment_Availability": "Available",
    "Liver_Transplant_Access": "No",
    "Ethnicity": "Caucasian",
    "Preventive_Care": "Moderate",
    "Cost_of_Treatment": 20000,
    "Survival_Rate": 95.0,
}

# Run the same record through both models; the random-forest call goes first.
rf_result = predict_rf(sample_input)
print("\n Random Forest Prediction:", rf_result)

dl_result = predict_dl(sample_input)
print("\n Deep Learning Prediction:", dl_result)


 Random Forest Prediction: {'prediction': 0}
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 32ms/step

 Deep Learning Prediction: {'prediction': 0.20820412039756775}
