In [1]:

import pandas as pd
from sklearn.preprocessing import LabelEncoder
import joblib
from sklearn import preprocessing
# Exclusive upper bounds of the consecutive BMI bands; band i (1-based) covers
# values below bound i and at or above bound i-1.
_BMI_UPPER_BOUNDS = (18.5, 25, 30, 35, 40, 50)

# Exclusive upper bounds of the consecutive MAP bands (10-unit steps from 70).
_MAP_UPPER_BOUNDS = (70, 80, 90, 100, 110, 120)


def _rate_by_bands(value, upper_bounds, overflow):
    """Return the 1-based index of the first band whose upper bound exceeds ``value``.

    Bands are contiguous and half-open, so every finite value below the last
    bound gets a code (no gaps such as 24.9 < x < 25). Values at or above the
    last bound yield ``overflow``; NaN fails every comparison and yields
    ``'Not_Rated'``.
    """
    for code, upper in enumerate(upper_bounds, start=1):
        if value < upper:
            return code
    if value >= upper_bounds[-1]:
        return overflow
    return 'Not_Rated'


def transform_user_data(user_df):
    """Derive age bin, BMI, MAP and their class codes from raw user records.

    Parameters
    ----------
    user_df : pandas.DataFrame
        Must contain the columns ``age``, ``height``, ``weight``, ``gender``,
        ``ap_hi``, ``ap_lo``, ``cholesterol``, ``gluc``, ``smoke`` and
        ``active``. Height is divided by 100 before squaring, i.e. it is
        treated as centimetres (weight is presumably kilograms — TODO confirm).
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``gender, height, weight, bmi, ap_hi,
        ap_lo, MAP, age, age_bin, BMI_Class, MAP_Class, cholesterol, gluc,
        smoke, active`` in that order. Any other input column is dropped.

        * ``BMI_Class``: 1 (< 18.5), 2 (< 25), 3 (< 30), 4 (< 35), 5 (< 40),
          6 (< 50), ``'Error'`` (>= 50) or ``'Not_Rated'`` (NaN).
        * ``MAP_Class``: 1 (< 70), then 2..6 in steps of 10 up to 120,
          7 (>= 120) or ``'Not_Rated'`` (NaN).

    Raises
    ------
    KeyError
        If a required column is missing.
    """
    user_df = user_df.copy()

    # Right-closed age bins. NOTE: the last bin spans (60, 150] but keeps the
    # label '60-65'; the labels are left unchanged because transform_cat maps
    # on these exact strings. Ages outside (0, 150] become NaN.
    user_df['age_bin'] = pd.cut(
        user_df['age'],
        [0, 20, 30, 35, 40, 45, 50, 55, 60, 150],
        labels=['0-20', '20-30', '30-35', '35-40', '40-45', '45-50', '50-55', '55-60', '60-65'],
    )

    user_df['bmi'] = user_df['weight'] / ((user_df['height'] / 100) ** 2)
    # A plain list keeps an all-integer result as an integer column and only
    # falls back to object dtype when a string marker is present.
    user_df['BMI_Class'] = [
        _rate_by_bands(bmi, _BMI_UPPER_BOUNDS, 'Error') for bmi in user_df['bmi']
    ]

    # Mean arterial pressure: (2 * diastolic + systolic) / 3.
    user_df['MAP'] = ((2 * user_df['ap_lo']) + user_df['ap_hi']) / 3
    user_df['MAP_Class'] = [
        _rate_by_bands(pressure, _MAP_UPPER_BOUNDS, 7) for pressure in user_df['MAP']
    ]

    # Keep only the columns used downstream, in a fixed order.
    user_df = user_df[["gender","height","weight","bmi","ap_hi","ap_lo","MAP","age","age_bin","BMI_Class","MAP_Class","cholesterol","gluc","smoke","active"]]
    return user_df






In [2]:
import warnings

# Globally ignore pandas PerformanceWarning messages.
# NOTE(review): this comment previously said "Suppress SettingWithCopyWarning",
# but the filter's category is PerformanceWarning — confirm which was intended.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

In [3]:
def transform_cat(user_df):
    """Encode the categorical feature columns as integer codes.

    Parameters
    ----------
    user_df : pandas.DataFrame
        Must contain ``gender``, ``age_bin``, ``BMI_Class``, ``MAP_Class``,
        ``cholesterol``, ``gluc``, ``smoke`` and ``active``. The input frame
        is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with exactly those eight columns. ``smoke`` and ``active``
        are passed through unchanged; the other six are replaced by the codes
        in the mapping tables below.

    Raises
    ------
    KeyError
        If a required column is missing.
    ValueError
        If a mapped column contains a value with no code (for example age bin
        ``'30-35'``, ``MAP_Class`` 1, or the ``'Error'`` / ``'Not_Rated'``
        markers). Previously such values became NaN silently, or failed with
        an opaque integer-cast error for ``age_bin``.
    """
    user_cat = user_df[["gender","age_bin","BMI_Class","MAP_Class","cholesterol","gluc","smoke","active"]].copy()

    # Code tables per feature. NOTE(review): these presumably mirror the
    # encoding used when the models were trained, which would explain the
    # missing age bins and MAP class 1 — do not extend without confirming.
    mappings = {
        'gender': {1: 0, 2: 1},
        'age_bin': {'20-30': 0, '35-40': 1, '40-45': 2, '45-50': 3, '50-55': 4, '55-60': 5, '60-65': 6},
        'BMI_Class': {1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5},
        'MAP_Class': {2: 0, 3: 1, 4: 2, 5: 3, 6: 4, 7: 5},
        'cholesterol': {1: 0, 2: 1, 3: 2},
        'gluc': {1: 0, 2: 1, 3: 2},
    }

    for column, mapping in mappings.items():
        encoded = user_cat[column].map(mapping)
        unmapped = encoded.isna()
        if unmapped.any():
            offending = sorted({str(value) for value in user_cat.loc[unmapped, column]})
            raise ValueError(
                f"Column '{column}' contains values with no encoding: {offending}; "
                f"supported values are {list(mapping)}"
            )
        # Only age_bin is cast explicitly, as before; the other columns keep
        # the dtype that Series.map produces.
        user_cat[column] = encoded.astype(int) if column == 'age_bin' else encoded

    return user_cat



In [4]:
def apply_kmodes(user_cat):
    """Predict a cluster per row and prepend it as a ``Cluster`` column.

    The model file is chosen from the ``gender`` code of the FIRST row only:
    code 0 loads ``kmodes_model_male.joblib``, anything else loads
    ``kmodes_model_female.joblib`` (both relative to the working directory).
    NOTE(review): code 0 is labelled "Male" by the original author — confirm
    against the dataset's gender encoding. Frames mixing both genders are all
    scored with the first row's model.

    Parameters
    ----------
    user_cat : pandas.DataFrame
        Encoded feature frame as produced by ``transform_cat``. It is modified
        in place: ``Cluster`` is inserted at position 0.

    Returns
    -------
    pandas.DataFrame
        The same ``user_cat`` object, now with the ``Cluster`` column. For the
        code-0 branch the predicted ids are remapped 0 -> 2 and 1 -> 3
        (presumably so they do not collide with the other model's ids —
        TODO confirm).

    Raises
    ------
    IndexError
        If ``user_cat`` has no rows.
    """
    is_male = user_cat.iloc[0]['gender'] == 0
    model_path = 'kmodes_model_male.joblib' if is_male else 'kmodes_model_female.joblib'

    # joblib.load unpickles arbitrary objects; only load trusted model files.
    kmodes_model = joblib.load(model_path)
    clusters = kmodes_model.predict(user_cat)
    user_cat.insert(0, "Cluster", clusters, True)

    if is_male:
        # Assign the result back instead of calling replace(inplace=True) on
        # the selected column: that chained in-place form does not update the
        # frame when pandas Copy-on-Write is active.
        user_cat["Cluster"] = user_cat["Cluster"].replace({0: 2, 1: 3})

    return user_cat

In [6]:
# Sample raw record used to exercise the preprocessing functions below.
data = {
    "age": 54,
    "height": 168,
    "weight": 62,
    "gender": 2,
    "ap_hi": 110,
    "ap_lo": 80,
    "cholesterol": 1,
    "gluc": 1,
    "smoke": 0,
    "alco": 1,
    "active": 0,
}

In [7]:
# Wrap the single record in a list so it becomes a one-row frame.
df = pd.DataFrame([data])
df.head()


Unnamed: 0,age,height,weight,gender,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active
0,54,168,62,2,110,80,1,1,0,1,0


In [8]:
# Add the derived age_bin / bmi / MAP columns and their class codes.
# Note: this rebinds `df`, so the raw one-row frame is no longer reachable by name.
df = transform_user_data(df)
df.head()

Unnamed: 0,gender,height,weight,bmi,ap_hi,ap_lo,MAP,age,age_bin,BMI_Class,MAP_Class,cholesterol,gluc,smoke,active
0,2,168,62,21.96712,110,80,90.0,54,50-55,2,4,1,1,0,0


In [9]:
# Encode the categorical feature columns as integer codes.
df_cat = transform_cat(df)
df_cat.head()

Unnamed: 0,gender,age_bin,BMI_Class,MAP_Class,cholesterol,gluc,smoke,active
0,1,4,1,2,0,0,0,0


In [10]:
# apply_kmodes inserts "Cluster" into the frame it is given, so feeding it the
# previous `df_cat` makes this cell non-repeatable (a second run would pass a
# frame that already contains "Cluster" to the clustering model). Re-encoding
# from `df` each time gives the same result and keeps the cell re-runnable.
df_cat = apply_kmodes(transform_cat(df))
df_cat.head()

Unnamed: 0,Cluster,gender,age_bin,BMI_Class,MAP_Class,cholesterol,gluc,smoke,active
0,0,1,4,1,2,0,0,0,0


In [11]:
# Inspect column dtypes and non-null counts of the frame that is fed to the model.
df_cat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Cluster      1 non-null      uint16
 1   gender       1 non-null      int64 
 2   age_bin      1 non-null      int32 
 3   BMI_Class    1 non-null      int64 
 4   MAP_Class    1 non-null      int64 
 5   cholesterol  1 non-null      int64 
 6   gluc         1 non-null      int64 
 7   smoke        1 non-null      int64 
 8   active       1 non-null      int64 
dtypes: int32(1), int64(7), uint16(1)
memory usage: 190.0 bytes


In [12]:
# Load the persisted classifier from the working directory (presumably a
# random forest, judging by the file name — TODO confirm).
# joblib.load unpickles arbitrary objects; only load trusted model files.
loaded_rf_model = joblib.load('rf_model_73.joblib')

In [13]:
# Score the encoded, cluster-augmented frame (Cluster is its first column).
predictions = loaded_rf_model.predict(df_cat)


In [14]:
# Display the predicted labels, one entry per input row.
predictions

array([0], dtype=int64)

In [15]:
def preprocess_data(data):
    """Run the full preprocessing chain on a single raw record.

    ``data`` is wrapped in a list to build a one-row DataFrame, which is then
    passed through ``transform_user_data``, ``transform_cat`` and
    ``apply_kmodes`` in that order.

    Returns the frame produced by ``apply_kmodes`` (encoded features with the
    ``Cluster`` column inserted first).
    """
    raw_frame = pd.DataFrame([data])
    engineered = transform_user_data(raw_frame)
    encoded = transform_cat(engineered)
    return apply_kmodes(encoded)

In [16]:
# Sanity check: the one-call pipeline should reproduce the step-by-step result above.
preprocess_data(data).head()

Unnamed: 0,Cluster,gender,age_bin,BMI_Class,MAP_Class,cholesterol,gluc,smoke,active
0,0,1,4,1,2,0,0,0,0


In [113]:
# Second sample record; note that this rebinds the `data` name used earlier.
data = {
    "age": 70,
    "height": 160,
    "weight": 100,
    "gender": 2,
    "ap_hi": 120,
    "ap_lo": 80,
    "cholesterol": 2,
    "gluc": 2,
    "smoke": 1,
    "alco": 1,
    "active": 0,
}
# Preprocess the record and score it with the already-loaded model.
predictions = loaded_rf_model.predict(preprocess_data(data))


In [87]:
# Display the current value of `predictions`.
# NOTE(review): this cell's execution count (87) is lower than that of the
# preceding cell (113), so the stored output may predate the latest
# assignment — re-run the notebook in order to confirm.
predictions

array([1], dtype=int64)

In [6]:
# Self-contained prediction example: everything it needs is imported here.
# The second import binds `preprocess_data` from a module of the same name,
# replacing any function of that name already defined in this session.
import joblib
from preprocess_data import preprocess_data

loaded_rf_model = joblib.load('rf_model_73.joblib')

data = {
    "age": 70,
    "height": 160,
    "weight": 100,
    "gender": 2,
    "ap_hi": 120,
    "ap_lo": 80,
    "cholesterol": 2,
    "gluc": 2,
    "smoke": 1,
    "alco": 1,
    "active": 0,
}

predictions = loaded_rf_model.predict(preprocess_data(data))
predictions


array([1], dtype=int64)

In [5]:
# Display the current value of `predictions`.
# NOTE(review): this cell's execution count (5) is lower than that of the cell
# above (6), i.e. it was executed out of document order.
predictions

array([1], dtype=int64)