In [2]:
import joblib
import pandas as pd
import numpy as np

def clip_outliers(data, lower_percentile=0.25, upper_percentile=0.75, factor=1.5):
    """Return a copy of *data* with every column clipped to IQR-based fences.

    For each column the quantiles at ``lower_percentile`` and
    ``upper_percentile`` are computed, and values are limited to the range
    ``[low - factor * spread, high + factor * spread]`` where ``spread`` is
    the difference between the two quantiles.

    Args:
        data: DataFrame whose columns all support ``quantile``.
        lower_percentile: Quantile (0-1) used as the lower reference point.
        upper_percentile: Quantile (0-1) used as the upper reference point.
        factor: Multiplier applied to the inter-quantile range to get the
            fence distance.

    Returns:
        A new DataFrame; the input frame is left untouched.
    """
    result = data.copy()

    for name in result.columns:
        series = result[name]
        low_q = series.quantile(lower_percentile)
        high_q = series.quantile(upper_percentile)

        # Distance of each fence from its reference quantile.
        margin = factor * (high_q - low_q)

        result[name] = np.clip(series, low_q - margin, high_q + margin)

    return result

# Restore the preprocessing pipeline and the model from their joblib dumps,
# in that order.
# NOTE(review): joblib.load unpickles its input, so only load trusted files.
# NOTE(review): the pickled pipeline presumably references clip_outliers,
# which would be why that function is defined before loading -- confirm.
preprocessing_pipeline, final_model = (
    joblib.load(artifact_path)
    for artifact_path in ('preprocessing_pipeline1.pkl', 'final_rff_model.pkl')
)

# Feature names, in the order they are echoed back to the user below.
feature_names = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'Age',
]

print("Please enter the following information:")

# Prompts are evaluated left to right, so the user is asked in the listed
# order. Pregnancies is parsed as an int, every other value as a float.
(
    Pregnancies,
    Glucose,
    BloodPressure,
    SkinThickness,
    Insulin,
    BMI,
    Age,
) = (
    int(input("Pregnancies: ")),
    float(input("Glucose: ")),
    float(input("BloodPressure: ")),
    float(input("SkinThickness: ")),
    float(input("Insulin: ")),
    float(input("BMI: ")),
    float(input("Age: ")),
)

# One record keyed by feature name; insertion order matches feature_names.
input_data = dict(
    Pregnancies=Pregnancies,
    Glucose=Glucose,
    BloodPressure=BloodPressure,
    SkinThickness=SkinThickness,
    Insulin=Insulin,
    BMI=BMI,
    Age=Age,
)

# Single-row frame built from that one record.
input_df = pd.DataFrame([input_data])

# Echo the collected values back before predicting.
print("\nInput Data (with features):")
for feature in feature_names:
    print(f"{feature}: {input_data[feature]}")

# Apply the loaded preprocessing steps to the single-row frame, then classify.
preprocessed_input = preprocessing_pipeline.transform(input_df)
prediction = final_model.predict(preprocessed_input)

# Only the first (and only) prediction is inspected; a label of 1 is reported
# as the positive outcome, anything else as the negative one.
if prediction[0] != 1:
    print("Prediction: The patient is unlikely to have diabetes.")
else:
    print("Prediction: The patient is likely to have diabetes.")


Please enter the following information:

Input Data (with features):
Pregnancies: 5
Glucose: 166.0
BloodPressure: 72.0
SkinThickness: 19.0
Insulin: 177.0
BMI: 25.8
Age: 51.0
Prediction: The patient is likely to have diabetes.


