In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score

# loading the data
diabetes_data = pd.read_csv('diabetes_prediction_dataset.csv')

# to remove the duplicates rows from the dataset
diabetes_data.drop_duplicates(inplace=True)

diabetes_data = pd.get_dummies(diabetes_data, columns=['gender'], drop_first=True)
diabetes_data = pd.get_dummies(diabetes_data, columns=['smoking_history'], drop_first=True)
print(diabetes_data)

# split the data into test and train
X = diabetes_data.drop('diabetes', axis=1)
Y = diabetes_data['diabetes']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
print("X_train shape:", X_train.shape)
print("Y_train shape:", Y_train.shape)
print("X_test shape:", X_test.shape)
print("Y_test shape:", Y_test.shape)

# train the model
model_rf = RandomForestRegressor()
Y_train = Y_train.values.reshape(-1, 1)
model_rf.fit(X_train, Y_train)
Y_test_reshaped = Y_test.values.reshape(-1, 1)

# test the model and find the accuracy of the model
def evaluate_model(model, X_test, Y_test):
    Y_pred = model.predict(X_test)
    Y_pred_binary = [0 if pred < 0.5 else 1 for pred in Y_pred]
    accuracy = accuracy_score(Y_test, Y_pred_binary)
    mse = mean_squared_error(Y_test, Y_pred)
    r2 = r2_score(Y_test, Y_pred)
    return mse, r2, accuracy

mse_rf, r2_rf, accuracy_rf = evaluate_model(model_rf, X_test, Y_test)
#print("Model 2 - Random Forest")
#print("Mean Squared Error:", mse_rf)
#print("R-squared:", r2_rf)
#print("Accuracy: ", accuracy_rf)


def predict_diabetes(user_input):
    # Create a DataFrame from user input
    user_df = pd.DataFrame(user_input, index=[0])

    user_df = user_df[X_train.columns]

    # Make the prediction using the trained model
    user_prediction = model_rf.predict(user_df)

    if user_prediction[0] == 0:
        return "The model predicts that the user does not have diabetes."
    else:
        return "The model predicts that the user has diabetes."

# Example user input (replace with actual user input)
user_input = {
    'gender_Male': int(input("Enter 0 or 1 for female and male: ")),
    'age': int(input("Enter age: ")),
    'hypertension': int(input("Enter 0 for false and 1 for true if you have hypertension: ")),
    'heart_disease': int(input("Enter 0 for false and 1 for true if you have heart disease: ")),
    'smoking_history_never': int(input("Enter 0 for false and 1 for true if you have smoking history: ")),
    'bmi': float(input("Enter your BMI value: ")),
    'HbA1c_level': float(input("Enter your HbA1c level: ")),
    'blood_glucose_level': int(input("Enter your blood glucose level: ")),
    'gender_Other': 0,                   # Set these values as 0 for now
    'smoking_history_current': 0,        # Set these values as 0 for now
    'smoking_history_ever': 0,           # Set these values as 0 for now
    'smoking_history_former': 0,         # Set these values as 0 for now
    'smoking_history_not current': 0     # Set these values as 0 for now
}

# Run the prediction function with user input
result = predict_diabetes(user_input)
print(result)


        age  hypertension  heart_disease    bmi  HbA1c_level  \
0      80.0             0              1  25.19          6.6   
1      54.0             0              0  27.32          6.6   
2      28.0             0              0  27.32          5.7   
3      36.0             0              0  23.45          5.0   
4      76.0             1              1  20.14          4.8   
...     ...           ...            ...    ...          ...   
99994  36.0             0              0  24.60          4.8   
99996   2.0             0              0  17.37          6.5   
99997  66.0             0              0  27.83          5.7   
99998  24.0             0              0  35.42          4.0   
99999  57.0             0              0  22.43          6.6   

       blood_glucose_level  diabetes  gender_Male  gender_Other  \
0                      140         0            0             0   
1                       80         0            0             0   
2                      158    

  model_rf.fit(X_train, Y_train)


Enter 0 or 1 for female and male: 1
Enter age: 20
Enter 0 for false and 1 for true if you have hypertension: 1
Enter 0 for false and 1 for true if you have heart disease: 0
Enter 0 for false and 1 for true if you have smoking history: 0
Enter your BMI value: 19.1
Enter your HbA1c level: 5
Enter your blood glucose level: 90
The model predicts that the user does not have diabetes.


In [14]:
Y

array([1.    , 1.    , 0.1475, ..., 1.    , 0.02  , 0.27  ])