In [3]:
import pandas as pd
import numpy as np

# Load the dataset: read 'diabetes.csv' (path is relative to the current
# working directory) into a DataFrame that the steps below transform.
df = pd.read_csv('diabetes.csv')

# Function to create binary columns
def create_binary_columns(df, column, thresholds, prefix, is_inter_ordinal=False):
    """Append 0/1 threshold-indicator columns for one column of ``df``.

    For every value in ``thresholds`` a column named ``'{prefix}>={value}'``
    is written, holding 1 where ``df[column] >= value`` and 0 elsewhere.
    When ``is_inter_ordinal`` is true, a ``'{prefix}<={value}'`` column is
    written as well, immediately before its ``>=`` counterpart.

    Args:
        df: DataFrame to extend. It is modified in place.
        column: Name of the source column to compare against the thresholds.
        thresholds: Iterable of cut-off values, processed in the given order.
        prefix: Text placed in front of the comparison in each new column name.
        is_inter_ordinal: If true, emit both ``<=`` and ``>=`` indicators per
            threshold; otherwise only ``>=``.

    Returns:
        The same ``df`` object that was passed in, with the new columns added.
    """
    for cutoff in thresholds:
        # The '<=' indicator is optional and must precede '>=' so the
        # resulting column order is '<=' then '>=' for each cutoff.
        if is_inter_ordinal:
            df[f'{prefix}<={cutoff}'] = (df[column] <= cutoff).astype(int)
        # The '>=' indicator is produced in both modes.
        df[f'{prefix}>={cutoff}'] = (df[column] >= cutoff).astype(int)
    return df

# Binarization plan, one row per feature:
#   (source column, thresholds, new-column prefix, inter-ordinal?)
# Inter-ordinal features get both '<=' and '>=' indicators per threshold;
# the remaining features get only '>='.
_binarization_plan = [
    ('Pregnancies', [1, 3, 6], 'Preg', False),
    ('Glucose', [100, 125, 140], 'Gluc', True),
    ('BloodPressure', [60, 80, 90, 120], 'BP', True),
    ('SkinThickness', [20, 25, 30], 'ST', False),
    ('Insulin', [100, 150, 200], 'Ins', False),
    ('BMI', [18.5, 25, 30, 35], 'BMI', True),
    ('DiabetesPedigreeFunction', [0.5, 1, 1.5], 'DPF', False),
    ('Age', [30, 45, 60], 'Age', True),
]

# Apply the plan in order so the indicator columns appear feature by feature
for _column, _thresholds, _prefix, _inter_ordinal in _binarization_plan:
    df = create_binary_columns(df, _column, _thresholds, _prefix, _inter_ordinal)

# Encode the target as two complementary columns derived from 'Outcome'
df['Diabetic'] = df['Outcome']
df['Not_Diabetic'] = 1 - df['Outcome']

# Remove the raw feature columns listed in the plan, plus the raw target
_raw_columns = [spec[0] for spec in _binarization_plan] + ['Outcome']
df = df.drop(columns=_raw_columns)

# Write the binarized dataset to disk without the index column
df.to_csv('binarized_diabetes.csv', index=False)
# Show the first few rows as a quick sanity check
print(df.head())

   Preg>=1  Preg>=3  Preg>=6  Gluc<=100  Gluc>=100  Gluc<=125  Gluc>=125  \
0        1        1        1          0          1          0          1   
1        1        0        0          1          0          1          0   
2        1        1        1          0          1          0          1   
3        1        0        0          1          0          1          0   
4        0        0        0          0          1          0          1   

   Gluc<=140  Gluc>=140  BP<=60  ...  DPF>=1  DPF>=1.5  Age<=30  Age>=30  \
0          0          1       0  ...       0         0        0        1   
1          1          0       0  ...       0         0        0        1   
2          0          1       0  ...       0         0        0        1   
3          1          0       0  ...       0         0        1        0   
4          1          0       1  ...       1         1        0        1   

   Age<=45  Age>=45  Age<=60  Age>=60  Diabetic  Not_Diabetic  
0        0        1   