In [7]:
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.preprocessing import LabelEncoder

In [9]:
# Read the raw data; the relative path is resolved against the working directory
csv_path = 'horse.csv'
df = pd.read_csv(csv_path)


In [10]:
# Report the per-column count of missing cells before any imputation
missing_before = df.isna().sum()
print('Missing values before imputation:', missing_before, sep='\n')


Missing values before imputation:
surgery                    0
age                        0
hospital_number            0
rectal_temp               60
pulse                     24
respiratory_rate          58
temp_of_extremities       56
peripheral_pulse          69
mucous_membrane           47
capillary_refill_time     32
pain                      55
peristalsis               44
abdominal_distention      56
nasogastric_tube         104
nasogastric_reflux       106
nasogastric_reflux_ph    246
rectal_exam_feces        102
abdomen                  118
packed_cell_volume        29
total_protein             33
abdomo_appearance        165
abdomo_protein           198
outcome                    0
surgical_lesion            0
lesion_1                   0
lesion_2                   0
lesion_3                   0
cp_data                    0
dtype: int64


In [11]:
# Collect the names of the object-dtype (string-valued) columns
object_frame = df.select_dtypes(include='object')
string_cols = object_frame.columns

In [18]:
# Perform label encoding for string columns so the imputer receives numeric input.
# Only the observed values are encoded; a missing cell stays NaN so the imputer
# still sees it as missing. Casting the whole column with astype(str) would turn
# NaN into the literal label 'nan', which is then treated as a real category and
# never imputed.
# NOTE: this cell overwrites df in place, so run it once on a freshly loaded df;
# re-running it would refit the encoders on the numeric codes instead of the
# original labels.
label_encoders = {}
for col in string_cols:
    encoder = LabelEncoder()
    observed = df[col].notna()
    # Float column so the gaps can hold NaN next to the integer codes.
    codes = pd.Series(float('nan'), index=df.index, dtype='float64')
    codes.loc[observed] = encoder.fit_transform(df.loc[observed, col].astype(str))
    df[col] = codes
    label_encoders[col] = encoder


In [19]:
# Build the KNN imputer; the neighbour count (k) can be tuned as required
n_neighbors = 5
imputer = KNNImputer(n_neighbors=n_neighbors)


In [20]:
# Perform KNN imputation on min-max scaled features.
# KNNImputer picks neighbours by Euclidean distance over the values it is given,
# so without scaling the columns with the widest numeric range decide which rows
# count as neighbours. Every column is scaled to [0, 1] for the neighbour search
# and the filled values are mapped back to the original units afterwards.
col_min = df.min()
col_span = (df.max() - col_min).replace(0, 1)  # constant columns: avoid 0/0
scaled = (df - col_min) / col_span
filled = pd.DataFrame(imputer.fit_transform(scaled), columns=df.columns, index=df.index)
filled = filled * col_span + col_min
# Keep observed cells exactly as they were (no float round-trip drift); only the
# originally missing cells take the imputed value.
df_imputed = df.astype('float64').where(df.notna(), filled)


In [21]:
# Convert the label-encoded columns back to their original string labels.
# KNNImputer fills a gap with the mean of the neighbouring values, so an imputed
# code can be fractional; round to the nearest code first, because a bare
# astype(int) truncates toward zero (1.8 would become 1 instead of 2).
for col in string_cols:
    nearest_code = df_imputed[col].round().astype(int)
    df_imputed[col] = label_encoders[col].inverse_transform(nearest_code)


In [22]:
# Report the per-column count of missing cells left in the imputed frame
missing_after = df_imputed.isna().sum()
print('\nMissing values after imputation:', missing_after, sep='\n')


Missing values after imputation:
surgery                  0
age                      0
hospital_number          0
rectal_temp              0
pulse                    0
respiratory_rate         0
temp_of_extremities      0
peripheral_pulse         0
mucous_membrane          0
capillary_refill_time    0
pain                     0
peristalsis              0
abdominal_distention     0
nasogastric_tube         0
nasogastric_reflux       0
nasogastric_reflux_ph    0
rectal_exam_feces        0
abdomen                  0
packed_cell_volume       0
total_protein            0
abdomo_appearance        0
abdomo_protein           0
outcome                  0
surgical_lesion          0
lesion_1                 0
lesion_2                 0
lesion_3                 0
cp_data                  0
dtype: int64
