In [1]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from feature_engine.imputation import (
    AddMissingIndicator,
    MeanMedianImputer,
    CategoricalImputer,
)
from feature_engine.encoding import (
    OrdinalEncoder,
)

In [3]:
# Load the insurance dataset; the relative path is resolved against the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer="insurance.csv")

# Bare last expression so the notebook renders the frame as a rich table.
df

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.900,0,yes,southwest,16884.92400
1,18,male,33.770,1,no,southeast,1725.55230
2,28,male,33.000,3,no,southeast,4449.46200
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.880,0,no,northwest,3866.85520
...,...,...,...,...,...,...,...
1333,50,male,30.970,3,no,northwest,10600.54830
1334,18,female,31.920,0,no,northeast,2205.98080
1335,18,female,36.850,0,no,southeast,1629.83350
1336,21,female,25.800,0,no,southwest,2007.94500


In [4]:
# Hold out 10% of the rows as a test set. 'charges' is the prediction target,
# every other column is a feature; the fixed random_state keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=['charges']),
    df['charges'],
    test_size=0.1,
    random_state=0,
)

In [5]:
# Names of the columns holding at least one missing value, in column order.
# An empty list means there is nothing to impute.
vars_with_na = df.columns[df.isnull().any().to_numpy()].tolist()
print(vars_with_na)

[]


In [7]:
# Collect the categorical (text) features from all the columns.
#
# Comparing the dtype against 'O' only recognises object-dtype columns. When
# pandas stores text in its dedicated string dtype (the default inference in
# pandas 3.x), that comparison is False for every column and the list would
# silently come back empty. Checking both kinds keeps the result identical on
# object-dtype frames while also picking up string-dtype columns.

cat_vars = [
    var
    for var in df.columns
    if pd.api.types.is_object_dtype(df[var])
    or pd.api.types.is_string_dtype(df[var])
]

cat_vars


['sex', 'smoker', 'region']

In [8]:
# Configure an ordinal encoder restricted to the categorical columns.
cat_encoder = OrdinalEncoder(
    variables=cat_vars,
    encoding_method='ordered',
)

# Learn the category -> integer mappings from the training split only, so no
# information from the test split influences the encoding. The target is
# passed alongside the features because the 'ordered' method uses it when
# ranking the categories (see the feature_engine OrdinalEncoder docs).
cat_encoder.fit(X_train, y_train)

# The learned mappings live on the fitted encoder; display them for inspection.
cat_encoder.encoder_dict_

{'sex': {'female': 0, 'male': 1},
 'smoker': {'no': 0, 'yes': 1},
 'region': {'southwest': 0, 'northwest': 1, 'northeast': 2, 'southeast': 3}}

In [9]:
# Apply the fitted encoder to both the train and the test features.
# NOTE: the encoded frames are stored back under the same names, so re-running
# this cell on its own would hand already-encoded data to the encoder; re-run
# from the train/test split cell instead.

X_train, X_test = cat_encoder.transform(X_train), cat_encoder.transform(X_test)

In [10]:
# Show the training features as they stand after the encoding step.
X_train

Unnamed: 0,age,sex,bmi,children,smoker,region
461,42,1,30.00,0,1,0
322,34,1,30.80,0,1,0
224,42,1,24.64,0,1,3
711,50,0,23.54,2,0,3
58,53,0,22.88,1,1,3
...,...,...,...,...,...,...
763,27,1,26.03,0,0,2
835,42,1,35.97,2,0,3
1216,40,1,25.08,0,0,3
559,19,1,35.53,0,0,1
