In [1]:
import pandas as pd
import numpy as np

In [4]:
# Load the credit-score dataset; the relative path is resolved against the working directory.
df = pd.read_csv(filepath_or_buffer='credit_score.csv')

In [12]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder

In [5]:
# Preview the loaded frame; a long frame is rendered truncated (first and last rows only).
df

Unnamed: 0,Age,Gender,Income,Education,Marital Status,Number of Children,Home Ownership,Credit Score
0,25,Female,50000,Bachelor's Degree,Single,0,Rented,High
1,30,Male,100000,Master's Degree,Married,2,Owned,High
2,35,Female,75000,Doctorate,Married,1,Owned,High
3,40,Male,125000,High School Diploma,Single,0,Owned,High
4,45,Female,100000,Bachelor's Degree,Married,3,Owned,High
...,...,...,...,...,...,...,...,...
159,29,Female,27500,High School Diploma,Single,0,Rented,Low
160,34,Male,47500,Associate's Degree,Single,0,Rented,Average
161,39,Female,62500,Bachelor's Degree,Married,2,Owned,High
162,44,Male,87500,Master's Degree,Single,0,Owned,High


In [6]:
# Count the missing values in each column before any encoding is applied.
df.isna().sum()

Age                   0
Gender                0
Income                0
Education             0
Marital Status        0
Number of Children    0
Home Ownership        0
Credit Score          0
dtype: int64

In [7]:
from sklearn.model_selection import train_test_split

# Fixed seed: without it the split is reshuffled on every run, so the rows in each
# split (and everything derived from them) would change between executions.
RANDOM_STATE = 42

# 'Credit Score' is the target; every other column is a feature.
# test_size=0.2 holds out 20% of the rows for the test split.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=['Credit Score']),
    df['Credit Score'],
    test_size=0.2,
    random_state=RANDOM_STATE,
)

In [8]:
# Inspect the training features; the row labels are the original (shuffled) index values.
X_train

Unnamed: 0,Age,Gender,Income,Education,Marital Status,Number of Children,Home Ownership
18,28,Female,30000,Associate's Degree,Single,0,Rented
17,52,Male,125000,High School Diploma,Married,0,Owned
152,52,Male,130000,High School Diploma,Married,0,Owned
124,35,Female,85000,Doctorate,Married,1,Owned
39,46,Female,95000,High School Diploma,Married,1,Owned
...,...,...,...,...,...,...,...
60,35,Female,85000,Doctorate,Married,1,Owned
104,51,Male,150000,Bachelor's Degree,Married,0,Owned
29,25,Female,55000,Bachelor's Degree,Single,0,Rented
106,32,Male,57500,Associate's Degree,Single,0,Rented


<h3>Without using ColumnTransformer</h3>

In [19]:
# Ordinal encoding -> Education
# The list order defines the integer codes: first entry -> 0, last entry -> 4.
education_levels = [
    'High School Diploma',
    "Associate's Degree",
    "Bachelor's Degree",
    "Master's Degree",
    'Doctorate',
]
oe = OrdinalEncoder(categories=[education_levels])

# Fit on the training split only, then apply the same mapping to both splits.
oe.fit(X_train[['Education']])
X_train_edu = oe.transform(X_train[['Education']])
X_test_edu = oe.transform(X_test[['Education']])

X_train_edu.shape

(131, 1)

In [23]:
# One-hot encoding -> Gender, Marital Status, Home Ownership
nominal_cols = ['Gender', 'Marital Status', 'Home Ownership']

# drop='first' omits the first category of every column;
# sparse_output=False returns a dense NumPy array instead of a sparse matrix.
ohe = OneHotEncoder(sparse_output=False, drop='first')

# Fit on the training split only, then encode both splits with the fitted encoder.
ohe.fit(X_train[nominal_cols])
X_train_3 = ohe.transform(X_train[nominal_cols])
X_test_3 = ohe.transform(X_test[nominal_cols])

X_train_3.shape

(131, 3)

In [26]:
# Keep Age, Income and Number of Children by dropping the columns that are
# encoded separately; the result is converted to a plain NumPy array.
encoded_cols = ['Gender', 'Education', 'Marital Status', 'Home Ownership']

X_train_AIN = X_train.drop(columns=encoded_cols).to_numpy()
X_test_AIN = X_test.drop(columns=encoded_cols).to_numpy()

X_train_AIN.shape

(131, 3)

In [28]:
# Stitch the pieces together column-wise, in the same order for both splits:
# untouched numeric columns, ordinal-encoded Education, one-hot encoded columns.
train_parts = (X_train_AIN, X_train_edu, X_train_3)
test_parts = (X_test_AIN, X_test_edu, X_test_3)

X_train_transformed = np.hstack(train_parts)
X_test_transformed = np.hstack(test_parts)

X_train_transformed.shape

(131, 7)

In [29]:
# Wrap the transformed training array in a DataFrame for inspection; only the
# array is passed, so rows and columns get default integer labels.
df1 = pd.DataFrame(data=X_train_transformed)

<h3>Using ColumnTransformer</h3>

In [31]:
from sklearn.compose import ColumnTransformer

In [34]:
# Each step is (name, transformer, columns it is applied to).
education_order = ['High School Diploma', "Associate's Degree", "Bachelor's Degree", "Master's Degree", 'Doctorate']
ordinal_step = ('tnf1', OrdinalEncoder(categories=[education_order]), ['Education'])
onehot_step = (
    'tnf2',
    OneHotEncoder(sparse_output=False, drop='first'),
    ['Gender', 'Marital Status', 'Home Ownership'],
)

# remainder='passthrough' keeps the columns not named in any step unchanged.
# Output columns follow the step order, with the passthrough columns appended last.
transformer = ColumnTransformer(
    transformers=[ordinal_step, onehot_step],
    remainder='passthrough',
)

In [35]:
# Fit every step on the training split and transform it in a single call,
# then check the resulting shape.
X_train_ct = transformer.fit_transform(X_train)
X_train_ct.shape

(131, 7)

In [36]:
# Apply the already-fitted transformer to the test split (no re-fitting),
# then check the resulting shape.
X_test_ct = transformer.transform(X_test)
X_test_ct.shape

(33, 7)