# Importing Libraries

In [39]:
# Import the NumPy library and alias it as 'np'
import numpy as np

# Import the Pandas library and alias it as 'pd'
import pandas as pd

# Importing Dataset

In [40]:
# Load the customer records from 'customer.csv' into a Pandas DataFrame called 'df'.
# The path is relative, so the file must sit in the notebook's working directory.
df = pd.read_csv('customer.csv')

In [41]:
# Preview the first rows of 'df' (head() shows five by default) to inspect the columns and sample values
df.head()

Unnamed: 0,age,gender,review,education,purchased
0,30,Female,Average,School,No
1,68,Female,Poor,UG,No
2,70,Female,Good,PG,No
3,72,Female,Good,PG,No
4,16,Female,Average,UG,No


# Data Preprocessing

Since the first two columns, 'age' and 'gender', are not ordinal columns, we drop them.

In [42]:
# Keep only the two ordinal feature columns and the target, which removes 'age' and 'gender'.
# Selecting by name (instead of slicing by position with iloc[:, 2:]) makes this cell
# idempotent: re-running it returns the same three columns, whereas the positional slice
# would drop two more columns on every run. It also fails loudly if a column is missing.
df = df[['review', 'education', 'purchased']]

# Display the first few rows of the updated DataFrame 'df' after dropping the columns
df.head()

Unnamed: 0,review,education,purchased
0,Average,School,No
1,Poor,UG,No
2,Good,PG,No
3,Good,PG,No
4,Average,UG,No


In [43]:
# Split 'df' into the feature matrix 'X' and the target variable 'y'.

# Select the features by name rather than by position so the column order of 'X' is
# explicit: 'review' first, then 'education'. The order matters because the encoder's
# category lists are matched to the columns of 'X' by position.
X = df[['review', 'education']]

# The 'purchased' column is the target variable 'y'.
y = df['purchased']

In [44]:
# Display the feature matrix 'X' (the 'review' and 'education' columns) before encoding
X

Unnamed: 0,review,education
0,Average,School
1,Poor,UG
2,Good,PG
3,Good,PG
4,Average,UG
5,Average,School
6,Good,School
7,Poor,School
8,Average,UG
9,Good,UG


In [None]:
# The review column values: ['Poor', 'Average', 'Good'] will be encoded to [0, 1, 2]
# The education column values: ['School', 'UG', 'PG'] will be encoded to [0, 1, 2]

# Ordinal Encoding

In [45]:
# Import the OrdinalEncoder class from scikit-learn's preprocessing module
from sklearn.preprocessing import OrdinalEncoder

In [46]:
# Declare the rank order of each ordinal feature, from lowest to highest.
# A category's position in its list is the integer it will be encoded as.
review_levels = ['Poor', 'Average', 'Good']    # -> 0, 1, 2
education_levels = ['School', 'UG', 'PG']      # -> 0, 1, 2

# Build the encoder with one category list per feature column, given in the same
# order as the columns of 'X' (review first, then education).
oe = OrdinalEncoder(categories=[review_levels, education_levels])

In [47]:
# Fit the encoder and transform the features in one step.
# The input is read from 'df' rather than from 'X' itself so this cell can be re-run
# safely: with 'X = oe.fit_transform(X)', a second run would feed the already-encoded
# numeric array back into the encoder, whose values are not among the declared categories.
X = oe.fit_transform(df[['review', 'education']])

In [48]:
# Display the encoded feature matrix 'X': an array with one row per sample and one
# column per feature (review, education), holding the ordinal codes as floats
X

array([[1., 0.],
       [0., 1.],
       [2., 2.],
       [2., 2.],
       [1., 1.],
       [1., 0.],
       [2., 0.],
       [0., 0.],
       [1., 1.],
       [2., 1.],
       [2., 1.],
       [2., 1.],
       [0., 0.],
       [1., 0.],
       [0., 2.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [2., 0.],
       [0., 2.],
       [1., 0.],
       [1., 2.],
       [0., 2.],
       [2., 0.],
       [1., 2.],
       [2., 0.],
       [0., 2.],
       [0., 2.],
       [0., 0.],
       [1., 1.],
       [1., 1.],
       [0., 0.],
       [1., 1.],
       [2., 2.],
       [1., 0.],
       [0., 0.],
       [2., 1.],
       [1., 2.],
       [2., 0.],
       [0., 2.],
       [2., 0.],
       [2., 2.],
       [2., 2.],
       [0., 2.],
       [1., 1.],
       [0., 2.],
       [0., 2.],
       [2., 2.],
       [2., 1.],
       [2., 1.]])

In [50]:
# Retrieve the categories the fitted OrdinalEncoder uses for each encoded column.

# 'oe.categories_' is a list with one array per encoded column; a category's position in
# its array is the numeric code it is encoded as. Here the order matches the lists passed
# to the encoder at initialization.
categories = oe.categories_
categories

[array(['Poor', 'Average', 'Good'], dtype=object),
 array(['School', 'UG', 'PG'], dtype=object)]