In [32]:
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder

<div style='text-align: center; font-size:40px;'>Example 1</div>

In [33]:
# Toy frame with one ordinal feature: clothing sizes listed in no particular order.
df = pd.DataFrame({'Size': ['M', 'XL', 'S', 'L']})

In [34]:
# Show the frame to check the raw Size labels before any encoding is applied.
df

Unnamed: 0,Size
0,M
1,XL
2,S
3,L


In [35]:
# Ranking of the size labels: a label's position in the inner list becomes its
# encoded value. The outer list holds one such ranking per encoded column.
order = [
    ['S', 'M', 'L', 'XL', 'XXL'],
]
order

[['S', 'M', 'L', 'XL', 'XXL']]

##### ✔️ Why do we use double brackets in order=[['S','M','L','XL','XXL']]?
The `categories` parameter of OrdinalEncoder expects a list of lists

→ one inner list per column you want to encode.

Even if you have only one column, sklearn still expects it in the form:

        categories = [ list_for_column_1 ,
                    list_for_column_2 ,
                    ... ]


Since we are encoding only one column (Size), we give:

        categories = [ ['S','M','L','XL','XXL'] ]


* Outer [ ] = list of columns

* Inner [ ] = order for that specific column

In [36]:
# Pass the explicit ranking so each label's code is its position in `order`.
encoder = OrdinalEncoder(
    categories=order,
)

In [37]:
# Fit on the Size column (selected with double brackets so it stays a DataFrame)
# and store the resulting codes next to the original labels.
df['Size_encoded'] = (
    encoder.fit_transform(df[['Size']])
)

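##### ✅ Why df[['Size']] instead of just df['Size']?
Because:
* df['Size'] → a Series (1-D: just the values of one column)
* df[['Size']] → a DataFrame (2-D: a table with one column)

sklearn encoders require 2-D input of shape (n_samples, n_features), so a 1-D Series is rejected while a one-column DataFrame is accepted.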

In [38]:
# Size_encoded now holds each label's position in `order` (S=0, M=1, L=2, XL=3).
df

Unnamed: 0,Size,Size_encoded
0,M,1.0
1,XL,3.0
2,S,0.0
3,L,2.0


<div style="text-align: center; font-size:40px;">Example: Why Double Brackets?</div>

In [39]:
# Three ordinal features in one frame, used to show why `categories` needs
# one inner list per column.
df_db = pd.DataFrame(
    {
        'Size': ['M', 'XL', 'S', 'L'],
        'Quality': ['Medium', 'High', 'Low', 'Medium'],
        'Education': ['Bachelor', 'Master', 'Primary', 'PhD'],
    }
)
df_db

Unnamed: 0,Size,Quality,Education
0,M,Medium,Bachelor
1,XL,High,Master
2,S,Low,Primary
3,L,Medium,PhD


In [40]:
# This is why the outer brackets exist: one inner list per column, each ranking
# that column's labels by position (index 0 is encoded as 0, and so on).
categories = [
    ['S', 'M', 'L', 'XL', 'XXL'],                            # Size
    ['Low', 'Medium', 'High'],                               # Quality
    ['Primary', 'Secondary', 'Bachelor', 'Master', 'PhD'],   # Education
]

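When encoding several columns, each column gets its own list of categories, and all of those lists go inside a single outer list — in the same order as the columns passed to `fit_transform`.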

In [41]:
# One encoder for all three columns; it receives the per-column rankings together.
encoder_db = OrdinalEncoder(
    categories=categories,
)

In [42]:
# Encode the three columns in a single call and write the codes into three new
# columns, listed in the same order as the inputs.
df_db[['Size_enc', 'Quality_enc', 'Education_enc']] = (
    encoder_db.fit_transform(df_db[['Size', 'Quality', 'Education']])
)

In [43]:
# Each *_enc column holds the position of the row's label within the matching
# inner list of `categories`.
df_db

Unnamed: 0,Size,Quality,Education,Size_enc,Quality_enc,Education_enc
0,M,Medium,Bachelor,1.0,1.0,2.0
1,XL,High,Master,3.0,2.0,3.0
2,S,Low,Primary,0.0,0.0,0.0
3,L,Medium,PhD,2.0,1.0,4.0


<div style="text-align: center; font-size:40px;">Example 2</div>

In [44]:
# Student roster with letter grades; Grade is the ordinal column encoded below.
data = dict(
    Student=['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    Grade=['A', 'B', 'C', 'A', 'B'],
)
df2 = pd.DataFrame(data)
print(df2)

   Student Grade
0    Alice     A
1      Bob     B
2  Charlie     C
3    David     A
4      Eva     B


In [45]:
# Explicit grade ranking given inline: 'A' is position 0, 'B' is 1, 'C' is 2.
encoder2 = OrdinalEncoder(
    categories=[['A', 'B', 'C']],
)

In [46]:
# Fit on the Grade column (double brackets keep it a DataFrame) and store the codes.
df2['Grade_Encoded'] = (
    encoder2.fit_transform(df2[['Grade']])
)

In [47]:
# Grade_Encoded maps A, B, C to 0.0, 1.0, 2.0 respectively.
df2

Unnamed: 0,Student,Grade,Grade_Encoded
0,Alice,A,0.0
1,Bob,B,1.0
2,Charlie,C,2.0
3,David,A,0.0
4,Eva,B,1.0
