In [1]:
# Import LabelEncoder for encoding categories
# Import pandas to create and manage the dataset
from sklearn.preprocessing import LabelEncoder
import pandas as pd

### 📊 Sample Categorical Data
We’ll use a list of city names as our sample categorical feature.

In [2]:
# City names that make up the sample categorical feature (repeats are intentional)
cities = ['New York', 'Paris', 'Berlin', 'Paris', 'Berlin', 'New York']

# Wrap the names in a single-column table keyed by 'City'
data = {'City': cities}
df = pd.DataFrame(data)

# Leave the frame as the last expression so the notebook renders it
df

Unnamed: 0,City
0,New York
1,Paris
2,Berlin
3,Paris
4,Berlin
5,New York


### ⚙️ Apply LabelEncoder
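LabelEncoder converts text labels into integers, numbering the distinct labels in sorted (alphabetical) order — it is intended for encoding target variables. The resulting codes carry no meaningful ranking, so for input features prefer `OrdinalEncoder` (with an explicit category order) or `OneHotEncoder`.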

In [3]:
# Create the encoder, then learn the distinct city labels and
# translate every row to its integer code in a single pass
le = LabelEncoder()
city_codes = le.fit_transform(df['City'])

# Store the codes next to the original labels
df['City_encoded'] = city_codes

# Show the labels and their codes side by side
df

Unnamed: 0,City,City_encoded
0,New York,1
1,Paris,2
2,Berlin,0
3,Paris,2
4,Berlin,0
5,New York,1


In [4]:
# Pair every class the encoder learned with the integer code it assigns to it
label_map = {
    label: code
    for label, code in zip(le.classes_, le.transform(le.classes_))
}
print("Label Mapping:", label_map)

Label Mapping: {'Berlin': np.int64(0), 'New York': np.int64(1), 'Paris': np.int64(2)}


> 🧾 **Note**:
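If you’re seeing values like `np.int64(0)` instead of plain `0`, that’s completely normal.
It just means the integers are stored as **NumPy 64-bit integers**, which is what scikit-learn and NumPy operations return (NumPy 2.0 and later include the type in the printed representation).
✅ You can treat them like regular Python `int` values for arithmetic, comparisons, and indexing — if you need true `int`s (for example, for JSON serialization), convert them with `int(value)`.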