# Data Preprocessing: Label Encoding

## Import Modules

In [4]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

## Prepare Data

In [10]:
# Four consecutive calendar days, kept as plain strings for now so the
# conversion to real datetimes can be shown as its own step.
data = dict(
    date=[
        '2023-10-15',
        '2023-10-16',
        '2023-10-17',
        '2023-10-18',
    ],
)
df = pd.DataFrame(data=data)

# Summarise the column types, then display the frame itself.
df.info()
df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    4 non-null      object
dtypes: object(1)
memory usage: 164.0+ bytes


Unnamed: 0,date
0,2023-10-15
1,2023-10-16
2,2023-10-17
3,2023-10-18


## Convert the "date" column to datetime format

In [11]:
# Parse the date strings into real datetime values so that the `.dt` accessor
# can be used on the column. The format is given explicitly (YYYY-MM-DD)
# instead of being inferred, so a value that does not match it raises an
# error rather than being guessed at.
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

# Confirm the dtype change.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    4 non-null      datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 164.0 bytes


## Add additional information/columns

In [12]:
# Derive the name of the weekday for every date via the datetime accessor,
# then attach it to the frame as a new column.
day_names = df['date'].dt.day_name()
df['weekday_name'] = day_names
df

Unnamed: 0,date,weekday_name
0,2023-10-15,Sunday
1,2023-10-16,Monday
2,2023-10-17,Tuesday
3,2023-10-18,Wednesday


## Label Encoding with scikit-learn

In [14]:
# Learn the set of distinct weekday names, then map each row to the integer
# code of its name. The codes follow the sorted order of the names (see
# `label_encoder.classes_`), not calendar order -- e.g. Monday is encoded
# before Sunday.
label_encoder = LabelEncoder()
label_encoder.fit(df['weekday_name'])
df['weekday_label'] = label_encoder.transform(df['weekday_name'])
df

Unnamed: 0,date,weekday_name,weekday_label
0,2023-10-15,Sunday,1
1,2023-10-16,Monday,0
2,2023-10-17,Tuesday,2
3,2023-10-18,Wednesday,3


In [15]:
# Show the labels the encoder learned; a label's position in this array is the
# integer it was encoded as in the `weekday_label` column.
print(label_encoder.classes_)

['Monday' 'Sunday' 'Tuesday' 'Wednesday']
